Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/hch/vfs-queue

* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/hch/vfs-queue:
  vfs: add d_prune dentry operation
  vfs: protect i_nlink
  filesystems: add set_nlink()
  filesystems: add missing nlink wrappers
  logfs: remove unnecessary nlink setting
  ocfs2: remove unnecessary nlink setting
  jfs: remove unnecessary nlink setting
  hypfs: remove unnecessary nlink setting
  vfs: ignore error on forced remount
  readlinkat: ensure we return ENOENT for the empty pathname for normal lookups
  vfs: fix dentry leak in simple_fill_super()
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
index 22f3a0e..b100adc 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -73,14 +73,6 @@
 			also be used to enable or disable barriers, for
 			consistency with other ext3 mount options.
 
-orlov		(*)	This enables the new Orlov block allocator. It is
-			enabled by default.
-
-oldalloc		This disables the Orlov block allocator and enables
-			the old block allocator.  Orlov should have better
-			performance - we'd like to get some feedback if it's
-			the contrary for you.
-
 user_xattr		Enables Extended User Attributes.  Additionally, you
 			need to have extended attribute support enabled in the
 			kernel configuration (CONFIG_EXT3_FS_XATTR).  See the
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 232a575..4917cf2 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -160,7 +160,9 @@
                      	lead to any number of problems.
 
 data=journal		All data are committed into the journal prior to being
-			written into the main file system.
+			written into the main file system.  Enabling
+			this mode will disable delayed allocation and
+			O_DIRECT support.
 
 data=ordered	(*)	All data are forced directly out to the main file
 			system prior to its metadata being committed to the
@@ -201,30 +203,19 @@
 			table readahead algorithm will pre-read into
 			the buffer cache.  The default value is 32 blocks.
 
-orlov		(*)	This enables the new Orlov block allocator. It is
-			enabled by default.
-
-oldalloc		This disables the Orlov block allocator and enables
-			the old block allocator.  Orlov should have better
-			performance - we'd like to get some feedback if it's
-			the contrary for you.
-
-user_xattr		Enables Extended User Attributes.  Additionally, you
-			need to have extended attribute support enabled in the
-			kernel configuration (CONFIG_EXT4_FS_XATTR).  See the
-			attr(5) manual page and http://acl.bestbits.at/ to
-			learn more about extended attributes.
-
-nouser_xattr		Disables Extended User Attributes.
-
-acl			Enables POSIX Access Control Lists support.
-			Additionally, you need to have ACL support enabled in
-			the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL).
-			See the acl(5) manual page and http://acl.bestbits.at/
-			for more information.
+nouser_xattr		Disables Extended User Attributes. If you have extended
+			attribute support enabled in the kernel configuration
+			(CONFIG_EXT4_FS_XATTR), extended attribute support
+			is enabled by default on mount. See the attr(5) manual
+			page and http://acl.bestbits.at/ for more information
+			about extended attributes.
 
 noacl			This option disables POSIX Access Control List
-			support.
+			support. If ACL support is enabled in the kernel
+			configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL is
+			enabled by default on mount. See the acl(5) manual
+			page and http://acl.bestbits.at/ for more information
+			about acl.
 
 bsddf		(*)	Make 'df' act like BSD.
 minixdf			Make 'df' act like Minix.
@@ -419,8 +410,8 @@
 In the event of a crash, the journal can be replayed, bringing both data and
 metadata into a consistent state.  This mode is the slowest except when data
 needs to be read from and written to disk at the same time where it
-outperforms all others modes.  Currently ext4 does not have delayed
-allocation support if this data journalling mode is selected.
+outperforms all others modes.  Enabling this mode will disable delayed
+allocation and O_DIRECT support.
 
 /proc entries
 =============
diff --git a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt
index 5d0fc8b..77dfecf 100644
--- a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt
+++ b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt
@@ -134,13 +134,13 @@
 
   ______________________________________________________________________
 
-  11..  IInnttrroodduuccttiioonn
+  1.  Introduction
 
   Welcome to User Mode Linux.  It's going to be fun.
 
 
 
-  11..11..  HHooww iiss UUsseerr MMooddee LLiinnuuxx DDiiffffeerreenntt??
+  1.1.  How is User Mode Linux Different?
 
   Normally, the Linux Kernel talks straight to your hardware (video
   card, keyboard, hard drives, etc), and any programs which run ask the
@@ -181,7 +181,7 @@
 
 
 
-  11..22..  WWhhyy WWoouulldd II WWaanntt UUsseerr MMooddee LLiinnuuxx??
+  1.2.  Why Would I Want User Mode Linux?
 
 
   1. If User Mode Linux crashes, your host kernel is still fine.
@@ -206,12 +206,12 @@
 
 
 
-  22..  CCoommppiilliinngg tthhee kkeerrnneell aanndd mmoodduulleess
+  2.  Compiling the kernel and modules
 
 
 
 
-  22..11..  CCoommppiilliinngg tthhee kkeerrnneell
+  2.1.  Compiling the kernel
 
 
   Compiling the user mode kernel is just like compiling any other
@@ -322,7 +322,7 @@
   bug fixes and enhancements that have gone into subsequent releases.
 
 
-  22..22..  CCoommppiilliinngg aanndd iinnssttaalllliinngg kkeerrnneell mmoodduulleess
+  2.2.  Compiling and installing kernel modules
 
   UML modules are built in the same way as the native kernel (with the
   exception of the 'ARCH=um' that you always need for UML):
@@ -386,19 +386,19 @@
 
 
 
-  22..33..  CCoommppiilliinngg aanndd iinnssttaalllliinngg uummll__uuttiilliittiieess
+  2.3.  Compiling and installing uml_utilities
 
   Many features of the UML kernel require a user-space helper program,
   so a uml_utilities package is distributed separately from the kernel
   patch which provides these helpers. Included within this is:
 
-  +o  port-helper - Used by consoles which connect to xterms or ports
+  o  port-helper - Used by consoles which connect to xterms or ports
 
-  +o  tunctl - Configuration tool to create and delete tap devices
+  o  tunctl - Configuration tool to create and delete tap devices
 
-  +o  uml_net - Setuid binary for automatic tap device configuration
+  o  uml_net - Setuid binary for automatic tap device configuration
 
-  +o  uml_switch - User-space virtual switch required for daemon
+  o  uml_switch - User-space virtual switch required for daemon
      transport
 
      The uml_utilities tree is compiled with:
@@ -423,11 +423,11 @@
 
 
 
-  33..  RRuunnnniinngg UUMMLL aanndd llooggggiinngg iinn
+  3.  Running UML and logging in
 
 
 
-  33..11..  RRuunnnniinngg UUMMLL
+  3.1.  Running UML
 
   It runs on 2.2.15 or later, and all 2.4 kernels.
 
@@ -454,7 +454,7 @@
 
 
 
-  33..22..  LLooggggiinngg iinn
+  3.2.  Logging in
 
 
 
@@ -468,7 +468,7 @@
 
   There are a couple of other ways to log in:
 
-  +o  On a virtual console
+  o  On a virtual console
 
 
 
@@ -480,7 +480,7 @@
 
 
 
-  +o  Over the serial line
+  o  Over the serial line
 
 
      In the boot output, find a line that looks like:
@@ -503,7 +503,7 @@
 
 
 
-  +o  Over the net
+  o  Over the net
 
 
      If the network is running, then you can telnet to the virtual
@@ -514,13 +514,13 @@
   down and the process will exit.
 
 
-  33..33..  EExxaammpplleess
+  3.3.  Examples
 
   Here are some examples of UML in action:
 
-  +o  A login session <http://user-mode-linux.sourceforge.net/login.html>
+  o  A login session <http://user-mode-linux.sourceforge.net/login.html>
 
-  +o  A virtual network <http://user-mode-linux.sourceforge.net/net.html>
+  o  A virtual network <http://user-mode-linux.sourceforge.net/net.html>
 
 
 
@@ -528,12 +528,12 @@
 
 
 
-  44..  UUMMLL oonn 22GG//22GG hhoossttss
+  4.  UML on 2G/2G hosts
 
 
 
 
-  44..11..  IInnttrroodduuccttiioonn
+  4.1.  Introduction
 
 
   Most Linux machines are configured so that the kernel occupies the
@@ -546,7 +546,7 @@
 
 
 
-  44..22..  TThhee pprroobblleemm
+  4.2.  The problem
 
 
   The prebuilt UML binaries on this site will not run on 2G/2G hosts
@@ -558,7 +558,7 @@
 
 
 
-  44..33..  TThhee ssoolluuttiioonn
+  4.3.  The solution
 
 
   The fix for this is to rebuild UML from source after enabling
@@ -576,7 +576,7 @@
 
 
 
-  55..  SSeettttiinngg uupp sseerriiaall lliinneess aanndd ccoonnssoolleess
+  5.  Setting up serial lines and consoles
 
 
   It is possible to attach UML serial lines and consoles to many types
@@ -586,12 +586,12 @@
   You can attach them to host ptys, ttys, file descriptors, and ports.
   This allows you to do things like
 
-  +o  have a UML console appear on an unused host console,
+  o  have a UML console appear on an unused host console,
 
-  +o  hook two virtual machines together by having one attach to a pty
+  o  hook two virtual machines together by having one attach to a pty
      and having the other attach to the corresponding tty
 
-  +o  make a virtual machine accessible from the net by attaching a
+  o  make a virtual machine accessible from the net by attaching a
      console to a port on the host.
 
 
@@ -599,7 +599,7 @@
 
 
 
-  55..11..  SSppeecciiffyyiinngg tthhee ddeevviiccee
+  5.1.  Specifying the device
 
   Devices are specified with "con" or "ssl" (console or serial line,
   respectively), optionally with a device number if you are talking
@@ -626,13 +626,13 @@
 
 
 
-  55..22..  SSppeecciiffyyiinngg tthhee cchhaannnneell
+  5.2.  Specifying the channel
 
   There are a number of different types of channels to attach a UML
   device to, each with a different way of specifying exactly what to
   attach to.
 
-  +o  pseudo-terminals - device=pty pts terminals - device=pts
+  o  pseudo-terminals - device=pty pts terminals - device=pts
 
 
      This will cause UML to allocate a free host pseudo-terminal for the
@@ -640,20 +640,20 @@
      log.  You access it by attaching a terminal program to the
      corresponding tty:
 
-  +o  screen /dev/pts/n
+  o  screen /dev/pts/n
 
-  +o  screen /dev/ttyxx
+  o  screen /dev/ttyxx
 
-  +o  minicom -o -p /dev/ttyxx - minicom seems not able to handle pts
+  o  minicom -o -p /dev/ttyxx - minicom seems not able to handle pts
      devices
 
-  +o  kermit - start it up, 'open' the device, then 'connect'
+  o  kermit - start it up, 'open' the device, then 'connect'
 
 
 
 
 
-  +o  terminals - device=tty:tty device file
+  o  terminals - device=tty:tty device file
 
 
      This will make UML attach the device to the specified tty (i.e
@@ -672,7 +672,7 @@
 
 
 
-  +o  xterms - device=xterm
+  o  xterms - device=xterm
 
 
      UML will run an xterm and the device will be attached to it.
@@ -681,7 +681,7 @@
 
 
 
-  +o  Port - device=port:port number
+  o  Port - device=port:port number
 
 
      This will attach the UML devices to the specified host port.
@@ -725,7 +725,7 @@
 
 
 
-  +o  already-existing file descriptors - device=file descriptor
+  o  already-existing file descriptors - device=file descriptor
 
 
      If you set up a file descriptor on the UML command line, you can
@@ -743,7 +743,7 @@
 
 
 
-  +o  Nothing - device=null
+  o  Nothing - device=null
 
 
      This allows the device to be opened, in contrast to 'none', but
@@ -754,7 +754,7 @@
 
 
 
-  +o  None - device=none
+  o  None - device=none
 
 
      This causes the device to disappear.
@@ -770,7 +770,7 @@
 
 
 
-  will cause serial line 3 to accept input on the host's /dev/tty3 and
+  will cause serial line 3 to accept input on the host's /dev/tty2 and
   display output on an xterm.  That's a silly example - the most common
   use of this syntax is to reattach the main console to stdin and stdout
   as shown above.
@@ -785,7 +785,7 @@
 
 
 
-  55..33..  EExxaammpplleess
+  5.3.  Examples
 
   There are a number of interesting things you can do with this
   capability.
@@ -838,7 +838,7 @@
   prompt of the other virtual machine.
 
 
-  66..  SSeettttiinngg uupp tthhee nneettwwoorrkk
+  6.  Setting up the network
 
 
 
@@ -858,19 +858,19 @@
   There are currently five transport types available for a UML virtual
   machine to exchange packets with other hosts:
 
-  +o  ethertap
+  o  ethertap
 
-  +o  TUN/TAP
+  o  TUN/TAP
 
-  +o  Multicast
+  o  Multicast
 
-  +o  a switch daemon
+  o  a switch daemon
 
-  +o  slip
+  o  slip
 
-  +o  slirp
+  o  slirp
 
-  +o  pcap
+  o  pcap
 
      The TUN/TAP, ethertap, slip, and slirp transports allow a UML
      instance to exchange packets with the host.  They may be directed
@@ -893,28 +893,28 @@
   With so many host transports, which one should you use?  Here's when
   you should use each one:
 
-  +o  ethertap - if you want access to the host networking and it is
+  o  ethertap - if you want access to the host networking and it is
      running 2.2
 
-  +o  TUN/TAP - if you want access to the host networking and it is
+  o  TUN/TAP - if you want access to the host networking and it is
      running 2.4.  Also, the TUN/TAP transport is able to use a
      preconfigured device, allowing it to avoid using the setuid uml_net
      helper, which is a security advantage.
 
-  +o  Multicast - if you want a purely virtual network and you don't want
+  o  Multicast - if you want a purely virtual network and you don't want
      to set up anything but the UML
 
-  +o  a switch daemon - if you want a purely virtual network and you
+  o  a switch daemon - if you want a purely virtual network and you
      don't mind running the daemon in order to get somewhat better
      performance
 
-  +o  slip - there is no particular reason to run the slip backend unless
+  o  slip - there is no particular reason to run the slip backend unless
      ethertap and TUN/TAP are just not available for some reason
 
-  +o  slirp - if you don't have root access on the host to setup
+  o  slirp - if you don't have root access on the host to setup
      networking, or if you don't want to allocate an IP to your UML
 
-  +o  pcap - not much use for actual network connectivity, but great for
+  o  pcap - not much use for actual network connectivity, but great for
      monitoring traffic on the host
 
      Ethertap is available on 2.4 and works fine.  TUN/TAP is preferred
@@ -926,7 +926,7 @@
      exploit the helper's root privileges.
 
 
-  66..11..  GGeenneerraall sseettuupp
+  6.1.  General setup
 
   First, you must have the virtual network enabled in your UML.  If are
   running a prebuilt kernel from this site, everything is already
@@ -995,7 +995,7 @@
 
 
 
-  66..22..  UUsseerrssppaaccee ddaaeemmoonnss
+  6.2.  Userspace daemons
 
   You will likely need the setuid helper, or the switch daemon, or both.
   They are both installed with the RPM and deb, so if you've installed
@@ -1011,7 +1011,7 @@
 
 
 
-  66..33..  SSppeecciiffyyiinngg eetthheerrnneett aaddddrreesssseess
+  6.3.  Specifying ethernet addresses
 
   Below, you will see that the TUN/TAP, ethertap, and daemon interfaces
   allow you to specify hardware addresses for the virtual ethernet
@@ -1023,11 +1023,11 @@
   sufficient to guarantee a unique hardware address for the device.  A
   couple of exceptions are:
 
-  +o  Another set of virtual ethernet devices are on the same network and
+  o  Another set of virtual ethernet devices are on the same network and
      they are assigned hardware addresses using a different scheme which
      may conflict with the UML IP address-based scheme
 
-  +o  You aren't going to use the device for IP networking, so you don't
+  o  You aren't going to use the device for IP networking, so you don't
      assign the device an IP address
 
      If you let the driver provide the hardware address, you should make
@@ -1049,7 +1049,7 @@
 
 
 
-  66..44..  UUMMLL iinntteerrffaaccee sseettuupp
+  6.4.  UML interface setup
 
   Once the network devices have been described on the command line, you
   should boot UML and log in.
@@ -1131,7 +1131,7 @@
 
 
 
-  66..55..  MMuullttiiccaasstt
+  6.5.  Multicast
 
   The simplest way to set up a virtual network between multiple UMLs is
   to use the mcast transport.  This was written by Harald Welte and is
@@ -1194,7 +1194,7 @@
 
 
 
-  66..66..  TTUUNN//TTAAPP wwiitthh tthhee uummll__nneett hheellppeerr
+  6.6.  TUN/TAP with the uml_net helper
 
   TUN/TAP is the preferred mechanism on 2.4 to exchange packets with the
   host.  The TUN/TAP backend has been in UML since 2.4.9-3um.
@@ -1247,10 +1247,10 @@
   There are a couple potential problems with running the TUN/TAP
   transport on a 2.4 host kernel
 
-  +o  TUN/TAP seems not to work on 2.4.3 and earlier.  Upgrade the host
+  o  TUN/TAP seems not to work on 2.4.3 and earlier.  Upgrade the host
      kernel or use the ethertap transport.
 
-  +o  With an upgraded kernel, TUN/TAP may fail with
+  o  With an upgraded kernel, TUN/TAP may fail with
 
 
        File descriptor in bad state
@@ -1269,7 +1269,7 @@
 
 
 
-  66..77..  TTUUNN//TTAAPP wwiitthh aa pprreeccoonnffiigguurreedd ttaapp ddeevviiccee
+  6.7.  TUN/TAP with a preconfigured tap device
 
   If you prefer not to have UML use uml_net (which is somewhat
   insecure), with UML 2.4.17-11, you can set up a TUN/TAP device
@@ -1277,7 +1277,7 @@
   there is no need for root assistance.  Setting up the device is done
   as follows:
 
-  +o  Create the device with tunctl (available from the UML utilities
+  o  Create the device with tunctl (available from the UML utilities
      tarball)
 
 
@@ -1291,7 +1291,7 @@
   where uid is the user id or username that UML will be run as.  This
   will tell you what device was created.
 
-  +o  Configure the device IP (change IP addresses and device name to
+  o  Configure the device IP (change IP addresses and device name to
      suit)
 
 
@@ -1303,7 +1303,7 @@
 
 
 
-  +o  Set up routing and arping if desired - this is my recipe, there are
+  o  Set up routing and arping if desired - this is my recipe, there are
      other ways of doing the same thing
 
 
@@ -1338,7 +1338,7 @@
   utility which reads the information from a config file and sets up
   devices at boot time.
 
-  +o  Rather than using up two IPs and ARPing for one of them, you can
+  o  Rather than using up two IPs and ARPing for one of them, you can
      also provide direct access to your LAN by the UML by using a
      bridge.
 
@@ -1417,7 +1417,7 @@
   Note that 'br0' should be setup using ifconfig with the existing IP
   address of eth0, as eth0 no longer has its own IP.
 
-  +o
+  o
 
 
      Also, the /dev/net/tun device must be writable by the user running
@@ -1438,11 +1438,11 @@
   devices and chgrp /dev/net/tun to that group with mode 664 or 660.
 
 
-  +o  Once the device is set up, run UML with 'eth0=tuntap,device name'
+  o  Once the device is set up, run UML with 'eth0=tuntap,device name'
      (i.e. 'eth0=tuntap,tap0') on the command line (or do it with the
      mconsole config command).
 
-  +o  Bring the eth device up in UML and you're in business.
+  o  Bring the eth device up in UML and you're in business.
 
      If you don't want that tap device any more, you can make it non-
      persistent with
@@ -1465,7 +1465,7 @@
 
 
 
-  66..88..  EEtthheerrttaapp
+  6.8.  Ethertap
 
   Ethertap is the general mechanism on 2.2 for userspace processes to
   exchange packets with the kernel.
@@ -1561,9 +1561,9 @@
 
 
 
-  66..99..  TThhee sswwiittcchh ddaaeemmoonn
+  6.9.  The switch daemon
 
-  NNoottee: This is the daemon formerly known as uml_router, but which was
+  Note: This is the daemon formerly known as uml_router, but which was
   renamed so the network weenies of the world would stop growling at me.
 
 
@@ -1649,7 +1649,7 @@
 
 
 
-  66..1100..  SSlliipp
+  6.10.  Slip
 
   Slip is another, less general, mechanism for a process to communicate
   with the host networking.  In contrast to the ethertap interface,
@@ -1681,7 +1681,7 @@
 
 
 
-  66..1111..  SSlliirrpp
+  6.11.  Slirp
 
   slirp uses an external program, usually /usr/bin/slirp, to provide IP
   only networking connectivity through the host. This is similar to IP
@@ -1737,7 +1737,7 @@
 
 
 
-  66..1122..  ppccaapp
+  6.12.  pcap
 
   The pcap transport is attached to a UML ethernet device on the command
   line or with uml_mconsole with the following syntax:
@@ -1777,7 +1777,7 @@
 
 
 
-  66..1133..  SSeettttiinngg uupp tthhee hhoosstt yyoouurrsseellff
+  6.13.  Setting up the host yourself
 
   If you don't specify an address for the host side of the ethertap or
   slip device, UML won't do any setup on the host.  So this is what is
@@ -1785,7 +1785,7 @@
   192.168.0.251 and a UML-side IP of 192.168.0.250 - adjust to suit your
   own network):
 
-  +o  The device needs to be configured with its IP address.  Tap devices
+  o  The device needs to be configured with its IP address.  Tap devices
      are also configured with an mtu of 1484.  Slip devices are
      configured with a point-to-point address pointing at the UML ip
      address.
@@ -1805,7 +1805,7 @@
 
 
 
-  +o  If a tap device is being set up, a route is set to the UML IP.
+  o  If a tap device is being set up, a route is set to the UML IP.
 
 
        UML# route add -host 192.168.0.250 gw 192.168.0.251
@@ -1814,7 +1814,7 @@
 
 
 
-  +o  To allow other hosts on your network to see the virtual machine,
+  o  To allow other hosts on your network to see the virtual machine,
      proxy arp is set up for it.
 
 
@@ -1824,7 +1824,7 @@
 
 
 
-  +o  Finally, the host is set up to route packets.
+  o  Finally, the host is set up to route packets.
 
 
        host#  echo 1 > /proc/sys/net/ipv4/ip_forward
@@ -1838,12 +1838,12 @@
 
 
 
-  77..  SShhaarriinngg FFiilleessyysstteemmss bbeettwweeeenn VViirrttuuaall MMaacchhiinneess
+  7.  Sharing Filesystems between Virtual Machines
 
 
 
 
-  77..11..  AA wwaarrnniinngg
+  7.1.  A warning
 
   Don't attempt to share filesystems simply by booting two UMLs from the
   same file.  That's the same thing as booting two physical machines
@@ -1851,7 +1851,7 @@
 
 
 
-  77..22..  UUssiinngg llaayyeerreedd bblloocckk ddeevviicceess
+  7.2.  Using layered block devices
 
   The way to share a filesystem between two virtual machines is to use
   the copy-on-write (COW) layering capability of the ubd block driver.
@@ -1896,7 +1896,7 @@
 
 
 
-  77..33..  NNoottee!!
+  7.3.  Note!
 
   When checking the size of the COW file in order to see the gobs of
   space that you're saving, make sure you use 'ls -ls' to see the actual
@@ -1926,7 +1926,7 @@
 
 
 
-  77..44..  AAnnootthheerr wwaarrnniinngg
+  7.4.  Another warning
 
   Once a filesystem is being used as a readonly backing file for a COW
   file, do not boot directly from it or modify it in any way.  Doing so
@@ -1952,7 +1952,7 @@
 
 
 
-  77..55..  uummll__mmoooo :: MMeerrggiinngg aa CCOOWW ffiillee wwiitthh iittss bbaacckkiinngg ffiillee
+  7.5.  uml_moo : Merging a COW file with its backing file
 
   Depending on how you use UML and COW devices, it may be advisable to
   merge the changes in the COW file into the backing file every once in
@@ -2001,7 +2001,7 @@
 
 
 
-  88..  CCrreeaattiinngg ffiilleessyysstteemmss
+  8.  Creating filesystems
 
 
   You may want to create and mount new UML filesystems, either because
@@ -2015,7 +2015,7 @@
   should be easy to translate to the filesystem of your choice.
 
 
-  88..11..  CCrreeaattee tthhee ffiilleessyysstteemm ffiillee
+  8.1.  Create the filesystem file
 
   dd is your friend.  All you need to do is tell dd to create an empty
   file of the appropriate size.  I usually make it sparse to save time
@@ -2032,7 +2032,7 @@
 
 
 
-  88..22..  AAssssiiggnn tthhee ffiillee ttoo aa UUMMLL ddeevviiccee
+  8.2.  Assign the file to a UML device
 
   Add an argument like the following to the UML command line:
 
@@ -2045,7 +2045,7 @@
 
 
 
-  88..33..  CCrreeaattiinngg aanndd mmoouunnttiinngg tthhee ffiilleessyysstteemm
+  8.3.  Creating and mounting the filesystem
 
   Make sure that the filesystem is available, either by being built into
   the kernel, or available as a module, then boot up UML and log in.  If
@@ -2096,7 +2096,7 @@
 
 
 
-  99..  HHoosstt ffiillee aacccceessss
+  9.  Host file access
 
 
   If you want to access files on the host machine from inside UML, you
@@ -2112,7 +2112,7 @@
   files contained in it just as you would on the host.
 
 
-  99..11..  UUssiinngg hhoossttffss
+  9.1.  Using hostfs
 
   To begin with, make sure that hostfs is available inside the virtual
   machine with
@@ -2151,7 +2151,7 @@
 
 
 
-  99..22..  hhoossttffss aass tthhee rroooott ffiilleessyysstteemm
+  9.2.  hostfs as the root filesystem
 
   It's possible to boot from a directory hierarchy on the host using
   hostfs rather than using the standard filesystem in a file.
@@ -2194,20 +2194,20 @@
   UML should then boot as it does normally.
 
 
-  99..33..  BBuuiillddiinngg hhoossttffss
+  9.3.  Building hostfs
 
   If you need to build hostfs because it's not in your kernel, you have
   two choices:
 
 
 
-  +o  Compiling hostfs into the kernel:
+  o  Compiling hostfs into the kernel:
 
 
      Reconfigure the kernel and set the 'Host filesystem' option under
 
 
-  +o  Compiling hostfs as a module:
+  o  Compiling hostfs as a module:
 
 
      Reconfigure the kernel and set the 'Host filesystem' option under
@@ -2228,7 +2228,7 @@
 
 
 
-  1100..  TThhee MMaannaaggeemmeenntt CCoonnssoollee
+  10.  The Management Console
 
 
 
@@ -2240,15 +2240,15 @@
 
   There are a number of things you can do with the mconsole interface:
 
-  +o  get the kernel version
+  o  get the kernel version
 
-  +o  add and remove devices
+  o  add and remove devices
 
-  +o  halt or reboot the machine
+  o  halt or reboot the machine
 
-  +o  Send SysRq commands
+  o  Send SysRq commands
 
-  +o  Pause and resume the UML
+  o  Pause and resume the UML
 
 
   You need the mconsole client (uml_mconsole) which is present in CVS
@@ -2300,28 +2300,28 @@
 
   You'll get a prompt, at which you can run one of these commands:
 
-  +o  version
+  o  version
 
-  +o  halt
+  o  halt
 
-  +o  reboot
+  o  reboot
 
-  +o  config
+  o  config
 
-  +o  remove
+  o  remove
 
-  +o  sysrq
+  o  sysrq
 
-  +o  help
+  o  help
 
-  +o  cad
+  o  cad
 
-  +o  stop
+  o  stop
 
-  +o  go
+  o  go
 
 
-  1100..11..  vveerrssiioonn
+  10.1.  version
 
   This takes no arguments.  It prints the UML version.
 
@@ -2342,7 +2342,7 @@
 
 
 
-  1100..22..  hhaalltt aanndd rreebboooott
+  10.2.  halt and reboot
 
   These take no arguments.  They shut the machine down immediately, with
   no syncing of disks and no clean shutdown of userspace.  So, they are
@@ -2357,7 +2357,7 @@
 
 
 
-  1100..33..  ccoonnffiigg
+  10.3.  config
 
   "config" adds a new device to the virtual machine.  Currently the ubd
   and network drivers support this.  It takes one argument, which is the
@@ -2378,7 +2378,7 @@
 
 
 
-  1100..44..  rreemmoovvee
+  10.4.  remove
 
   "remove" deletes a device from the system.  Its argument is just the
   name of the device to be removed. The device must be idle in whatever
@@ -2397,7 +2397,7 @@
 
 
 
-  1100..55..  ssyyssrrqq
+  10.5.  sysrq
 
   This takes one argument, which is a single letter.  It calls the
   generic kernel's SysRq driver, which does whatever is called for by
@@ -2407,14 +2407,14 @@
 
 
 
-  1100..66..  hheellpp
+  10.6.  help
 
   "help" returns a string listing the valid commands and what each one
   does.
 
 
 
-  1100..77..  ccaadd
+  10.7.  cad
 
   This invokes the Ctl-Alt-Del action on init.  What exactly this ends
   up doing is up to /etc/inittab.  Normally, it reboots the machine.
@@ -2432,7 +2432,7 @@
 
 
 
-  1100..88..  ssttoopp
+  10.8.  stop
 
   This puts the UML in a loop reading mconsole requests until a 'go'
   mconsole command is received. This is very useful for making backups
@@ -2448,7 +2448,7 @@
 
 
 
-  1100..99..  ggoo
+  10.9.  go
 
   This resumes a UML after being paused by a 'stop' command. Note that
   when the UML has resumed, TCP connections may have timed out and if
@@ -2462,10 +2462,10 @@
 
 
 
-  1111..  KKeerrnneell ddeebbuuggggiinngg
+  11.  Kernel debugging
 
 
-  NNoottee:: The interface that makes debugging, as described here, possible
+  Note: The interface that makes debugging, as described here, possible
   is present in 2.4.0-test6 kernels and later.
 
 
@@ -2485,7 +2485,7 @@
 
 
 
-  1111..11..  SSttaarrttiinngg tthhee kkeerrnneell uunnddeerr ggddbb
+  11.1.  Starting the kernel under gdb
 
   You can have the kernel running under the control of gdb from the
   beginning by putting 'debug' on the command line.  You will get an
@@ -2498,7 +2498,7 @@
   There is a transcript of a debugging session  here <debug-
   session.html> , with breakpoints being set in the scheduler and in an
   interrupt handler.
-  1111..22..  EExxaammiinniinngg sslleeeeppiinngg pprroocceesssseess
+  11.2.  Examining sleeping processes
 
   Not every bug is evident in the currently running process.  Sometimes,
   processes hang in the kernel when they shouldn't because they've
@@ -2516,7 +2516,7 @@
 
   Now what you do is this:
 
-  +o  detach from the current thread
+  o  detach from the current thread
 
 
        (UML gdb)  det
@@ -2525,7 +2525,7 @@
 
 
 
-  +o  attach to the thread you are interested in
+  o  attach to the thread you are interested in
 
 
        (UML gdb)  att <host pid>
@@ -2534,7 +2534,7 @@
 
 
 
-  +o  look at its stack and anything else of interest
+  o  look at its stack and anything else of interest
 
 
        (UML gdb)  bt
@@ -2545,7 +2545,7 @@
   Note that you can't do anything at this point that requires that a
   process execute, e.g. calling a function
 
-  +o  when you're done looking at that process, reattach to the current
+  o  when you're done looking at that process, reattach to the current
      thread and continue it
 
 
@@ -2569,12 +2569,12 @@
 
 
 
-  1111..33..  RRuunnnniinngg dddddd oonn UUMMLL
+  11.3.  Running ddd on UML
 
   ddd works on UML, but requires a special kludge.  The process goes
   like this:
 
-  +o  Start ddd
+  o  Start ddd
 
 
        host% ddd linux
@@ -2583,14 +2583,14 @@
 
 
 
-  +o  With ps, get the pid of the gdb that ddd started.  You can ask the
+  o  With ps, get the pid of the gdb that ddd started.  You can ask the
      gdb to tell you, but for some reason that confuses things and
      causes a hang.
 
-  +o  run UML with 'debug=parent gdb-pid=<pid>' added to the command line
+  o  run UML with 'debug=parent gdb-pid=<pid>' added to the command line
      - it will just sit there after you hit return
 
-  +o  type 'att 1' to the ddd gdb and you will see something like
+  o  type 'att 1' to the ddd gdb and you will see something like
 
 
        0xa013dc51 in __kill ()
@@ -2602,12 +2602,12 @@
 
 
 
-  +o  At this point, type 'c', UML will boot up, and you can use ddd just
+  o  At this point, type 'c', UML will boot up, and you can use ddd just
      as you do on any other process.
 
 
 
-  1111..44..  DDeebbuuggggiinngg mmoodduulleess
+  11.4.  Debugging modules
 
   gdb has support for debugging code which is dynamically loaded into
   the process.  This support is what is needed to debug kernel modules
@@ -2823,7 +2823,7 @@
 
 
 
-  1111..55..  AAttttaacchhiinngg ggddbb ttoo tthhee kkeerrnneell
+  11.5.  Attaching gdb to the kernel
 
   If you don't have the kernel running under gdb, you can attach gdb to
   it later by sending the tracing thread a SIGUSR1.  The first line of
@@ -2857,7 +2857,7 @@
 
 
 
-  1111..66..  UUssiinngg aalltteerrnnaattee ddeebbuuggggeerrss
+  11.6.  Using alternate debuggers
 
   UML has support for attaching to an already running debugger rather
   than starting gdb itself.  This is present in CVS as of 17 Apr 2001.
@@ -2886,7 +2886,7 @@
   An example of an alternate debugger is strace.  You can strace the
   actual kernel as follows:
 
-  +o  Run the following in a shell
+  o  Run the following in a shell
 
 
        host%
@@ -2894,10 +2894,10 @@
 
 
 
-  +o  Run UML with 'debug' and 'gdb-pid=<pid>' with the pid printed out
+  o  Run UML with 'debug' and 'gdb-pid=<pid>' with the pid printed out
      by the previous command
 
-  +o  Hit return in the shell, and UML will start running, and strace
+  o  Hit return in the shell, and UML will start running, and strace
      output will start accumulating in the output file.
 
      Note that this is different from running
@@ -2917,9 +2917,9 @@
 
 
 
-  1122..  KKeerrnneell ddeebbuuggggiinngg eexxaammpplleess
+  12.  Kernel debugging examples
 
-  1122..11..  TThhee ccaassee ooff tthhee hhuunngg ffsscckk
+  12.1.  The case of the hung fsck
 
   When booting up the kernel, fsck failed, and dropped me into a shell
   to fix things up.  I ran fsck -y, which hung:
@@ -3154,9 +3154,9 @@
 
   The interesting things here are :
 
-  +o  There are two segfaults on this stack (frames 9 and 14)
+  o  There are two segfaults on this stack (frames 9 and 14)
 
-  +o  The first faulting address (frame 11) is 0x50000800
+  o  The first faulting address (frame 11) is 0x50000800
 
   (gdb) p (void *)1342179328
   $16 = (void *) 0x50000800
@@ -3399,7 +3399,7 @@
   on will be somewhat clearer.
 
 
-  1122..22..  EEppiissooddee 22:: TThhee ccaassee ooff tthhee hhuunngg ffsscckk
+  12.2.  Episode 2: The case of the hung fsck
 
   After setting a trap in the SEGV handler for accesses to the signal
   thread's stack, I reran the kernel.
@@ -3788,12 +3788,12 @@
 
 
 
-  1133..  WWhhaatt ttoo ddoo wwhheenn UUMMLL ddooeessnn''tt wwoorrkk
+  13.  What to do when UML doesn't work
 
 
 
 
-  1133..11..  SSttrraannggee ccoommppiillaattiioonn eerrrroorrss wwhheenn yyoouu bbuuiilldd ffrroomm ssoouurrccee
+  13.1.  Strange compilation errors when you build from source
 
   As of test11, it is necessary to have "ARCH=um" in the environment or
   on the make command line for all steps in building UML, including
@@ -3824,8 +3824,8 @@
 
 
 
-  1133..33..  AA vvaarriieettyy ooff ppaanniiccss aanndd hhaannggss wwiitthh //ttmmpp oonn aa rreeiisseerrffss  ffiilleessyyss--
-  tteemm
+  13.3.  A variety of panics and hangs with /tmp on a reiserfs  filesys-
+  tem
 
   I saw this on reiserfs 3.5.21 and it seems to be fixed in 3.5.27.
   Panics preceded by
@@ -3842,8 +3842,8 @@
 
 
 
-  1133..44..  TThhee ccoommppiillee ffaaiillss wwiitthh eerrrroorrss aabboouutt ccoonnfflliiccttiinngg ttyyppeess ffoorr
-  ''ooppeenn'',, ''dduupp'',, aanndd ''wwaaiittppiidd''
+  13.4.  The compile fails with errors about conflicting types for
+  'open', 'dup', and 'waitpid'
 
   This happens when you build in /usr/src/linux.  The UML build makes
   the include/asm link point to include/asm-um.  /usr/include/asm points
@@ -3854,14 +3854,14 @@
 
 
 
-  1133..55..  UUMMLL ddooeessnn''tt wwoorrkk wwhheenn //ttmmpp iiss aann NNFFSS ffiilleessyysstteemm
+  13.5.  UML doesn't work when /tmp is an NFS filesystem
 
   This seems to be a similar situation with the ReiserFS problem above.
   Some versions of NFS seems not to handle mmap correctly, which UML
   depends on.  The workaround is have /tmp be a non-NFS directory.
 
 
-  1133..66..  UUMMLL hhaannggss oonn bboooott wwhheenn ccoommppiilleedd wwiitthh ggpprrooff ssuuppppoorrtt
+  13.6.  UML hangs on boot when compiled with gprof support
 
   If you build UML with gprof support and, early in the boot, it does
   this
@@ -3878,7 +3878,7 @@
 
 
 
-  1133..77..  ssyyssllooggdd ddiieess wwiitthh aa SSIIGGTTEERRMM oonn ssttaarrttuupp
+  13.7.  syslogd dies with a SIGTERM on startup
 
   The exact boot error depends on the distribution that you're booting,
   but Debian produces this:
@@ -3897,17 +3897,17 @@
 
 
 
-  1133..88..  TTUUNN//TTAAPP nneettwwoorrkkiinngg ddooeessnn''tt wwoorrkk oonn aa 22..44 hhoosstt
+  13.8.  TUN/TAP networking doesn't work on a 2.4 host
 
   There are a couple of problems which were
   <http://www.geocrawler.com/lists/3/SourceForge/597/0/> name="pointed
   out">  by Tim Robinson <timro at trkr dot net>
 
-  +o  It doesn't work on hosts running 2.4.7 (or thereabouts) or earlier.
+  o  It doesn't work on hosts running 2.4.7 (or thereabouts) or earlier.
      The fix is to upgrade to something more recent and then read the
      next item.
 
-  +o  If you see
+  o  If you see
 
 
        File descriptor in bad state
@@ -3921,8 +3921,8 @@
 
 
 
-  1133..99..  YYoouu ccaann nneettwwoorrkk ttoo tthhee hhoosstt bbuutt nnoott ttoo ootthheerr mmaacchhiinneess oonn tthhee
-  nneett
+  13.9.  You can network to the host but not to other machines on the
+  net
 
   If you can connect to the host, and the host can connect to UML, but
   you cannot connect to any other machines, then you may need to enable
@@ -3972,7 +3972,7 @@
 
 
 
-  1133..1100..  II hhaavvee nnoo rroooott aanndd II wwaanntt ttoo ssccrreeaamm
+  13.10.  I have no root and I want to scream
 
   Thanks to Birgit Wahlich for telling me about this strange one.  It
   turns out that there's a limit of six environment variables on the
@@ -3987,7 +3987,7 @@
 
 
 
-  1133..1111..  UUMMLL bbuuiilldd ccoonnfflliicctt bbeettwweeeenn ppttrraaccee..hh aanndd uuccoonntteexxtt..hh
+  13.11.  UML build conflict between ptrace.h and ucontext.h
 
   On some older systems, /usr/include/asm/ptrace.h and
   /usr/include/sys/ucontext.h define the same names.  So, when they're
@@ -4007,7 +4007,7 @@
 
 
 
-  1133..1122..  TThhee UUMMLL BBooggooMMiippss iiss eexxaaccttllyy hhaallff tthhee hhoosstt''ss BBooggooMMiippss
+  13.12.  The UML BogoMips is exactly half the host's BogoMips
 
   On i386 kernels, there are two ways of running the loop that is used
   to calculate the BogoMips rating, using the TSC if it's there or using
@@ -4019,7 +4019,7 @@
 
 
 
-  1133..1133..  WWhheenn yyoouu rruunn UUMMLL,, iitt iimmmmeeddiiaatteellyy sseeggffaauullttss
+  13.13.  When you run UML, it immediately segfaults
 
   If the host is configured with the 2G/2G address space split, that's
   why.  See ``UML on 2G/2G hosts''  for the details on getting UML to
@@ -4027,7 +4027,7 @@
 
 
 
-  1133..1144..  xxtteerrmmss aappppeeaarr,, tthheenn iimmmmeeddiiaatteellyy ddiissaappppeeaarr
+  13.14.  xterms appear, then immediately disappear
 
   If you're running an up to date kernel with an old release of
   uml_utilities, the port-helper program will not work properly, so
@@ -4039,7 +4039,7 @@
 
 
 
-  1133..1155..  AAnnyy ootthheerr ppaanniicc,, hhaanngg,, oorr ssttrraannggee bbeehhaavviioorr
+  13.15.  Any other panic, hang, or strange behavior
 
   If you're seeing truly strange behavior, such as hangs or panics that
   happen in random places, or you try running the debugger to see what's
@@ -4059,7 +4059,7 @@
 
   If you want to be super-helpful, read ``Diagnosing Problems'' and
   follow the instructions contained therein.
-  1144..  DDiiaaggnnoossiinngg PPrroobblleemmss
+  14.  Diagnosing Problems
 
 
   If you get UML to crash, hang, or otherwise misbehave, you should
@@ -4078,7 +4078,7 @@
   ``Kernel debugging''  UML first.
 
 
-  1144..11..  CCaassee 11 :: NNoorrmmaall kkeerrnneell ppaanniiccss
+  14.1.  Case 1 : Normal kernel panics
 
   The most common case is for a normal thread to panic.  To debug this,
   you will need to run it under the debugger (add 'debug' to the command
@@ -4128,7 +4128,7 @@
   to get that information from the faulting ip.
 
 
-  1144..22..  CCaassee 22 :: TTrraacciinngg tthhrreeaadd ppaanniiccss
+  14.2.  Case 2 : Tracing thread panics
 
   The less common and more painful case is when the tracing thread
   panics.  In this case, the kernel debugger will be useless because it
@@ -4161,7 +4161,7 @@
   backtrace in and wait for our crack debugging team to fix the problem.
 
 
-  1144..33..  CCaassee 33 :: TTrraacciinngg tthhrreeaadd ppaanniiccss ccaauusseedd bbyy ootthheerr tthhrreeaaddss
+  14.3.  Case 3 : Tracing thread panics caused by other threads
 
   However, there are cases where the misbehavior of another thread
   caused the problem.  The most common panic of this type is:
@@ -4227,7 +4227,7 @@
 
 
 
-  1144..44..  CCaassee 44 :: HHaannggss
+  14.4.  Case 4 : Hangs
 
   Hangs seem to be fairly rare, but they sometimes happen.  When a hang
   happens, we need a backtrace from the offending process.  Run the
@@ -4257,7 +4257,7 @@
 
 
 
-  1155..  TThhaannkkss
+  15.  Thanks
 
 
   A number of people have helped this project in various ways, and this
@@ -4274,20 +4274,20 @@
   bookkeeping lapses and I forget about contributions.
 
 
-  1155..11..  CCooddee aanndd DDooccuummeennttaattiioonn
+  15.1.  Code and Documentation
 
   Rusty Russell <rusty at linuxcare.com.au>  -
 
-  +o  wrote the  HOWTO <http://user-mode-
+  o  wrote the  HOWTO <http://user-mode-
      linux.sourceforge.net/UserModeLinux-HOWTO.html>
 
-  +o  prodded me into making this project official and putting it on
+  o  prodded me into making this project official and putting it on
      SourceForge
 
-  +o  came up with the way cool UML logo <http://user-mode-
+  o  came up with the way cool UML logo <http://user-mode-
      linux.sourceforge.net/uml-small.png>
 
-  +o  redid the config process
+  o  redid the config process
 
 
   Peter Moulder <reiter at netspace.net.au>  - Fixed my config and build
@@ -4296,18 +4296,18 @@
 
   Bill Stearns <wstearns at pobox.com>  -
 
-  +o  HOWTO updates
+  o  HOWTO updates
 
-  +o  lots of bug reports
+  o  lots of bug reports
 
-  +o  lots of testing
+  o  lots of testing
 
-  +o  dedicated a box (uml.ists.dartmouth.edu) to support UML development
+  o  dedicated a box (uml.ists.dartmouth.edu) to support UML development
 
-  +o  wrote the mkrootfs script, which allows bootable filesystems of
+  o  wrote the mkrootfs script, which allows bootable filesystems of
      RPM-based distributions to be cranked out
 
-  +o  cranked out a large number of filesystems with said script
+  o  cranked out a large number of filesystems with said script
 
 
   Jim Leu <jleu at mindspring.com>  - Wrote the virtual ethernet driver
@@ -4375,176 +4375,176 @@
 
   David Coulson <http://davidcoulson.net>  -
 
-  +o  Set up the usermodelinux.org <http://usermodelinux.org>  site,
+  o  Set up the usermodelinux.org <http://usermodelinux.org>  site,
      which is a great way of keeping the UML user community on top of
      UML goings-on.
 
-  +o  Site documentation and updates
+  o  Site documentation and updates
 
-  +o  Nifty little UML management daemon  UMLd
+  o  Nifty little UML management daemon  UMLd
      <http://uml.openconsultancy.com/umld/>
 
-  +o  Lots of testing and bug reports
+  o  Lots of testing and bug reports
 
 
 
 
-  1155..22..  FFlluusshhiinngg oouutt bbuuggss
+  15.2.  Flushing out bugs
 
 
 
-  +o  Yuri Pudgorodsky
+  o  Yuri Pudgorodsky
 
-  +o  Gerald Britton
+  o  Gerald Britton
 
-  +o  Ian Wehrman
+  o  Ian Wehrman
 
-  +o  Gord Lamb
+  o  Gord Lamb
 
-  +o  Eugene Koontz
+  o  Eugene Koontz
 
-  +o  John H. Hartman
+  o  John H. Hartman
 
-  +o  Anders Karlsson
+  o  Anders Karlsson
 
-  +o  Daniel Phillips
+  o  Daniel Phillips
 
-  +o  John Fremlin
+  o  John Fremlin
 
-  +o  Rainer Burgstaller
+  o  Rainer Burgstaller
 
-  +o  James Stevenson
+  o  James Stevenson
 
-  +o  Matt Clay
+  o  Matt Clay
 
-  +o  Cliff Jefferies
+  o  Cliff Jefferies
 
-  +o  Geoff Hoff
+  o  Geoff Hoff
 
-  +o  Lennert Buytenhek
+  o  Lennert Buytenhek
 
-  +o  Al Viro
+  o  Al Viro
 
-  +o  Frank Klingenhoefer
+  o  Frank Klingenhoefer
 
-  +o  Livio Baldini Soares
+  o  Livio Baldini Soares
 
-  +o  Jon Burgess
+  o  Jon Burgess
 
-  +o  Petru Paler
+  o  Petru Paler
 
-  +o  Paul
+  o  Paul
 
-  +o  Chris Reahard
+  o  Chris Reahard
 
-  +o  Sverker Nilsson
+  o  Sverker Nilsson
 
-  +o  Gong Su
+  o  Gong Su
 
-  +o  johan verrept
+  o  johan verrept
 
-  +o  Bjorn Eriksson
+  o  Bjorn Eriksson
 
-  +o  Lorenzo Allegrucci
+  o  Lorenzo Allegrucci
 
-  +o  Muli Ben-Yehuda
+  o  Muli Ben-Yehuda
 
-  +o  David Mansfield
+  o  David Mansfield
 
-  +o  Howard Goff
+  o  Howard Goff
 
-  +o  Mike Anderson
+  o  Mike Anderson
 
-  +o  John Byrne
+  o  John Byrne
 
-  +o  Sapan J. Batia
+  o  Sapan J. Batia
 
-  +o  Iris Huang
+  o  Iris Huang
 
-  +o  Jan Hudec
+  o  Jan Hudec
 
-  +o  Voluspa
+  o  Voluspa
 
 
 
 
-  1155..33..  BBuugglleettss aanndd cclleeaann--uuppss
+  15.3.  Buglets and clean-ups
 
 
 
-  +o  Dave Zarzycki
+  o  Dave Zarzycki
 
-  +o  Adam Lazur
+  o  Adam Lazur
 
-  +o  Boria Feigin
+  o  Boria Feigin
 
-  +o  Brian J. Murrell
+  o  Brian J. Murrell
 
-  +o  JS
+  o  JS
 
-  +o  Roman Zippel
+  o  Roman Zippel
 
-  +o  Wil Cooley
+  o  Wil Cooley
 
-  +o  Ayelet Shemesh
+  o  Ayelet Shemesh
 
-  +o  Will Dyson
+  o  Will Dyson
 
-  +o  Sverker Nilsson
+  o  Sverker Nilsson
 
-  +o  dvorak
+  o  dvorak
 
-  +o  v.naga srinivas
+  o  v.naga srinivas
 
-  +o  Shlomi Fish
+  o  Shlomi Fish
 
-  +o  Roger Binns
+  o  Roger Binns
 
-  +o  johan verrept
+  o  johan verrept
 
-  +o  MrChuoi
+  o  MrChuoi
 
-  +o  Peter Cleve
+  o  Peter Cleve
 
-  +o  Vincent Guffens
+  o  Vincent Guffens
 
-  +o  Nathan Scott
+  o  Nathan Scott
 
-  +o  Patrick Caulfield
+  o  Patrick Caulfield
 
-  +o  jbearce
+  o  jbearce
 
-  +o  Catalin Marinas
+  o  Catalin Marinas
 
-  +o  Shane Spencer
+  o  Shane Spencer
 
-  +o  Zou Min
+  o  Zou Min
 
 
-  +o  Ryan Boder
+  o  Ryan Boder
 
-  +o  Lorenzo Colitti
+  o  Lorenzo Colitti
 
-  +o  Gwendal Grignou
+  o  Gwendal Grignou
 
-  +o  Andre' Breiler
+  o  Andre' Breiler
 
-  +o  Tsutomu Yasuda
+  o  Tsutomu Yasuda
 
 
 
-  1155..44..  CCaassee SSttuuddiieess
+  15.4.  Case Studies
 
 
-  +o  Jon Wright
+  o  Jon Wright
 
-  +o  William McEwan
+  o  William McEwan
 
-  +o  Michael Richardson
+  o  Michael Richardson
 
 
 
-  1155..55..  OOtthheerr ccoonnttrriibbuuttiioonnss
+  15.5.  Other contributions
 
 
   Bill Carr <Bill.Carr at compaq.com>  made the Red Hat mkrootfs script
diff --git a/MAINTAINERS b/MAINTAINERS
index c406f9b..479beaa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6683,7 +6683,6 @@
 
 UDF FILESYSTEM
 M:	Jan Kara <jack@suse.cz>
-W:	http://linux-udf.sourceforge.net
 S:	Maintained
 F:	Documentation/filesystems/udf.txt
 F:	fs/udf/
diff --git a/arch/um/Kconfig.char b/arch/um/Kconfig.char
index 70dabd1..b9d7c42 100644
--- a/arch/um/Kconfig.char
+++ b/arch/um/Kconfig.char
@@ -1,5 +1,4 @@
-
-menu "Character Devices"
+menu "UML Character Devices"
 
 config STDERR_CONSOLE
 	bool "stderr console"
@@ -105,92 +104,6 @@
           this if you expect the UML that you build to be run in environments
           which don't have a set of /dev/pty* devices.
 
-config UNIX98_PTYS
-	bool "Unix98 PTY support"
-	help
-	  A pseudo terminal (PTY) is a software device consisting of two
-	  halves: a master and a slave. The slave device behaves identical to
-	  a physical terminal; the master device is used by a process to
-	  read data from and write data to the slave, thereby emulating a
-	  terminal. Typical programs for the master side are telnet servers
-	  and xterms.
-
-	  Linux has traditionally used the BSD-like names /dev/ptyxx for
-	  masters and /dev/ttyxx for slaves of pseudo terminals. This scheme
-	  has a number of problems. The GNU C library glibc 2.1 and later,
-	  however, supports the Unix98 naming standard: in order to acquire a
-	  pseudo terminal, a process opens /dev/ptmx; the number of the pseudo
-	  terminal is then made available to the process and the pseudo
-	  terminal slave can be accessed as /dev/pts/<number>. What was
-	  traditionally /dev/ttyp2 will then be /dev/pts/2, for example.
-
-	  All modern Linux systems use the Unix98 ptys.  Say Y unless
-	  you're on an embedded system and want to conserve memory.
-
-config LEGACY_PTYS
-	bool "Legacy (BSD) PTY support"
-	default y
-	help
-	  A pseudo terminal (PTY) is a software device consisting of two
-	  halves: a master and a slave. The slave device behaves identical to
-	  a physical terminal; the master device is used by a process to
-	  read data from and write data to the slave, thereby emulating a
-	  terminal. Typical programs for the master side are telnet servers
-	  and xterms.
-
-	  Linux has traditionally used the BSD-like names /dev/ptyxx
-	  for masters and /dev/ttyxx for slaves of pseudo
-	  terminals. This scheme has a number of problems, including
-	  security.  This option enables these legacy devices; on most
-	  systems, it is safe to say N.
-
-config RAW_DRIVER
-        tristate "RAW driver (/dev/raw/rawN)"
-	depends on BLOCK
-        help
-          The raw driver permits block devices to be bound to /dev/raw/rawN.
-          Once bound, I/O against /dev/raw/rawN uses efficient zero-copy I/O.
-          See the raw(8) manpage for more details.
-
-          Applications should preferably open the device (eg /dev/hda1)
-          with the O_DIRECT flag.
-
-config MAX_RAW_DEVS
-        int "Maximum number of RAW devices to support (1-8192)"
-        depends on RAW_DRIVER
-        default "256"
-        help
-          The maximum number of RAW devices that are supported.
-          Default is 256. Increase this number in case you need lots of
-          raw devices.
-
-config LEGACY_PTY_COUNT
-	int "Maximum number of legacy PTY in use"
-	depends on LEGACY_PTYS
-	default "256"
-	help
-	  The maximum number of legacy PTYs that can be used at any one time.
-	  The default is 256, and should be more than enough.  Embedded
-	  systems may want to reduce this to save memory.
-
-	  When not in use, each legacy PTY occupies 12 bytes on 32-bit
-	  architectures and 24 bytes on 64-bit architectures.
-
-config WATCHDOG
-	bool "Watchdog Timer Support"
-
-config WATCHDOG_NOWAYOUT
-	bool "Disable watchdog shutdown on close"
-	depends on WATCHDOG
-
-config SOFT_WATCHDOG
-	tristate "Software Watchdog"
-	depends on WATCHDOG
-
-config UML_WATCHDOG
-	tristate "UML watchdog"
-	depends on WATCHDOG
-
 config UML_SOUND
 	tristate "Sound support"
 	help
@@ -211,29 +124,4 @@
 	tristate
 	default UML_SOUND
 
-#It is selected elsewhere, so kconfig would warn without this.
-config HW_RANDOM
-	tristate
-	default n
-
-config UML_RANDOM
-	tristate "Hardware random number generator"
-	help
-	  This option enables UML's "hardware" random number generator.  It
-	  attaches itself to the host's /dev/random, supplying as much entropy
-	  as the host has, rather than the small amount the UML gets from its
-	  own drivers.  It registers itself as a standard hardware random number
-	  generator, major 10, minor 183, and the canonical device name is
-	  /dev/hwrng.
-	  The way to make use of this is to install the rng-tools package
-	  (check your distro, or download from
-	  http://sourceforge.net/projects/gkernel/).  rngd periodically reads
-	  /dev/hwrng and injects the entropy into /dev/random.
-
-config MMAPPER
-	tristate "iomem emulation driver"
-	help
-	  This driver allows a host file to be used as emulated IO memory inside
-	  UML.
-
 endmenu
diff --git a/arch/um/Kconfig.rest b/arch/um/Kconfig.rest
index 0ccad0f..567eb5f 100644
--- a/arch/um/Kconfig.rest
+++ b/arch/um/Kconfig.rest
@@ -2,20 +2,14 @@
 
 source "kernel/Kconfig.freezer"
 
-source "drivers/block/Kconfig"
-
 source "arch/um/Kconfig.char"
 
-source "drivers/base/Kconfig"
+source "drivers/Kconfig"
 
 source "net/Kconfig"
 
 source "arch/um/Kconfig.net"
 
-source "drivers/net/Kconfig"
-
-source "drivers/connector/Kconfig"
-
 source "fs/Kconfig"
 
 source "security/Kconfig"
@@ -24,19 +18,4 @@
 
 source "lib/Kconfig"
 
-source "drivers/scsi/Kconfig"
-
-source "drivers/md/Kconfig"
-
-if BROKEN
-	source "drivers/mtd/Kconfig"
-endif
-
-source "drivers/leds/Kconfig"
-
-#This is just to shut up some Kconfig warnings, so no prompt.
-config INPUT
-	tristate
-	default n
-
 source "arch/um/Kconfig.debug"
diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um
index b5e675e..70fd690 100644
--- a/arch/um/Kconfig.um
+++ b/arch/um/Kconfig.um
@@ -148,5 +148,11 @@
 	  be 1 << order pages.  The default is OK unless you're running Valgrind
 	  on UML, in which case, set this to 3.
 
+config MMAPPER
+	tristate "iomem emulation driver"
+	help
+	  This driver allows a host file to be used as emulated IO memory inside
+	  UML.
+
 config NO_DMA
 	def_bool y
diff --git a/arch/um/Makefile b/arch/um/Makefile
index c0f712c..7730af6 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -20,15 +20,27 @@
 
 MODE_INCLUDE	+= -I$(srctree)/$(ARCH_DIR)/include/shared/skas
 
+HEADER_ARCH 	:= $(SUBARCH)
+
+# Additional ARCH settings for x86
+ifeq ($(SUBARCH),i386)
+        HEADER_ARCH := x86
+endif
+ifeq ($(SUBARCH),x86_64)
+        HEADER_ARCH := x86
+endif
+
+HOST_DIR := arch/$(HEADER_ARCH)
+
 include $(srctree)/$(ARCH_DIR)/Makefile-skas
+include $(srctree)/$(HOST_DIR)/Makefile.um
+
+core-y += $(HOST_DIR)/um/
 
 SHARED_HEADERS	:= $(ARCH_DIR)/include/shared
 ARCH_INCLUDE	:= -I$(srctree)/$(SHARED_HEADERS)
-ARCH_INCLUDE	+= -I$(srctree)/$(ARCH_DIR)/sys-$(SUBARCH)/shared
-ifneq ($(KBUILD_SRC),)
-ARCH_INCLUDE	+= -I$(SHARED_HEADERS)
-endif
-KBUILD_CPPFLAGS += -I$(srctree)/$(ARCH_DIR)/sys-$(SUBARCH)
+ARCH_INCLUDE	+= -I$(srctree)/$(HOST_DIR)/um/shared
+KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/um
 
 # -Dvmap=kernel_vmap prevents anything from referencing the libpcap.o symbol so
 # named - it's a common symbol in libpcap, so we get a binary which crashes.
@@ -47,14 +59,12 @@
 
 USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -D__KERNEL__,,\
 	$(patsubst -I%,,$(KBUILD_CFLAGS)))) $(ARCH_INCLUDE) $(MODE_INCLUDE) \
-	$(filter -I%,$(CFLAGS)) -D_FILE_OFFSET_BITS=64
-
-include $(srctree)/$(ARCH_DIR)/Makefile-$(SUBARCH)
+	$(filter -I%,$(CFLAGS)) -D_FILE_OFFSET_BITS=64 -idirafter include
 
 #This will adjust *FLAGS accordingly to the platform.
 include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS)
 
-KBUILD_CPPFLAGS += -I$(srctree)/arch/$(HEADER_ARCH)/include
+KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/include
 
 # -Derrno=kernel_errno - This turns all kernel references to errno into
 # kernel_errno to separate them from the libc errno.  This allows -fno-common
@@ -84,10 +94,9 @@
   echo '		   find in the kernel root.'
 endef
 
-KBUILD_KCONFIG := arch/um/Kconfig.$(HEADER_ARCH)
+KBUILD_KCONFIG := $(HOST_DIR)/um/Kconfig
 
-archprepare: $(SHARED_HEADERS)/user_constants.h
-archprepare: $(SHARED_HEADERS)/kern_constants.h
+archprepare: include/generated/user_constants.h
 
 LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
 LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib
@@ -118,9 +127,7 @@
 
 # When cleaning we don't include .config, so we don't include
 # TT or skas makefiles and don't clean skas_ptregs.h.
-CLEAN_FILES += linux x.i gmon.out \
-	$(SHARED_HEADERS)/user_constants.h \
-	$(SHARED_HEADERS)/kern_constants.h
+CLEAN_FILES += linux x.i gmon.out
 
 archclean:
 	@find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \
@@ -128,8 +135,8 @@
 
 # Generated files
 
-$(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s: FORCE
-	$(Q)$(MAKE) $(build)=$(ARCH_DIR)/sys-$(SUBARCH) $@
+$(HOST_DIR)/um/user-offsets.s: FORCE
+	$(Q)$(MAKE) $(build)=$(HOST_DIR)/um $@
 
 define filechk_gen-asm-offsets
         (set -e; \
@@ -144,11 +151,7 @@
          echo ""; )
 endef
 
-$(SHARED_HEADERS)/user_constants.h: $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s
+include/generated/user_constants.h: $(HOST_DIR)/um/user-offsets.s
 	$(call filechk,gen-asm-offsets)
 
-$(SHARED_HEADERS)/kern_constants.h:
-	$(Q)mkdir -p $(dir $@)
-	$(Q)echo '#include "../../../../include/generated/asm-offsets.h"' >$@
-
-export SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS HEADER_ARCH DEV_NULL_PATH
+export SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS DEV_NULL_PATH
diff --git a/arch/um/Makefile-x86_64 b/arch/um/Makefile-x86_64
deleted file mode 100644
index a9cd7e7..0000000
--- a/arch/um/Makefile-x86_64
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright 2003 - 2004 Pathscale, Inc
-# Released under the GPL
-
-core-y += arch/um/sys-x86_64/ arch/x86/crypto/
-START := 0x60000000
-
-_extra_flags_ = -fno-builtin -m64
-
-KBUILD_CFLAGS += $(_extra_flags_)
-
-CHECKFLAGS  += -m64 -D__x86_64__
-KBUILD_AFLAGS += -m64
-LDFLAGS += -m elf_x86_64
-KBUILD_CPPFLAGS += -m64
-
-ELF_ARCH := i386:x86-64
-ELF_FORMAT := elf64-x86-64
-HEADER_ARCH := x86
-
-# Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example.
-
-LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64
-LINK-y += -m64
-
-# Do unit-at-a-time unconditionally on x86_64, following the host
-KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
diff --git a/arch/um/include/shared/chan_kern.h b/arch/um/drivers/chan.h
similarity index 94%
rename from arch/um/include/shared/chan_kern.h
rename to arch/um/drivers/chan.h
index 1e65145..8df0fd90 100644
--- a/arch/um/include/shared/chan_kern.h
+++ b/arch/um/drivers/chan.h
@@ -6,9 +6,9 @@
 #ifndef __CHAN_KERN_H__
 #define __CHAN_KERN_H__
 
-#include "linux/tty.h"
-#include "linux/list.h"
-#include "linux/console.h"
+#include <linux/tty.h>
+#include <linux/list.h>
+#include <linux/console.h>
 #include "chan_user.h"
 #include "line.h"
 
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index d4191fe..420e2c8 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -6,7 +6,7 @@
 #include <linux/slab.h>
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
-#include "chan_kern.h"
+#include "chan.h"
 #include "os.h"
 
 #ifdef CONFIG_NOCONFIG_CHAN
@@ -358,11 +358,11 @@
 	return 0;
 }
 
-static void free_one_chan(struct chan *chan, int delay_free_irq)
+static void free_one_chan(struct chan *chan)
 {
 	list_del(&chan->list);
 
-	close_one_chan(chan, delay_free_irq);
+	close_one_chan(chan, 0);
 
 	if (chan->ops->free != NULL)
 		(*chan->ops->free)(chan->data);
@@ -372,14 +372,14 @@
 	kfree(chan);
 }
 
-static void free_chan(struct list_head *chans, int delay_free_irq)
+static void free_chan(struct list_head *chans)
 {
 	struct list_head *ele, *next;
 	struct chan *chan;
 
 	list_for_each_safe(ele, next, chans) {
 		chan = list_entry(ele, struct chan, list);
-		free_one_chan(chan, delay_free_irq);
+		free_one_chan(chan);
 	}
 }
 
@@ -547,7 +547,7 @@
 	char *in, *out;
 
 	if (!list_empty(chans)) {
-		free_chan(chans, 0);
+		free_chan(chans);
 		INIT_LIST_HEAD(chans);
 	}
 
diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
index cfeb3f4..f180813 100644
--- a/arch/um/drivers/chan_user.c
+++ b/arch/um/drivers/chan_user.c
@@ -11,10 +11,8 @@
 #include <termios.h>
 #include <sys/ioctl.h>
 #include "chan_user.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "user.h"
 
 void generic_close(int fd, void *unused)
 {
@@ -283,7 +281,12 @@
 		return;
 
 	pid = tcgetpgrp(fd);
-	if (!is_skas_winch(pid, fd, tty) && (pid == -1)) {
+	if (is_skas_winch(pid, fd, tty)) {
+		register_winch_irq(-1, fd, -1, tty, 0);
+		return;
+	}
+
+	if (pid == -1) {
 		thread = winch_tramp(fd, tty, &thread_fd, &stack);
 		if (thread < 0)
 			return;
diff --git a/arch/um/include/shared/chan_user.h b/arch/um/drivers/chan_user.h
similarity index 100%
rename from arch/um/include/shared/chan_user.h
rename to arch/um/drivers/chan_user.h
diff --git a/arch/um/drivers/cow_sys.h b/arch/um/drivers/cow_sys.h
index f5701fd..7f2ed0b 100644
--- a/arch/um/drivers/cow_sys.h
+++ b/arch/um/drivers/cow_sys.h
@@ -3,7 +3,6 @@
 
 #include "kern_util.h"
 #include "os.h"
-#include "user.h"
 #include "um_malloc.h"
 
 static inline void *cow_malloc(int size)
diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c
index f8e85e0..a4fd7bc 100644
--- a/arch/um/drivers/daemon_user.c
+++ b/arch/um/drivers/daemon_user.c
@@ -17,7 +17,6 @@
 #include "net_user.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "user.h"
 
 enum request_type { REQ_NEW_CONTROL };
 
diff --git a/arch/um/drivers/fd.c b/arch/um/drivers/fd.c
index f5a981a..5b81d25 100644
--- a/arch/um/drivers/fd.c
+++ b/arch/um/drivers/fd.c
@@ -9,10 +9,8 @@
 #include <errno.h>
 #include <termios.h>
 #include "chan_user.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "user.h"
 
 struct fd_chan {
 	int fd;
diff --git a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c
index 84dce3f..0345d62 100644
--- a/arch/um/drivers/harddog_user.c
+++ b/arch/um/drivers/harddog_user.c
@@ -7,7 +7,6 @@
 #include <unistd.h>
 #include <errno.h>
 #include "os.h"
-#include "user.h"
 
 struct dog_data {
 	int stdin;
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 364c8a1..c1cf220 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -7,7 +7,7 @@
 #include "linux/kd.h"
 #include "linux/sched.h"
 #include "linux/slab.h"
-#include "chan_kern.h"
+#include "chan.h"
 #include "irq_kern.h"
 #include "irq_user.h"
 #include "kern_util.h"
diff --git a/arch/um/include/shared/line.h b/arch/um/drivers/line.h
similarity index 100%
rename from arch/um/include/shared/line.h
rename to arch/um/drivers/line.h
diff --git a/arch/um/include/shared/mconsole.h b/arch/um/drivers/mconsole.h
similarity index 100%
rename from arch/um/include/shared/mconsole.h
rename to arch/um/drivers/mconsole.h
diff --git a/arch/um/include/shared/mconsole_kern.h b/arch/um/drivers/mconsole_kern.h
similarity index 100%
rename from arch/um/include/shared/mconsole_kern.h
rename to arch/um/drivers/mconsole_kern.h
diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c
index f8cf4c8..9920982 100644
--- a/arch/um/drivers/mconsole_user.c
+++ b/arch/um/drivers/mconsole_user.c
@@ -10,9 +10,7 @@
 #include <sys/socket.h>
 #include <sys/uio.h>
 #include <sys/un.h>
-#include "kern_constants.h"
 #include "mconsole.h"
-#include "user.h"
 
 static struct mconsole_command commands[] = {
 	/*
diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c
index 5201188..05090c3 100644
--- a/arch/um/drivers/net_user.c
+++ b/arch/um/drivers/net_user.c
@@ -12,10 +12,8 @@
 #include <sys/socket.h>
 #include <sys/wait.h>
 #include "net_user.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "user.h"
 
 int tap_open_common(void *dev, char *gate_addr)
 {
diff --git a/arch/um/drivers/pcap_user.c b/arch/um/drivers/pcap_user.c
index 5f90358..702a75b 100644
--- a/arch/um/drivers/pcap_user.c
+++ b/arch/um/drivers/pcap_user.c
@@ -9,9 +9,7 @@
 #include <asm/types.h>
 #include "net_user.h"
 #include "pcap_user.h"
-#include "kern_constants.h"
 #include "um_malloc.h"
-#include "user.h"
 
 #define PCAP_FD(p) (*(int *)(p))
 
diff --git a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c
index b49bf56..7b010b7 100644
--- a/arch/um/drivers/port_user.c
+++ b/arch/um/drivers/port_user.c
@@ -10,11 +10,9 @@
 #include <unistd.h>
 #include <netinet/in.h>
 #include "chan_user.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "port.h"
 #include "um_malloc.h"
-#include "user.h"
 
 struct port_chan {
 	int raw;
diff --git a/arch/um/drivers/pty.c b/arch/um/drivers/pty.c
index 1113911..cff2b75 100644
--- a/arch/um/drivers/pty.c
+++ b/arch/um/drivers/pty.c
@@ -12,10 +12,8 @@
 #include <termios.h>
 #include <sys/stat.h>
 #include "chan_user.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "user.h"
 
 struct pty_chan {
 	void (*announce)(char *dev_name, int dev);
diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c
index cbacfc4..932b4d6 100644
--- a/arch/um/drivers/slip_user.c
+++ b/arch/um/drivers/slip_user.c
@@ -11,12 +11,10 @@
 #include <string.h>
 #include <sys/termios.h>
 #include <sys/wait.h>
-#include "kern_constants.h"
 #include "net_user.h"
 #include "os.h"
 #include "slip.h"
 #include "um_malloc.h"
-#include "user.h"
 
 static int slip_user_init(void *data, void *dev)
 {
diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c
index a0ada8f..db4adb6 100644
--- a/arch/um/drivers/slirp_user.c
+++ b/arch/um/drivers/slirp_user.c
@@ -7,11 +7,9 @@
 #include <errno.h>
 #include <string.h>
 #include <sys/wait.h>
-#include "kern_constants.h"
 #include "net_user.h"
 #include "os.h"
 #include "slirp.h"
-#include "user.h"
 
 static int slirp_user_init(void *data, void *dev)
 {
diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c
index f1786e6..9d8c20a 100644
--- a/arch/um/drivers/ssl.c
+++ b/arch/um/drivers/ssl.c
@@ -12,10 +12,8 @@
 #include "linux/console.h"
 #include "asm/termbits.h"
 #include "asm/irq.h"
-#include "line.h"
 #include "ssl.h"
-#include "chan_kern.h"
-#include "kern.h"
+#include "chan.h"
 #include "init.h"
 #include "irq_user.h"
 #include "mconsole_kern.h"
diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c
index 49266f6..088776f 100644
--- a/arch/um/drivers/stdio_console.c
+++ b/arch/um/drivers/stdio_console.c
@@ -20,8 +20,7 @@
 #include "asm/current.h"
 #include "asm/irq.h"
 #include "stdio_console.h"
-#include "line.h"
-#include "chan_kern.h"
+#include "chan.h"
 #include "irq_user.h"
 #include "mconsole_kern.h"
 #include "init.h"
diff --git a/arch/um/drivers/tty.c b/arch/um/drivers/tty.c
index 495858a..a97391f 100644
--- a/arch/um/drivers/tty.c
+++ b/arch/um/drivers/tty.c
@@ -7,10 +7,8 @@
 #include <fcntl.h>
 #include <termios.h>
 #include "chan_user.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "user.h"
 
 struct tty_chan {
 	char *dev;
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 620f5b7..944453a 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -46,7 +46,6 @@
 #include "asm/tlbflush.h"
 #include "mem_user.h"
 #include "kern_util.h"
-#include "kern.h"
 #include "mconsole_kern.h"
 #include "init.h"
 #include "irq_user.h"
@@ -54,7 +53,6 @@
 #include "ubd_user.h"
 #include "os.h"
 #include "mem.h"
-#include "mem_kern.h"
 #include "cow.h"
 
 enum ubd_req { UBD_READ, UBD_WRITE };
@@ -513,8 +511,37 @@
 static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 {
 	char *file;
+	int fd;
+	int err;
 
-	file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file;
+	__u32 version;
+	__u32 align;
+	char *backing_file;
+	time_t mtime;
+	unsigned long long size;
+	int sector_size;
+	int bitmap_offset;
+
+	if (ubd_dev->file && ubd_dev->cow.file) {
+		file = ubd_dev->cow.file;
+
+		goto out;
+	}
+
+	fd = os_open_file(ubd_dev->file, global_openflags, 0);
+	if (fd < 0)
+		return fd;
+
+	err = read_cow_header(file_reader, &fd, &version, &backing_file, \
+		&mtime, &size, &sector_size, &align, &bitmap_offset);
+	os_close_file(fd);
+
+	if(err == -EINVAL)
+		file = ubd_dev->file;
+	else
+		file = backing_file;
+
+out:
 	return os_file_size(file, size_out);
 }
 
diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
index b591bb9..007b94d 100644
--- a/arch/um/drivers/ubd_user.c
+++ b/arch/um/drivers/ubd_user.c
@@ -16,7 +16,6 @@
 #include <sys/mman.h>
 #include <sys/param.h>
 #include "asm/types.h"
-#include "user.h"
 #include "ubd_user.h"
 #include "os.h"
 #include "cow.h"
diff --git a/arch/um/include/shared/ubd_user.h b/arch/um/drivers/ubd_user.h
similarity index 100%
rename from arch/um/include/shared/ubd_user.h
rename to arch/um/drivers/ubd_user.h
diff --git a/arch/um/drivers/umcast_user.c b/arch/um/drivers/umcast_user.c
index 59c56fd..010fa2d 100644
--- a/arch/um/drivers/umcast_user.c
+++ b/arch/um/drivers/umcast_user.c
@@ -15,11 +15,9 @@
 #include <unistd.h>
 #include <errno.h>
 #include <netinet/in.h>
-#include "kern_constants.h"
 #include "umcast.h"
 #include "net_user.h"
 #include "um_malloc.h"
-#include "user.h"
 
 static struct sockaddr_in *new_addr(char *addr, unsigned short port)
 {
diff --git a/arch/um/drivers/vde_user.c b/arch/um/drivers/vde_user.c
index c5c4325..b8c2867 100644
--- a/arch/um/drivers/vde_user.c
+++ b/arch/um/drivers/vde_user.c
@@ -6,10 +6,8 @@
 #include <stddef.h>
 #include <errno.h>
 #include <libvdeplug.h>
-#include "kern_constants.h"
 #include "net_user.h"
 #include "um_malloc.h"
-#include "user.h"
 #include "vde.h"
 
 static int vde_user_init(void *data, void *dev)
diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c
index 2e1de57..969110e 100644
--- a/arch/um/drivers/xterm.c
+++ b/arch/um/drivers/xterm.c
@@ -11,10 +11,8 @@
 #include <string.h>
 #include <termios.h>
 #include "chan_user.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "user.h"
 #include "xterm.h"
 
 struct xterm_chan {
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
new file mode 100644
index 0000000..451f451
--- /dev/null
+++ b/arch/um/include/asm/Kbuild
@@ -0,0 +1,3 @@
+generic-y += bug.h cputime.h device.h emergency-restart.h futex.h hardirq.h
+generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h
+generic-y += ftrace.h
diff --git a/arch/um/include/asm/bug.h b/arch/um/include/asm/bug.h
deleted file mode 100644
index 9e33b86..0000000
--- a/arch/um/include/asm/bug.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __UM_BUG_H
-#define __UM_BUG_H
-
-#include <asm-generic/bug.h>
-
-#endif
diff --git a/arch/um/include/asm/checksum.h b/arch/um/include/asm/checksum.h
deleted file mode 100644
index 5b50136..0000000
--- a/arch/um/include/asm/checksum.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __UM_CHECKSUM_H
-#define __UM_CHECKSUM_H
-
-#include "sysdep/checksum.h"
-
-#endif
diff --git a/arch/um/include/asm/cputime.h b/arch/um/include/asm/cputime.h
deleted file mode 100644
index c84acba..0000000
--- a/arch/um/include/asm/cputime.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __UM_CPUTIME_H
-#define __UM_CPUTIME_H
-
-#include <asm-generic/cputime.h>
-
-#endif /* __UM_CPUTIME_H */
diff --git a/arch/um/include/asm/device.h b/arch/um/include/asm/device.h
deleted file mode 100644
index d8f9872..0000000
--- a/arch/um/include/asm/device.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#include <asm-generic/device.h>
-
diff --git a/arch/um/include/asm/emergency-restart.h b/arch/um/include/asm/emergency-restart.h
deleted file mode 100644
index 108d8c4..0000000
--- a/arch/um/include/asm/emergency-restart.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/um/include/asm/ftrace.h b/arch/um/include/asm/ftrace.h
deleted file mode 100644
index 40a8c17..0000000
--- a/arch/um/include/asm/ftrace.h
+++ /dev/null
@@ -1 +0,0 @@
-/* empty */
diff --git a/arch/um/include/asm/futex.h b/arch/um/include/asm/futex.h
deleted file mode 100644
index 6a332a9..0000000
--- a/arch/um/include/asm/futex.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
-
-#include <asm-generic/futex.h>
-
-#endif
diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h
deleted file mode 100644
index fb3c05a..0000000
--- a/arch/um/include/asm/hardirq.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/hardirq.h>
diff --git a/arch/um/include/asm/hw_irq.h b/arch/um/include/asm/hw_irq.h
deleted file mode 100644
index 1cf84cf..0000000
--- a/arch/um/include/asm/hw_irq.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _ASM_UM_HW_IRQ_H
-#define _ASM_UM_HW_IRQ_H
-
-#include "asm/irq.h"
-#include "asm/archparam.h"
-
-#endif
diff --git a/arch/um/include/asm/irq_regs.h b/arch/um/include/asm/irq_regs.h
deleted file mode 100644
index 3dd9c0b..0000000
--- a/arch/um/include/asm/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/um/include/asm/irqflags.h b/arch/um/include/asm/irqflags.h
index 659b9ab..c780d8a 100644
--- a/arch/um/include/asm/irqflags.h
+++ b/arch/um/include/asm/irqflags.h
@@ -1,6 +1,42 @@
 #ifndef __UM_IRQFLAGS_H
 #define __UM_IRQFLAGS_H
 
-/* Empty for now */
+extern int get_signals(void);
+extern int set_signals(int enable);
+extern void block_signals(void);
+extern void unblock_signals(void);
+
+static inline unsigned long arch_local_save_flags(void)
+{
+	return get_signals();
+}
+
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+	set_signals(flags);
+}
+
+static inline void arch_local_irq_enable(void)
+{
+	unblock_signals();
+}
+
+static inline void arch_local_irq_disable(void)
+{
+	block_signals();
+}
+
+static inline unsigned long arch_local_irq_save(void)
+{
+	unsigned long flags;
+	flags = arch_local_save_flags();
+	arch_local_irq_disable();
+	return flags;
+}
+
+static inline bool arch_irqs_disabled(void)
+{
+	return arch_local_save_flags() == 0;
+}
 
 #endif
diff --git a/arch/um/include/asm/kdebug.h b/arch/um/include/asm/kdebug.h
deleted file mode 100644
index 6ece1b0..0000000
--- a/arch/um/include/asm/kdebug.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kdebug.h>
diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h
index cf259de..30509b9 100644
--- a/arch/um/include/asm/mmu.h
+++ b/arch/um/include/asm/mmu.h
@@ -1,12 +1,24 @@
 /* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#ifndef __MMU_H
-#define __MMU_H
+#ifndef __ARCH_UM_MMU_H
+#define __ARCH_UM_MMU_H
 
-#include "um_mmu.h"
+#include "mm_id.h"
+#include <asm/mm_context.h>
+
+typedef struct mm_context {
+	struct mm_id id;
+	struct uml_arch_mm_context arch;
+	struct page **stub_pages;
+} mm_context_t;
+
+extern void __switch_mm(struct mm_id * mm_idp);
+
+/* Avoid tangled inclusion with asm/ldt.h */
+extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm);
+extern void free_ldt(struct mm_context *mm);
 
 #endif
-
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index 34d8130..591b3d8 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -6,15 +6,12 @@
 #ifndef __UM_MMU_CONTEXT_H
 #define __UM_MMU_CONTEXT_H
 
-#include "linux/sched.h"
-#include "um_mmu.h"
+#include <linux/sched.h>
+#include <asm/mmu.h>
 
 extern void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
 extern void arch_exit_mmap(struct mm_struct *mm);
 
-#define get_mmu_context(task) do ; while(0)
-#define activate_context(tsk) do ; while(0)
-
 #define deactivate_mm(tsk,mm)	do { } while (0)
 
 extern void force_flush_all(void);
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index 4cc9b6c..7cfc3ce 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -19,7 +19,7 @@
 struct page;
 
 #include <linux/types.h>
-#include <sysdep/vm-flags.h>
+#include <asm/vm-flags.h>
 
 /*
  * These are used to make use of C type-checking..
diff --git a/arch/um/include/asm/page_offset.h b/arch/um/include/asm/page_offset.h
deleted file mode 100644
index 1c168df..0000000
--- a/arch/um/include/asm/page_offset.h
+++ /dev/null
@@ -1 +0,0 @@
-#define PAGE_OFFSET_RAW (uml_physmem)
diff --git a/arch/um/include/asm/pda.h b/arch/um/include/asm/pda.h
deleted file mode 100644
index ddcd774..0000000
--- a/arch/um/include/asm/pda.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#ifndef __UM_PDA_X86_64_H
-#define __UM_PDA_X86_64_H
-
-/* XXX */
-struct foo {
-	unsigned int __softirq_pending;
-	unsigned int __nmi_count;
-};
-
-extern struct foo me;
-
-#define read_pda(me) (&me)
-
-#endif
-
diff --git a/arch/um/include/asm/percpu.h b/arch/um/include/asm/percpu.h
deleted file mode 100644
index efe7508..0000000
--- a/arch/um/include/asm/percpu.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __UM_PERCPU_H
-#define __UM_PERCPU_H
-
-#include <asm-generic/percpu.h>
-
-#endif /* __UM_PERCPU_H */
diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h
index 1a7d275..f605d3c 100644
--- a/arch/um/include/asm/ptrace-generic.h
+++ b/arch/um/include/asm/ptrace-generic.h
@@ -23,17 +23,10 @@
 #define PT_REGS_IP(r) UPT_IP(&(r)->regs)
 #define PT_REGS_SP(r) UPT_SP(&(r)->regs)
 
-#define PT_REG(r, reg) UPT_REG(&(r)->regs, reg)
-#define PT_REGS_SET(r, reg, val) UPT_SET(&(r)->regs, reg, val)
-
-#define PT_REGS_SET_SYSCALL_RETURN(r, res) \
-	UPT_SET_SYSCALL_RETURN(&(r)->regs, res)
 #define PT_REGS_RESTART_SYSCALL(r) UPT_RESTART_SYSCALL(&(r)->regs)
 
 #define PT_REGS_SYSCALL_NR(r) UPT_SYSCALL_NR(&(r)->regs)
 
-#define PT_REGS_SC(r) UPT_SC(&(r)->regs)
-
 #define instruction_pointer(regs) PT_REGS_IP(regs)
 
 struct task_struct;
diff --git a/arch/um/include/asm/sections.h b/arch/um/include/asm/sections.h
deleted file mode 100644
index 6b0231e..0000000
--- a/arch/um/include/asm/sections.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _UM_SECTIONS_H
-#define _UM_SECTIONS_H
-
-/* nothing to see, move along */
-#include <asm-generic/sections.h>
-
-#endif
diff --git a/arch/um/include/asm/system.h b/arch/um/include/asm/system.h
deleted file mode 100644
index 68a90ec..0000000
--- a/arch/um/include/asm/system.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef __UM_SYSTEM_GENERIC_H
-#define __UM_SYSTEM_GENERIC_H
-
-#include "sysdep/system.h"
-
-extern int get_signals(void);
-extern int set_signals(int enable);
-extern void block_signals(void);
-extern void unblock_signals(void);
-
-static inline unsigned long arch_local_save_flags(void)
-{
-	return get_signals();
-}
-
-static inline void arch_local_irq_restore(unsigned long flags)
-{
-	set_signals(flags);
-}
-
-static inline void arch_local_irq_enable(void)
-{
-	unblock_signals();
-}
-
-static inline void arch_local_irq_disable(void)
-{
-	block_signals();
-}
-
-static inline unsigned long arch_local_irq_save(void)
-{
-	unsigned long flags;
-	flags = arch_local_save_flags();
-	arch_local_irq_disable();
-	return flags;
-}
-
-static inline bool arch_irqs_disabled(void)
-{
-	return arch_local_save_flags() == 0;
-}
-
-extern void *_switch_to(void *prev, void *next, void *last);
-#define switch_to(prev, next, last) prev = _switch_to(prev, next, last)
-
-#endif
diff --git a/arch/um/include/asm/topology.h b/arch/um/include/asm/topology.h
deleted file mode 100644
index 0905e4f..0000000
--- a/arch/um/include/asm/topology.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_UM_TOPOLOGY_H
-#define _ASM_UM_TOPOLOGY_H
-
-#include <asm-generic/topology.h>
-
-#endif
diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h
index b9a895d..3f22fbf 100644
--- a/arch/um/include/asm/uaccess.h
+++ b/arch/um/include/asm/uaccess.h
@@ -6,15 +6,15 @@
 #ifndef __UM_UACCESS_H
 #define __UM_UACCESS_H
 
-#include <asm/errno.h>
-#include <asm/processor.h>
-
 /* thread_info has a mm_segment_t in it, so put the definition up here */
 typedef struct {
 	unsigned long seg;
 } mm_segment_t;
 
-#include "linux/thread_info.h"
+#include <linux/thread_info.h>
+#include <linux/errno.h>
+#include <asm/processor.h>
+#include <asm/elf.h>
 
 #define VERIFY_READ 0
 #define VERIFY_WRITE 1
@@ -38,7 +38,86 @@
 
 #define segment_eq(a, b) ((a).seg == (b).seg)
 
-#include "um_uaccess.h"
+#define __under_task_size(addr, size) \
+	(((unsigned long) (addr) < TASK_SIZE) && \
+	 (((unsigned long) (addr) + (size)) < TASK_SIZE))
+
+#define __access_ok_vsyscall(type, addr, size) \
+	 ((type == VERIFY_READ) && \
+	  ((unsigned long) (addr) >= FIXADDR_USER_START) && \
+	  ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \
+	  ((unsigned long) (addr) + (size) >= (unsigned long)(addr)))
+
+#define __addr_range_nowrap(addr, size) \
+	((unsigned long) (addr) <= ((unsigned long) (addr) + (size)))
+
+#define access_ok(type, addr, size) \
+	(__addr_range_nowrap(addr, size) && \
+	 (__under_task_size(addr, size) || \
+	  __access_ok_vsyscall(type, addr, size) || \
+	  segment_eq(get_fs(), KERNEL_DS)))
+
+extern int copy_from_user(void *to, const void __user *from, int n);
+extern int copy_to_user(void __user *to, const void *from, int n);
+
+/*
+ * strncpy_from_user: - Copy a NUL terminated string from userspace.
+ * @dst:   Destination address, in kernel space.  This buffer must be at
+ *         least @count bytes long.
+ * @src:   Source address, in user space.
+ * @count: Maximum number of bytes to copy, including the trailing NUL.
+ *
+ * Copies a NUL-terminated string from userspace to kernel space.
+ *
+ * On success, returns the length of the string (not including the trailing
+ * NUL).
+ *
+ * If access to userspace fails, returns -EFAULT (some data may have been
+ * copied).
+ *
+ * If @count is smaller than the length of the string, copies @count bytes
+ * and returns @count.
+ */
+
+extern int strncpy_from_user(char *dst, const char __user *src, int count);
+
+/*
+ * __clear_user: - Zero a block of memory in user space, with less checking.
+ * @to:   Destination address, in user space.
+ * @n:    Number of bytes to zero.
+ *
+ * Zero a block of memory in user space.  Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+extern int __clear_user(void __user *mem, int len);
+
+/*
+ * clear_user: - Zero a block of memory in user space.
+ * @to:   Destination address, in user space.
+ * @n:    Number of bytes to zero.
+ *
+ * Zero a block of memory in user space.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+extern int clear_user(void __user *mem, int len);
+
+/*
+ * strlen_user: - Get the size of a string in user space.
+ * @str: The string to measure.
+ * @n:   The maximum valid length
+ *
+ * Get the size of a NUL-terminated string in user space.
+ *
+ * Returns the size of the string INCLUDING the terminating NUL.
+ * On exception, returns 0.
+ * If the string is too long, returns a value greater than @n.
+ */
+extern int strnlen_user(const void __user *str, int len);
 
 #define __copy_from_user(to, from, n) copy_from_user(to, from, n)
 
diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h
deleted file mode 100644
index a19db3e..0000000
--- a/arch/um/include/asm/xor.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __UM_XOR_H
-#define __UM_XOR_H
-
-#include "asm-generic/xor.h"
-
-#endif
diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h
index a92b678..896e166 100644
--- a/arch/um/include/shared/as-layout.h
+++ b/arch/um/include/shared/as-layout.h
@@ -6,7 +6,7 @@
 #ifndef __START_H__
 #define __START_H__
 
-#include "kern_constants.h"
+#include <generated/asm-offsets.h>
 
 /*
  * Stolen from linux/const.h, which can't be directly included since
diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h
index 72009c7..d7fe563 100644
--- a/arch/um/include/shared/common-offsets.h
+++ b/arch/um/include/shared/common-offsets.h
@@ -2,7 +2,6 @@
 
 DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE);
 
-OFFSET(HOST_TASK_REGS, task_struct, thread.regs);
 OFFSET(HOST_TASK_PID, task_struct, pid);
 
 DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE);
diff --git a/arch/um/include/shared/initrd.h b/arch/um/include/shared/initrd.h
deleted file mode 100644
index 22673bc..0000000
--- a/arch/um/include/shared/initrd.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __INITRD_USER_H__
-#define __INITRD_USER_H__
-
-extern int load_initrd(char *filename, void *buf, int size);
-
-#endif
-
diff --git a/arch/um/include/shared/kern.h b/arch/um/include/shared/kern.h
index 4ce3fc6..6cd0124 100644
--- a/arch/um/include/shared/kern.h
+++ b/arch/um/include/shared/kern.h
@@ -13,28 +13,10 @@
  * includes.
  */
 
-extern int errno;
-
-extern int clone(int (*proc)(void *), void *sp, int flags, void *data);
-extern int sleep(int);
 extern int printf(const char *fmt, ...);
-extern char *strerror(int errnum);
-extern char *ptsname(int __fd);
-extern int munmap(void *, int);
 extern void *sbrk(int increment);
-extern void *malloc(int size);
-extern void perror(char *err);
-extern int kill(int pid, int sig);
-extern int getuid(void);
-extern int getgid(void);
 extern int pause(void);
-extern int write(int, const void *, int);
 extern void exit(int);
-extern int close(int);
-extern int read(unsigned int, char *, int);
-extern int pipe(int *);
-extern int sched_yield(void);
-extern int ptrace(int op, int pid, long addr, long data);
 
 #endif
 
diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h
index 3c34122..0f14838 100644
--- a/arch/um/include/shared/kern_util.h
+++ b/arch/um/include/shared/kern_util.h
@@ -21,7 +21,6 @@
 extern void free_stack(unsigned long stack, int order);
 
 extern int do_signal(void);
-extern void copy_sc(struct uml_pt_regs *regs, void *from);
 extern void interrupt_end(void);
 extern void relay_signal(int sig, struct uml_pt_regs *regs);
 
diff --git a/arch/um/include/shared/ldt.h b/arch/um/include/shared/ldt.h
deleted file mode 100644
index a7f999a..0000000
--- a/arch/um/include/shared/ldt.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2004 Fujitsu Siemens Computers GmbH
- * Licensed under the GPL
- *
- * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
- */
-
-#ifndef __ASM_LDT_H
-#define __ASM_LDT_H
-
-#include <linux/mutex.h>
-#include <sysdep/host_ldt.h>
-
-extern void ldt_host_info(void);
-
-#define LDT_PAGES_MAX \
-	((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE)
-#define LDT_ENTRIES_PER_PAGE \
-	(PAGE_SIZE/LDT_ENTRY_SIZE)
-#define LDT_DIRECT_ENTRIES \
-	((LDT_PAGES_MAX*sizeof(void *))/LDT_ENTRY_SIZE)
-
-struct ldt_entry {
-	__u32 a;
-	__u32 b;
-};
-
-typedef struct uml_ldt {
-	int entry_count;
-	struct mutex lock;
-	union {
-		struct ldt_entry * pages[LDT_PAGES_MAX];
-		struct ldt_entry entries[LDT_DIRECT_ENTRIES];
-	} u;
-} uml_ldt_t;
-
-#endif
diff --git a/arch/um/include/shared/mem_kern.h b/arch/um/include/shared/mem_kern.h
deleted file mode 100644
index 69be0fd..0000000
--- a/arch/um/include/shared/mem_kern.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright (C) 2003 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL
- */
-
-#ifndef __MEM_KERN_H__
-#define __MEM_KERN_H__
-
-#include "linux/list.h"
-#include "linux/types.h"
-
-struct remapper {
-	struct list_head list;
-	int (*proc)(int, unsigned long, int, __u64);
-};
-
-extern void register_remapper(struct remapper *info);
-
-#endif
-
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 83c7c2e..89b686c1 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -10,7 +10,6 @@
 #include "irq_user.h"
 #include "longjmp.h"
 #include "mm_id.h"
-#include "sysdep/tls.h"
 
 #define CATCH_EINTR(expr) while ((errno = 0, ((expr) < 0)) && (errno == EINTR))
 
@@ -203,12 +202,6 @@
 extern int can_drop_memory(void);
 extern void os_flush_stdout(void);
 
-/* uaccess.c */
-extern unsigned long __do_user_copy(void *to, const void *from, int n,
-				    void **fault_addr, jmp_buf **fault_catcher,
-				    void (*op)(void *to, const void *from,
-					       int n), int *faulted_out);
-
 /* execvp.c */
 extern int execvp_noalloc(char *buf, const char *file, char *const argv[]);
 /* helper.c */
@@ -218,10 +211,6 @@
 extern int helper_wait(int pid);
 
 
-/* tls.c */
-extern int os_set_thread_area(user_desc_t *info, int pid);
-extern int os_get_thread_area(user_desc_t *info, int pid);
-
 /* umid.c */
 extern int umid_file_name(char *name, char *buf, int len);
 extern int set_umid(char *name);
@@ -231,7 +220,7 @@
 extern void timer_init(void);
 extern void set_sigstack(void *sig_stack, int size);
 extern void remove_sigstack(void);
-extern void set_handler(int sig, void (*handler)(int), int flags, ...);
+extern void set_handler(int sig);
 extern int change_sig(int signal, int on);
 extern void block_signals(void);
 extern void unblock_signals(void);
diff --git a/arch/um/include/shared/process.h b/arch/um/include/shared/process.h
deleted file mode 100644
index bb873a5..0000000
--- a/arch/um/include/shared/process.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* 
- * Copyright (C) 2000 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __PROCESS_H__
-#define __PROCESS_H__
-
-#include <signal.h>
-
-/* Copied from linux/compiler-gcc.h since we can't include it directly */
-#define barrier() __asm__ __volatile__("": : :"memory")
-
-extern void sig_handler(int sig, struct sigcontext *sc);
-extern void alarm_handler(int sig, struct sigcontext *sc);
-
-#endif
diff --git a/arch/um/include/shared/ptrace_user.h b/arch/um/include/shared/ptrace_user.h
index 7fd8539..56b2f28 100644
--- a/arch/um/include/shared/ptrace_user.h
+++ b/arch/um/include/shared/ptrace_user.h
@@ -6,7 +6,8 @@
 #ifndef __PTRACE_USER_H__
 #define __PTRACE_USER_H__
 
-#include "sysdep/ptrace_user.h"
+#include <sys/ptrace.h>
+#include <sysdep/ptrace_user.h>
 
 extern int ptrace_getregs(long pid, unsigned long *regs_out);
 extern int ptrace_setregs(long pid, unsigned long *regs_in);
diff --git a/arch/um/include/shared/skas_ptregs.h b/arch/um/include/shared/skas_ptregs.h
deleted file mode 100644
index 73db19e..0000000
--- a/arch/um/include/shared/skas_ptregs.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __SKAS_PT_REGS_
-#define __SKAS_PT_REGS_
-
-#include <user_constants.h>
-
-#endif
diff --git a/arch/um/include/shared/syscall.h b/arch/um/include/shared/syscall.h
deleted file mode 100644
index dda1df9..0000000
--- a/arch/um/include/shared/syscall.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSCALL_USER_H
-#define __SYSCALL_USER_H
-
-extern int record_syscall_start(int syscall);
-extern void record_syscall_end(int index, long result);
-
-#endif
diff --git a/arch/um/include/shared/task.h b/arch/um/include/shared/task.h
deleted file mode 100644
index 3fe726b..0000000
--- a/arch/um/include/shared/task.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __TASK_H
-#define __TASK_H
-
-#include <kern_constants.h>
-
-#define TASK_REGS(task) ((struct uml_pt_regs *) &(((char *) (task))[HOST_TASK_REGS]))
-#define TASK_PID(task) *((int *) &(((char *) (task))[HOST_TASK_PID]))
-
-#endif
diff --git a/arch/um/include/shared/tlb.h b/arch/um/include/shared/tlb.h
deleted file mode 100644
index ecd2265..0000000
--- a/arch/um/include/shared/tlb.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __TLB_H__
-#define __TLB_H__
-
-#include "um_mmu.h"
-
-extern void force_flush_all(void);
-extern int flush_tlb_kernel_range_common(unsigned long start,
-					 unsigned long end);
-
-#endif
diff --git a/arch/um/include/shared/um_malloc.h b/arch/um/include/shared/um_malloc.h
index c554d70..6395fef 100644
--- a/arch/um/include/shared/um_malloc.h
+++ b/arch/um/include/shared/um_malloc.h
@@ -6,7 +6,7 @@
 #ifndef __UM_MALLOC_H__
 #define __UM_MALLOC_H__
 
-#include "kern_constants.h"
+#include <generated/asm-offsets.h>
 
 extern void *uml_kmalloc(int size, int flags);
 extern void kfree(const void *ptr);
diff --git a/arch/um/include/shared/um_mmu.h b/arch/um/include/shared/um_mmu.h
deleted file mode 100644
index b1a7e47..0000000
--- a/arch/um/include/shared/um_mmu.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* 
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __ARCH_UM_MMU_H
-#define __ARCH_UM_MMU_H
-
-#include "mm_id.h"
-#include "ldt.h"
-
-typedef struct mm_context {
-	struct mm_id id;
-	struct uml_ldt ldt;
-	struct page **stub_pages;
-} mm_context_t;
-
-extern void __switch_mm(struct mm_id * mm_idp);
-
-/* Avoid tangled inclusion with asm/ldt.h */
-extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm);
-extern void free_ldt(struct mm_context *mm);
-
-#endif
diff --git a/arch/um/include/shared/um_uaccess.h b/arch/um/include/shared/um_uaccess.h
deleted file mode 100644
index 45c0499..0000000
--- a/arch/um/include/shared/um_uaccess.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/* 
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __ARCH_UM_UACCESS_H
-#define __ARCH_UM_UACCESS_H
-
-#include <asm/elf.h>
-#include <asm/fixmap.h>
-#include "sysdep/archsetjmp.h"
-
-#define __under_task_size(addr, size) \
-	(((unsigned long) (addr) < TASK_SIZE) && \
-	 (((unsigned long) (addr) + (size)) < TASK_SIZE))
-
-#define __access_ok_vsyscall(type, addr, size) \
-	 ((type == VERIFY_READ) && \
-	  ((unsigned long) (addr) >= FIXADDR_USER_START) && \
-	  ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \
-	  ((unsigned long) (addr) + (size) >= (unsigned long)(addr)))
-
-#define __addr_range_nowrap(addr, size) \
-	((unsigned long) (addr) <= ((unsigned long) (addr) + (size)))
-
-#define access_ok(type, addr, size) \
-	(__addr_range_nowrap(addr, size) && \
-	 (__under_task_size(addr, size) || \
-	  __access_ok_vsyscall(type, addr, size) || \
-	  segment_eq(get_fs(), KERNEL_DS)))
-
-extern int copy_from_user(void *to, const void __user *from, int n);
-extern int copy_to_user(void __user *to, const void *from, int n);
-
-extern int __do_copy_to_user(void *to, const void *from, int n,
-			     void **fault_addr, jmp_buf **fault_catcher);
-
-/*
- * strncpy_from_user: - Copy a NUL terminated string from userspace.
- * @dst:   Destination address, in kernel space.  This buffer must be at
- *         least @count bytes long.
- * @src:   Source address, in user space.
- * @count: Maximum number of bytes to copy, including the trailing NUL.
- *
- * Copies a NUL-terminated string from userspace to kernel space.
- *
- * On success, returns the length of the string (not including the trailing
- * NUL).
- *
- * If access to userspace fails, returns -EFAULT (some data may have been
- * copied).
- *
- * If @count is smaller than the length of the string, copies @count bytes
- * and returns @count.
- */
-
-extern int strncpy_from_user(char *dst, const char __user *src, int count);
-
-/*
- * __clear_user: - Zero a block of memory in user space, with less checking.
- * @to:   Destination address, in user space.
- * @n:    Number of bytes to zero.
- *
- * Zero a block of memory in user space.  Caller must check
- * the specified block with access_ok() before calling this function.
- *
- * Returns number of bytes that could not be cleared.
- * On success, this will be zero.
- */
-extern int __clear_user(void __user *mem, int len);
-
-/*
- * clear_user: - Zero a block of memory in user space.
- * @to:   Destination address, in user space.
- * @n:    Number of bytes to zero.
- *
- * Zero a block of memory in user space.
- *
- * Returns number of bytes that could not be cleared.
- * On success, this will be zero.
- */
-extern int clear_user(void __user *mem, int len);
-
-/*
- * strlen_user: - Get the size of a string in user space.
- * @str: The string to measure.
- * @n:   The maximum valid length
- *
- * Get the size of a NUL-terminated string in user space.
- *
- * Returns the size of the string INCLUDING the terminating NUL.
- * On exception, returns 0.
- * If the string is too long, returns a value greater than @n.
- */
-extern int strnlen_user(const void __user *str, int len);
-
-#endif
diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h
index 293f7c7..4fa82c0 100644
--- a/arch/um/include/shared/user.h
+++ b/arch/um/include/shared/user.h
@@ -6,7 +6,7 @@
 #ifndef __USER_H__
 #define __USER_H__
 
-#include "kern_constants.h"
+#include <generated/asm-offsets.h>
 
 /*
  * The usual definition - copied here because the kernel provides its own,
@@ -36,10 +36,11 @@
 }
 #endif
 
-extern void schedule(void);
 extern int in_aton(char *str);
-extern int open_gdb_chan(void);
 extern size_t strlcpy(char *, const char *, size_t);
 extern size_t strlcat(char *, const char *, size_t);
 
+/* Copied from linux/compiler-gcc.h since we can't include it directly */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
 #endif
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index c4491c1..bc49474 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -11,7 +11,7 @@
 
 obj-y = config.o exec.o exitcode.o init_task.o irq.o ksyms.o mem.o \
 	physmem.o process.o ptrace.o reboot.o sigio.o \
-	signal.o smp.o syscall.o sysrq.o time.o tlb.o trap.o uaccess.o \
+	signal.o smp.o syscall.o sysrq.o time.o tlb.o trap.o \
 	um_arch.o umid.o skas/
 
 obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 939a4a6..6cade93 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -3,14 +3,15 @@
  * Licensed under the GPL
  */
 
-#include "linux/stddef.h"
-#include "linux/fs.h"
-#include "linux/ptrace.h"
-#include "linux/sched.h"
-#include "linux/slab.h"
-#include "asm/current.h"
-#include "asm/processor.h"
-#include "asm/uaccess.h"
+#include <linux/stddef.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
 #include "as-layout.h"
 #include "mem_user.h"
 #include "skas.h"
@@ -41,6 +42,7 @@
 	PT_REGS_IP(regs) = eip;
 	PT_REGS_SP(regs) = esp;
 }
+EXPORT_SYMBOL(start_thread);
 
 static long execve1(const char *file,
 		    const char __user *const __user *argv,
diff --git a/arch/um/kernel/gmon_syms.c b/arch/um/kernel/gmon_syms.c
index 72eccd2..e9bcf247 100644
--- a/arch/um/kernel/gmon_syms.c
+++ b/arch/um/kernel/gmon_syms.c
@@ -7,18 +7,3 @@
 
 extern void __bb_init_func(void *)  __attribute__((weak));
 EXPORT_SYMBOL(__bb_init_func);
-
-/*
- * This is defined (and referred to in profiling stub code) only by some GCC
- * versions in libgcov.
- *
- * Since SuSE backported the fix, we cannot handle it depending on GCC version.
- * So, unconditionally export it. But also give it a weak declaration, which
- * will be overridden by any other one.
- */
-
-extern void __gcov_init(void *) __attribute__((weak));
-EXPORT_SYMBOL(__gcov_init);
-
-extern void __gcov_merge_add(void *) __attribute__((weak));
-EXPORT_SYMBOL(__gcov_merge_add);
diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c
index d386c75..10cc18f 100644
--- a/arch/um/kernel/initrd.c
+++ b/arch/um/kernel/initrd.c
@@ -7,12 +7,12 @@
 #include "linux/bootmem.h"
 #include "linux/initrd.h"
 #include "asm/types.h"
-#include "initrd.h"
 #include "init.h"
 #include "os.h"
 
 /* Changed by uml_initrd_setup, which is a setup */
 static char *initrd __initdata = NULL;
+static int load_initrd(char *filename, void *buf, int size);
 
 static int __init read_initrd(void)
 {
@@ -62,7 +62,7 @@
 "    name of the file containing the image.\n\n"
 );
 
-int load_initrd(char *filename, void *buf, int size)
+static int load_initrd(char *filename, void *buf, int size)
 {
 	int fd, n;
 
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 9e485c7..71b8c94 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -258,6 +258,7 @@
 
 	ignore_sigio_fd(fd);
 }
+EXPORT_SYMBOL(deactivate_fd);
 
 /*
  * Called just before shutdown in order to provide a clean exec
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index 0ae0dfc..e17bea0 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -3,33 +3,11 @@
  * Licensed under the GPL
  */
 
-#include "linux/module.h"
-#include "linux/syscalls.h"
-#include "asm/tlbflush.h"
-#include "asm/uaccess.h"
-#include "as-layout.h"
-#include "kern_util.h"
-#include "mem_user.h"
+#include <linux/module.h>
 #include "os.h"
 
-EXPORT_SYMBOL(uml_physmem);
 EXPORT_SYMBOL(set_signals);
 EXPORT_SYMBOL(get_signals);
-EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(sys_waitpid);
-EXPORT_SYMBOL(flush_tlb_range);
-
-EXPORT_SYMBOL(high_physmem);
-EXPORT_SYMBOL(empty_zero_page);
-EXPORT_SYMBOL(handle_page_fault);
-EXPORT_SYMBOL(find_iomem);
-
-EXPORT_SYMBOL(strnlen_user);
-EXPORT_SYMBOL(strncpy_from_user);
-EXPORT_SYMBOL(copy_to_user);
-EXPORT_SYMBOL(copy_from_user);
-EXPORT_SYMBOL(clear_user);
-EXPORT_SYMBOL(uml_strdup);
 
 EXPORT_SYMBOL(os_stat_fd);
 EXPORT_SYMBOL(os_stat_file);
@@ -57,24 +35,10 @@
 EXPORT_SYMBOL(os_accept_connection);
 EXPORT_SYMBOL(os_rcv_fd);
 EXPORT_SYMBOL(run_helper);
-EXPORT_SYMBOL(start_thread);
 EXPORT_SYMBOL(os_major);
 EXPORT_SYMBOL(os_minor);
 EXPORT_SYMBOL(os_makedev);
 
 EXPORT_SYMBOL(add_sigio_fd);
 EXPORT_SYMBOL(ignore_sigio_fd);
-EXPORT_SYMBOL(deactivate_fd);
 EXPORT_SYMBOL(sigio_broken);
-
-#ifdef CONFIG_SMP
-
-/* required for SMP */
-
-extern void __write_lock_failed(rwlock_t *rw);
-EXPORT_SYMBOL(__write_lock_failed);
-
-extern void __read_lock_failed(rwlock_t *rw);
-EXPORT_SYMBOL(__read_lock_failed);
-
-#endif
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 8137ccc..ebb86b2 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/stddef.h>
+#include <linux/module.h>
 #include <linux/bootmem.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
@@ -20,6 +21,7 @@
 
 /* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */
 unsigned long *empty_zero_page = NULL;
+EXPORT_SYMBOL(empty_zero_page);
 /* allocated in paging_init and unchanged thereafter */
 static unsigned long *empty_bad_page = NULL;
 
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index a1a9090..f116db1 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -3,20 +3,22 @@
  * Licensed under the GPL
  */
 
-#include "linux/bootmem.h"
-#include "linux/mm.h"
-#include "linux/pfn.h"
-#include "asm/page.h"
-#include "as-layout.h"
-#include "init.h"
-#include "kern.h"
-#include "mem_user.h"
-#include "os.h"
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/pfn.h>
+#include <asm/page.h>
+#include <as-layout.h>
+#include <init.h>
+#include <kern.h>
+#include <mem_user.h>
+#include <os.h>
 
 static int physmem_fd = -1;
 
 /* Changed during early boot */
 unsigned long high_physmem;
+EXPORT_SYMBOL(high_physmem);
 
 extern unsigned long long physmem_size;
 
@@ -184,6 +186,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(find_iomem);
 
 static int setup_iomem(void)
 {
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 21c1ae7..c533835 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -20,12 +20,12 @@
 #include <linux/threads.h>
 #include <asm/current.h>
 #include <asm/pgtable.h>
+#include <asm/mmu_context.h>
 #include <asm/uaccess.h>
 #include "as-layout.h"
 #include "kern_util.h"
 #include "os.h"
 #include "skas.h"
-#include "tlb.h"
 
 /*
  * This is a per-cpu array.  A processor only modifies its entry and it only
@@ -78,6 +78,7 @@
 		      &current->thread.regs, 0, NULL, NULL);
 	return pid;
 }
+EXPORT_SYMBOL(kernel_thread);
 
 static inline void set_current(struct task_struct *task)
 {
@@ -286,6 +287,7 @@
 {
 	return kstrdup(string, GFP_KERNEL);
 }
+EXPORT_SYMBOL(uml_strdup);
 
 int copy_to_user_proc(void __user *to, void *from, int size)
 {
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index b5c094c..e8b889d 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -11,7 +11,6 @@
 #include <asm/unistd.h>
 #include "frame_kern.h"
 #include "kern_util.h"
-#include <sysdep/sigcontext.h>
 
 EXPORT_SYMBOL(block_signals);
 EXPORT_SYMBOL(unblock_signals);
diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c
index 2c8583c..e1fd066 100644
--- a/arch/um/kernel/skas/clone.c
+++ b/arch/um/kernel/skas/clone.c
@@ -8,7 +8,6 @@
 #include <asm/unistd.h>
 #include <sys/time.h>
 #include "as-layout.h"
-#include "kern_constants.h"
 #include "ptrace_user.h"
 #include "stub-data.h"
 #include "sysdep/stub.h"
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index 6966342..9fefd92 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -6,6 +6,7 @@
 #include <linux/err.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <asm/current.h>
 #include <asm/page.h>
@@ -149,6 +150,7 @@
 	       buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to):
 	       n;
 }
+EXPORT_SYMBOL(copy_from_user);
 
 static int copy_chunk_to_user(unsigned long to, int len, void *arg)
 {
@@ -170,6 +172,7 @@
 	       buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) :
 	       n;
 }
+EXPORT_SYMBOL(copy_to_user);
 
 static int strncpy_chunk_from_user(unsigned long from, int len, void *arg)
 {
@@ -204,6 +207,7 @@
 		return -EFAULT;
 	return strnlen(dst, count);
 }
+EXPORT_SYMBOL(strncpy_from_user);
 
 static int clear_chunk(unsigned long addr, int len, void *unused)
 {
@@ -226,6 +230,7 @@
 	return access_ok(VERIFY_WRITE, mem, len) ?
 	       buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len;
 }
+EXPORT_SYMBOL(clear_user);
 
 static int strnlen_chunk(unsigned long str, int len, void *arg)
 {
@@ -251,3 +256,4 @@
 		return count + 1;
 	return -EFAULT;
 }
+EXPORT_SYMBOL(strnlen_user);
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index d175d05..7f3d4d86 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -11,7 +12,6 @@
 #include "mem_user.h"
 #include "os.h"
 #include "skas.h"
-#include "tlb.h"
 
 struct host_vm_change {
 	struct host_vm_op {
@@ -287,7 +287,7 @@
 	}
 }
 
-int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
+static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
 {
 	struct mm_struct *mm;
 	pgd_t *pgd;
@@ -499,6 +499,7 @@
 		flush_tlb_kernel_range_common(start, end);
 	else fix_range(vma->vm_mm, start, end, 0);
 }
+EXPORT_SYMBOL(flush_tlb_range);
 
 void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 			unsigned long end)
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 8c7b882..dafc947 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -6,6 +6,7 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/hardirq.h>
+#include <linux/module.h>
 #include <asm/current.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -14,7 +15,6 @@
 #include "kern_util.h"
 #include "os.h"
 #include "skas.h"
-#include "sysdep/sigcontext.h"
 
 /*
  * Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by
@@ -112,6 +112,7 @@
 	pagefault_out_of_memory();
 	return 0;
 }
+EXPORT_SYMBOL(handle_page_fault);
 
 static void show_segv_info(struct uml_pt_regs *regs)
 {
diff --git a/arch/um/kernel/uaccess.c b/arch/um/kernel/uaccess.c
deleted file mode 100644
index dd33f04..0000000
--- a/arch/um/kernel/uaccess.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk)
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-/*
- * These are here rather than tt/uaccess.c because skas mode needs them in
- * order to do SIGBUS recovery when a tmpfs mount runs out of room.
- */
-
-#include <linux/string.h>
-#include "os.h"
-
-static void __do_copy(void *to, const void *from, int n)
-{
-	memcpy(to, from, n);
-}
-
-
-int __do_copy_to_user(void *to, const void *from, int n,
-		      void **fault_addr, jmp_buf **fault_catcher)
-{
-	unsigned long fault;
-	int faulted;
-
-	fault = __do_user_copy(to, from, n, fault_addr, fault_catcher,
-			       __do_copy, &faulted);
-	if (!faulted)
-		return 0;
-	else
-		return n - (fault - (unsigned long) to);
-}
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 8d84250..ba00eae 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -102,6 +102,8 @@
 
 /* Set in linux_main */
 unsigned long uml_physmem;
+EXPORT_SYMBOL(uml_physmem);
+
 unsigned long uml_reserved; /* Also modified in mem_init */
 unsigned long start_vm;
 unsigned long end_vm;
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index b33f4df..dd76410 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -4,14 +4,14 @@
 #
 
 obj-y = aio.o execvp.o file.o helper.o irq.o main.o mem.o process.o \
-	registers.o sigio.o signal.o start_up.o time.o tty.o uaccess.o \
-	umid.o tls.o user_syms.o util.o drivers/ sys-$(SUBARCH)/ skas/
+	registers.o sigio.o signal.o start_up.o time.o tty.o \
+	umid.o user_syms.o util.o drivers/ skas/
 
 obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o
 
 USER_OBJS := $(user-objs-y) aio.o elf_aux.o execvp.o file.o helper.o irq.o \
 	main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \
-	tty.o tls.o uaccess.o umid.o util.o
+	tty.o umid.o util.o
 
 CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH)
 
diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c
index 57e3d46..c5d039e 100644
--- a/arch/um/os-Linux/aio.c
+++ b/arch/um/os-Linux/aio.c
@@ -11,10 +11,8 @@
 #include <asm/unistd.h>
 #include "aio.h"
 #include "init.h"
-#include "kern_constants.h"
 #include "kern_util.h"
 #include "os.h"
-#include "user.h"
 
 struct aio_thread_req {
 	enum aio_type type;
diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c
index cc72cb2..db3d648 100644
--- a/arch/um/os-Linux/drivers/ethertap_user.c
+++ b/arch/um/os-Linux/drivers/ethertap_user.c
@@ -13,11 +13,9 @@
 #include <sys/socket.h>
 #include <sys/wait.h>
 #include "etap.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "net_user.h"
 #include "um_malloc.h"
-#include "user.h"
 
 #define MAX_PACKET ETH_MAX_PACKET
 
diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c
index 2448be0..a2aacff 100644
--- a/arch/um/os-Linux/drivers/tuntap_user.c
+++ b/arch/um/os-Linux/drivers/tuntap_user.c
@@ -13,11 +13,9 @@
 #include <sys/socket.h>
 #include <sys/wait.h>
 #include <sys/uio.h>
-#include "kern_constants.h"
 #include "kern_util.h"
 #include "os.h"
 #include "tuntap.h"
-#include "user.h"
 
 static int tuntap_user_init(void *data, void *dev)
 {
diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c
index 9533237..d895271 100644
--- a/arch/um/os-Linux/elf_aux.c
+++ b/arch/um/os-Linux/elf_aux.c
@@ -12,7 +12,6 @@
 #include "init.h"
 #include "elf_user.h"
 #include "mem_user.h"
-#include <kern_constants.h>
 
 typedef Elf32_auxv_t elf_auxv_t;
 
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 140e587..b049a63 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -13,9 +13,7 @@
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/un.h>
-#include "kern_constants.h"
 #include "os.h"
-#include "user.h"
 
 static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
 {
diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c
index feff22d..cf26c4a 100644
--- a/arch/um/os-Linux/helper.c
+++ b/arch/um/os-Linux/helper.c
@@ -10,11 +10,9 @@
 #include <linux/limits.h>
 #include <sys/socket.h>
 #include <sys/wait.h>
-#include "kern_constants.h"
 #include "kern_util.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "user.h"
 
 struct helper_data {
 	void (*pre_exec)(void*);
diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h
new file mode 100644
index 0000000..2c3c3ec
--- /dev/null
+++ b/arch/um/os-Linux/internal.h
@@ -0,0 +1 @@
+void alarm_handler(int, mcontext_t *);
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index 0348b97..9a49908 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -9,11 +9,8 @@
 #include <signal.h>
 #include <string.h>
 #include "irq_user.h"
-#include "kern_constants.h"
 #include "os.h"
-#include "process.h"
 #include "um_malloc.h"
-#include "user.h"
 
 /*
  * Locked by irq_lock in arch/um/kernel/irq.c.  Changed by os_create_pollfd
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 8471b81..7a86dd5 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -12,7 +12,6 @@
 #include <sys/resource.h>
 #include "as-layout.h"
 #include "init.h"
-#include "kern_constants.h"
 #include "kern_util.h"
 #include "os.h"
 #include "um_malloc.h"
diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c
index 62878cf..8e421e1 100644
--- a/arch/um/os-Linux/mem.c
+++ b/arch/um/os-Linux/mem.c
@@ -14,9 +14,7 @@
 #include <sys/mman.h>
 #include <sys/param.h>
 #include "init.h"
-#include "kern_constants.h"
 #include "os.h"
-#include "user.h"
 
 /* Modified by which_tmpdir, which is called during early boot */
 static char *default_tmpdir = "/tmp";
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 0c45dc8..307f173 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -13,12 +13,9 @@
 #include <sys/wait.h>
 #include <asm/unistd.h>
 #include "init.h"
-#include "kern_constants.h"
 #include "longjmp.h"
 #include "os.h"
-#include "process.h"
 #include "skas_ptrace.h"
-#include "user.h"
 
 #define ARBITRARY_ADDR -1
 #define FAILURE_PID    -1
@@ -237,21 +234,13 @@
 
 void init_new_thread_signals(void)
 {
-	set_handler(SIGSEGV, (__sighandler_t) sig_handler, SA_ONSTACK,
-		    SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
-	set_handler(SIGTRAP, (__sighandler_t) sig_handler, SA_ONSTACK,
-		    SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
-	set_handler(SIGFPE, (__sighandler_t) sig_handler, SA_ONSTACK,
-		    SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
-	set_handler(SIGILL, (__sighandler_t) sig_handler, SA_ONSTACK,
-		    SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
-	set_handler(SIGBUS, (__sighandler_t) sig_handler, SA_ONSTACK,
-		    SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
+	set_handler(SIGSEGV);
+	set_handler(SIGTRAP);
+	set_handler(SIGFPE);
+	set_handler(SIGILL);
+	set_handler(SIGBUS);
 	signal(SIGHUP, SIG_IGN);
-
-	set_handler(SIGIO, (__sighandler_t) sig_handler,
-		    SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGALRM,
-		    SIGVTALRM, -1);
+	set_handler(SIGIO);
 	signal(SIGWINCH, SIG_IGN);
 	signal(SIGTERM, SIG_DFL);
 }
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index 63d299d..3c16121 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -11,14 +11,11 @@
 #include <sched.h>
 #include <signal.h>
 #include <string.h>
-#include "kern_constants.h"
 #include "kern_util.h"
 #include "init.h"
 #include "os.h"
-#include "process.h"
 #include "sigio.h"
 #include "um_malloc.h"
-#include "user.h"
 
 /*
  * Protected by sigio_lock(), also used by sigio_cleanup, which is an
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 6ae1807..2d22f1f 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -12,13 +12,7 @@
 #include "as-layout.h"
 #include "kern_util.h"
 #include "os.h"
-#include "process.h"
-#include "sysdep/barrier.h"
-#include "sysdep/sigcontext.h"
-#include "user.h"
-
-/* Copied from linux/compiler-gcc.h since we can't include it directly */
-#define barrier() __asm__ __volatile__("": : :"memory")
+#include "sysdep/mcontext.h"
 
 void (*sig_info[NSIG])(int, struct uml_pt_regs *) = {
 	[SIGTRAP]	= relay_signal,
@@ -30,7 +24,7 @@
 	[SIGIO]		= sigio_handler,
 	[SIGVTALRM]	= timer_handler };
 
-static void sig_handler_common(int sig, struct sigcontext *sc)
+static void sig_handler_common(int sig, mcontext_t *mc)
 {
 	struct uml_pt_regs r;
 	int save_errno = errno;
@@ -38,8 +32,8 @@
 	r.is_user = 0;
 	if (sig == SIGSEGV) {
 		/* For segfaults, we want the data from the sigcontext. */
-		copy_sc(&r, sc);
-		GET_FAULTINFO_FROM_SC(r.faultinfo, sc);
+		get_regs_from_mc(&r, mc);
+		GET_FAULTINFO_FROM_MC(r.faultinfo, mc);
 	}
 
 	/* enable signals if sig isn't IRQ signal */
@@ -66,7 +60,7 @@
 static int signals_enabled;
 static unsigned int signals_pending;
 
-void sig_handler(int sig, struct sigcontext *sc)
+void sig_handler(int sig, mcontext_t *mc)
 {
 	int enabled;
 
@@ -78,23 +72,23 @@
 
 	block_signals();
 
-	sig_handler_common(sig, sc);
+	sig_handler_common(sig, mc);
 
 	set_signals(enabled);
 }
 
-static void real_alarm_handler(struct sigcontext *sc)
+static void real_alarm_handler(mcontext_t *mc)
 {
 	struct uml_pt_regs regs;
 
-	if (sc != NULL)
-		copy_sc(&regs, sc);
+	if (mc != NULL)
+		get_regs_from_mc(&regs, mc);
 	regs.is_user = 0;
 	unblock_signals();
 	timer_handler(SIGVTALRM, &regs);
 }
 
-void alarm_handler(int sig, struct sigcontext *sc)
+void alarm_handler(int sig, mcontext_t *mc)
 {
 	int enabled;
 
@@ -106,14 +100,13 @@
 
 	block_signals();
 
-	real_alarm_handler(sc);
+	real_alarm_handler(mc);
 	set_signals(enabled);
 }
 
 void timer_init(void)
 {
-	set_handler(SIGVTALRM, (__sighandler_t) alarm_handler,
-		    SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, -1);
+	set_handler(SIGVTALRM);
 }
 
 void set_sigstack(void *sig_stack, int size)
@@ -126,10 +119,23 @@
 		panic("enabling signal stack failed, errno = %d\n", errno);
 }
 
-static void (*handlers[_NSIG])(int sig, struct sigcontext *sc);
+static void (*handlers[_NSIG])(int sig, mcontext_t *mc) = {
+	[SIGSEGV] = sig_handler,
+	[SIGBUS] = sig_handler,
+	[SIGILL] = sig_handler,
+	[SIGFPE] = sig_handler,
+	[SIGTRAP] = sig_handler,
 
-void handle_signal(int sig, struct sigcontext *sc)
+	[SIGIO] = sig_handler,
+	[SIGWINCH] = sig_handler,
+	[SIGVTALRM] = alarm_handler
+};
+
+
+static void hard_handler(int sig, siginfo_t *info, void *p)
 {
+	struct ucontext *uc = p;
+	mcontext_t *mc = &uc->uc_mcontext;
 	unsigned long pending = 1UL << sig;
 
 	do {
@@ -155,7 +161,7 @@
 		while ((sig = ffs(pending)) != 0){
 			sig--;
 			pending &= ~(1 << sig);
-			(*handlers[sig])(sig, sc);
+			(*handlers[sig])(sig, mc);
 		}
 
 		/*
@@ -169,28 +175,26 @@
 	} while (pending);
 }
 
-extern void hard_handler(int sig);
-
-void set_handler(int sig, void (*handler)(int), int flags, ...)
+void set_handler(int sig)
 {
 	struct sigaction action;
-	va_list ap;
+	int flags = SA_SIGINFO | SA_ONSTACK;
 	sigset_t sig_mask;
-	int mask;
 
-	handlers[sig] = (void (*)(int, struct sigcontext *)) handler;
-	action.sa_handler = hard_handler;
+	action.sa_sigaction = hard_handler;
 
+	/* block irq ones */
 	sigemptyset(&action.sa_mask);
-
-	va_start(ap, flags);
-	while ((mask = va_arg(ap, int)) != -1)
-		sigaddset(&action.sa_mask, mask);
-	va_end(ap);
+	sigaddset(&action.sa_mask, SIGVTALRM);
+	sigaddset(&action.sa_mask, SIGIO);
+	sigaddset(&action.sa_mask, SIGWINCH);
 
 	if (sig == SIGSEGV)
 		flags |= SA_NODEFER;
 
+	if (sigismember(&action.sa_mask, sig))
+		flags |= SA_RESTART; /* if it's an irq signal */
+
 	action.sa_flags = flags;
 	action.sa_restorer = NULL;
 	if (sigaction(sig, &action, NULL) < 0)
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index e771398..c0afff7 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -9,7 +9,6 @@
 #include <string.h>
 #include <sys/mman.h>
 #include "init.h"
-#include "kern_constants.h"
 #include "as-layout.h"
 #include "mm_id.h"
 #include "os.h"
@@ -17,7 +16,6 @@
 #include "ptrace_user.h"
 #include "registers.h"
 #include "skas.h"
-#include "user.h"
 #include "sysdep/ptrace.h"
 #include "sysdep/stub.h"
 
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index dee0e8c..cd65727 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -9,31 +9,23 @@
 #include <errno.h>
 #include <string.h>
 #include <sys/mman.h>
-#include <sys/ptrace.h>
 #include <sys/wait.h>
 #include <asm/unistd.h>
 #include "as-layout.h"
-#include "chan_user.h"
-#include "kern_constants.h"
+#include "init.h"
 #include "kern_util.h"
 #include "mem.h"
 #include "os.h"
-#include "process.h"
 #include "proc_mm.h"
 #include "ptrace_user.h"
 #include "registers.h"
 #include "skas.h"
 #include "skas_ptrace.h"
-#include "user.h"
 #include "sysdep/stub.h"
 
 int is_skas_winch(int pid, int fd, void *data)
 {
-	if (pid != getpgrp())
-		return 0;
-
-	register_winch_irq(-1, fd, -1, data, 0);
-	return 1;
+	return pid == getpgrp();
 }
 
 static int ptrace_dump_regs(int pid)
@@ -169,7 +161,7 @@
 
 	if (!local_using_sysemu)
 	{
-		err = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET,
+		err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET,
 			     __NR_getpid);
 		if (err < 0) {
 			printk(UM_KERN_ERR "handle_trap - nullifying syscall "
@@ -257,8 +249,8 @@
 
 		set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE);
 		sigemptyset(&sa.sa_mask);
-		sa.sa_flags = SA_ONSTACK | SA_NODEFER;
-		sa.sa_handler = (void *) v;
+		sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
+		sa.sa_sigaction = (void *) v;
 		sa.sa_restorer = NULL;
 		if (sigaction(SIGSEGV, &sa, NULL) < 0) {
 			printk(UM_KERN_ERR "userspace_tramp - setting SIGSEGV "
@@ -661,8 +653,7 @@
 {
 	int n;
 
-	set_handler(SIGWINCH, (__sighandler_t) sig_handler,
-		    SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGVTALRM, -1);
+	set_handler(SIGWINCH);
 
 	/*
 	 * Can't use UML_SETJMP or UML_LONGJMP here because they save
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 02ee9ad..425162e 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -13,12 +13,10 @@
 #include <signal.h>
 #include <string.h>
 #include <sys/mman.h>
-#include <sys/ptrace.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <asm/unistd.h>
 #include "init.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "mem_user.h"
 #include "ptrace_user.h"
@@ -225,7 +223,7 @@
 		goto fail;
 	}
 
-	n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, os_getpid());
+	n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, os_getpid());
 	if (n < 0) {
 		non_fatal("check_sysemu : failed to modify system call "
 			  "return");
@@ -261,7 +259,7 @@
 					  "doesn't singlestep");
 				goto fail;
 			}
-			n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET,
+			n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET,
 				   os_getpid());
 			if (n < 0)
 				fatal_perror("check_sysemu : failed to modify "
@@ -317,10 +315,10 @@
 			fatal("check_ptrace : expected (SIGTRAP|0x80), "
 			       "got status = %d", status);
 
-		syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET,
+		syscall = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_NR_OFFSET,
 				 0);
 		if (syscall == __NR_getpid) {
-			n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET,
+			n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET,
 				   __NR_getppid);
 			if (n < 0)
 				fatal_perror("check_ptrace : failed to modify "
diff --git a/arch/um/os-Linux/sys-i386/signal.c b/arch/um/os-Linux/sys-i386/signal.c
deleted file mode 100644
index f311609..0000000
--- a/arch/um/os-Linux/sys-i386/signal.c
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * Copyright (C) 2006 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <signal.h>
-
-extern void handle_signal(int sig, struct sigcontext *sc);
-
-void hard_handler(int sig)
-{
-	handle_signal(sig, (struct sigcontext *) (&sig + 1));
-}
diff --git a/arch/um/os-Linux/sys-i386/tls.c b/arch/um/os-Linux/sys-i386/tls.c
deleted file mode 100644
index 32ed41e..0000000
--- a/arch/um/os-Linux/sys-i386/tls.c
+++ /dev/null
@@ -1,36 +0,0 @@
-#include <errno.h>
-#include <linux/unistd.h>
-
-#include <sys/syscall.h>
-#include <unistd.h>
-
-#include "sysdep/tls.h"
-#include "user.h"
-
-/* Checks whether host supports TLS, and sets *tls_min according to the value
- * valid on the host.
- * i386 host have it == 6; x86_64 host have it == 12, for i386 emulation. */
-void check_host_supports_tls(int *supports_tls, int *tls_min) {
-	/* Values for x86 and x86_64.*/
-	int val[] = {GDT_ENTRY_TLS_MIN_I386, GDT_ENTRY_TLS_MIN_X86_64};
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(val); i++) {
-		user_desc_t info;
-		info.entry_number = val[i];
-
-		if (syscall(__NR_get_thread_area, &info) == 0) {
-			*tls_min = val[i];
-			*supports_tls = 1;
-			return;
-		} else {
-			if (errno == EINVAL)
-				continue;
-			else if (errno == ENOSYS)
-				*supports_tls = 0;
-				return;
-		}
-	}
-
-	*supports_tls = 0;
-}
diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile
deleted file mode 100644
index a44a47f..0000000
--- a/arch/um/os-Linux/sys-x86_64/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-#
-# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
-# Licensed under the GPL
-#
-
-obj-y = registers.o prctl.o signal.o task_size.o
-
-USER_OBJS := $(obj-y)
-
-include arch/um/scripts/Makefile.rules
diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c
deleted file mode 100644
index 594d97a..0000000
--- a/arch/um/os-Linux/sys-x86_64/registers.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2006 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <errno.h>
-#include <sys/ptrace.h>
-#define __FRAME_OFFSETS
-#include <asm/ptrace.h>
-#include "kern_constants.h"
-#include "longjmp.h"
-#include "user.h"
-
-int save_fp_registers(int pid, unsigned long *fp_regs)
-{
-	if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0)
-		return -errno;
-	return 0;
-}
-
-int restore_fp_registers(int pid, unsigned long *fp_regs)
-{
-	if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0)
-		return -errno;
-	return 0;
-}
-
-unsigned long get_thread_reg(int reg, jmp_buf *buf)
-{
-	switch (reg) {
-	case RIP:
-		return buf[0]->__rip;
-	case RSP:
-		return buf[0]->__rsp;
-	case RBP:
-		return buf[0]->__rbp;
-	default:
-		printk(UM_KERN_ERR "get_thread_regs - unknown register %d\n",
-		       reg);
-		return 0;
-	}
-}
-
-int get_fp_registers(int pid, unsigned long *regs)
-{
-	return save_fp_registers(pid, regs);
-}
-
-int put_fp_registers(int pid, unsigned long *regs)
-{
-	return restore_fp_registers(pid, regs);
-}
diff --git a/arch/um/os-Linux/sys-x86_64/signal.c b/arch/um/os-Linux/sys-x86_64/signal.c
deleted file mode 100644
index 82a3888..0000000
--- a/arch/um/os-Linux/sys-x86_64/signal.c
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * Copyright (C) 2006 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <signal.h>
-
-extern void handle_signal(int sig, struct sigcontext *sc);
-
-void hard_handler(int sig)
-{
-	struct ucontext *uc;
-	asm("movq %%rdx, %0" : "=r" (uc));
-
-	handle_signal(sig, (struct sigcontext *) &uc->uc_mcontext);
-}
diff --git a/arch/um/os-Linux/sys-x86_64/task_size.c b/arch/um/os-Linux/sys-x86_64/task_size.c
deleted file mode 100644
index 26a0dd1..0000000
--- a/arch/um/os-Linux/sys-x86_64/task_size.c
+++ /dev/null
@@ -1,5 +0,0 @@
-unsigned long os_get_top_address(unsigned long shift)
-{
-	/* The old value of CONFIG_TOP_ADDR */
-	return 0x7fc0000000;
-}
diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c
index 6e3359d..910499d 100644
--- a/arch/um/os-Linux/time.c
+++ b/arch/um/os-Linux/time.c
@@ -8,11 +8,9 @@
 #include <signal.h>
 #include <time.h>
 #include <sys/time.h>
-#include "kern_constants.h"
 #include "kern_util.h"
 #include "os.h"
-#include "process.h"
-#include "user.h"
+#include "internal.h"
 
 int set_interval(void)
 {
diff --git a/arch/um/os-Linux/tls.c b/arch/um/os-Linux/tls.c
deleted file mode 100644
index 73277801..0000000
--- a/arch/um/os-Linux/tls.c
+++ /dev/null
@@ -1,35 +0,0 @@
-#include <errno.h>
-#include <sys/ptrace.h>
-#include "sysdep/tls.h"
-
-/* TLS support - we basically rely on the host's one.*/
-
-#ifndef PTRACE_GET_THREAD_AREA
-#define PTRACE_GET_THREAD_AREA 25
-#endif
-
-#ifndef PTRACE_SET_THREAD_AREA
-#define PTRACE_SET_THREAD_AREA 26
-#endif
-
-int os_set_thread_area(user_desc_t *info, int pid)
-{
-	int ret;
-
-	ret = ptrace(PTRACE_SET_THREAD_AREA, pid, info->entry_number,
-		     (unsigned long) info);
-	if (ret < 0)
-		ret = -errno;
-	return ret;
-}
-
-int os_get_thread_area(user_desc_t *info, int pid)
-{
-	int ret;
-
-	ret = ptrace(PTRACE_GET_THREAD_AREA, pid, info->entry_number,
-		     (unsigned long) info);
-	if (ret < 0)
-		ret = -errno;
-	return ret;
-}
diff --git a/arch/um/os-Linux/tty.c b/arch/um/os-Linux/tty.c
index b09ff66..dd12b99 100644
--- a/arch/um/os-Linux/tty.c
+++ b/arch/um/os-Linux/tty.c
@@ -7,10 +7,8 @@
 #include <unistd.h>
 #include <errno.h>
 #include <fcntl.h>
-#include "kern_constants.h"
 #include "kern_util.h"
 #include "os.h"
-#include "user.h"
 
 struct grantpt_info {
 	int fd;
diff --git a/arch/um/os-Linux/uaccess.c b/arch/um/os-Linux/uaccess.c
deleted file mode 100644
index 087ed74..0000000
--- a/arch/um/os-Linux/uaccess.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk)
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <stddef.h>
-#include "longjmp.h"
-
-unsigned long __do_user_copy(void *to, const void *from, int n,
-			     void **fault_addr, jmp_buf **fault_catcher,
-			     void (*op)(void *to, const void *from,
-					int n), int *faulted_out)
-{
-	unsigned long *faddrp = (unsigned long *) fault_addr, ret;
-
-	jmp_buf jbuf;
-	*fault_catcher = &jbuf;
-	if (UML_SETJMP(&jbuf) == 0) {
-		(*op)(to, from, n);
-		ret = 0;
-		*faulted_out = 0;
-	}
-	else {
-		ret = *faddrp;
-		*faulted_out = 1;
-	}
-	*fault_addr = NULL;
-	*fault_catcher = NULL;
-	return ret;
-}
-
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index a27defb..4832eb5 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -13,9 +13,7 @@
 #include <unistd.h>
 #include <sys/stat.h>
 #include "init.h"
-#include "kern_constants.h"
 #include "os.h"
-#include "user.h"
 
 #define UML_DIR "~/.uml/"
 
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 5803b18..9e3b43b 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -13,9 +13,7 @@
 #include <wait.h>
 #include <sys/mman.h>
 #include <sys/utsname.h>
-#include "kern_constants.h"
 #include "os.h"
-#include "user.h"
 
 void stack_protections(unsigned long address)
 {
diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules
index 61107b6..2eb2843 100644
--- a/arch/um/scripts/Makefile.rules
+++ b/arch/um/scripts/Makefile.rules
@@ -8,7 +8,7 @@
 USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
 
 $(USER_OBJS:.o=.%): \
-	c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) $(CFLAGS_$(basetarget).o)
+	c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include user.h $(CFLAGS_$(basetarget).o)
 $(USER_OBJS) : CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ \
 	-Dunix -D__unix__ -D__$(SUBARCH)__ $(CF)
 
@@ -25,8 +25,3 @@
 define unprofile
 	$(patsubst -pg,,$(patsubst -fprofile-arcs -ftest-coverage,,$(1)))
 endef
-
-ifdef subarch-obj-y
-obj-y += subarch.o
-subarch-y = $(addprefix ../../$(HEADER_ARCH)/,$(subarch-obj-y))
-endif
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
deleted file mode 100644
index 3923cfb..0000000
--- a/arch/um/sys-i386/Makefile
+++ /dev/null
@@ -1,24 +0,0 @@
-#
-# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
-#
-
-obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
-	ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \
-	sys_call_table.o tls.o atomic64_cx8_32.o mem.o
-
-obj-$(CONFIG_BINFMT_ELF) += elfcore.o
-
-subarch-obj-y = lib/string_32.o
-subarch-obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += lib/rwsem.o
-subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o
-subarch-obj-$(CONFIG_MODULES) += kernel/module.o
-
-USER_OBJS := bugs.o ptrace_user.o fault.o
-
-USER_OBJS += user-offsets.s
-extra-y += user-offsets.s
-
-UNPROFILE_OBJS := stub_segv.o
-CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
-
-include arch/um/scripts/Makefile.rules
diff --git a/arch/um/sys-i386/asm/elf.h b/arch/um/sys-i386/asm/elf.h
deleted file mode 100644
index 4230555..0000000
--- a/arch/um/sys-i386/asm/elf.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-#ifndef __UM_ELF_I386_H
-#define __UM_ELF_I386_H
-
-#include <asm/user.h>
-#include "skas.h"
-
-#define R_386_NONE	0
-#define R_386_32	1
-#define R_386_PC32	2
-#define R_386_GOT32	3
-#define R_386_PLT32	4
-#define R_386_COPY	5
-#define R_386_GLOB_DAT	6
-#define R_386_JMP_SLOT	7
-#define R_386_RELATIVE	8
-#define R_386_GOTOFF	9
-#define R_386_GOTPC	10
-#define R_386_NUM	11
-
-typedef unsigned long elf_greg_t;
-
-#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-typedef struct user_i387_struct elf_fpregset_t;
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
-#define elf_check_arch(x) \
-	(((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
-
-#define ELF_CLASS	ELFCLASS32
-#define ELF_DATA        ELFDATA2LSB
-#define ELF_ARCH        EM_386
-
-#define ELF_PLAT_INIT(regs, load_addr) do { \
-	PT_REGS_EBX(regs) = 0; \
-	PT_REGS_ECX(regs) = 0; \
-	PT_REGS_EDX(regs) = 0; \
-	PT_REGS_ESI(regs) = 0; \
-	PT_REGS_EDI(regs) = 0; \
-	PT_REGS_EBP(regs) = 0; \
-	PT_REGS_EAX(regs) = 0; \
-} while (0)
-
-#define ELF_EXEC_PAGESIZE 4096
-
-#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3)
-
-/* Shamelessly stolen from include/asm-i386/elf.h */
-
-#define ELF_CORE_COPY_REGS(pr_reg, regs) do {	\
-	pr_reg[0] = PT_REGS_EBX(regs);		\
-	pr_reg[1] = PT_REGS_ECX(regs);		\
-	pr_reg[2] = PT_REGS_EDX(regs);		\
-	pr_reg[3] = PT_REGS_ESI(regs);		\
-	pr_reg[4] = PT_REGS_EDI(regs);		\
-	pr_reg[5] = PT_REGS_EBP(regs);		\
-	pr_reg[6] = PT_REGS_EAX(regs);		\
-	pr_reg[7] = PT_REGS_DS(regs);		\
-	pr_reg[8] = PT_REGS_ES(regs);		\
-	/* fake once used fs and gs selectors? */	\
-	pr_reg[9] = PT_REGS_DS(regs);		\
-	pr_reg[10] = PT_REGS_DS(regs);		\
-	pr_reg[11] = PT_REGS_SYSCALL_NR(regs);	\
-	pr_reg[12] = PT_REGS_IP(regs);		\
-	pr_reg[13] = PT_REGS_CS(regs);		\
-	pr_reg[14] = PT_REGS_EFLAGS(regs);	\
-	pr_reg[15] = PT_REGS_SP(regs);		\
-	pr_reg[16] = PT_REGS_SS(regs);		\
-} while (0);
-
-#define task_pt_regs(t) (&(t)->thread.regs)
-
-struct task_struct;
-
-extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu);
-
-#define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu)
-
-extern long elf_aux_hwcap;
-#define ELF_HWCAP (elf_aux_hwcap)
-
-extern char * elf_aux_platform;
-#define ELF_PLATFORM (elf_aux_platform)
-
-#define SET_PERSONALITY(ex) do { } while (0)
-
-extern unsigned long vsyscall_ehdr;
-extern unsigned long vsyscall_end;
-extern unsigned long __kernel_vsyscall;
-
-#define VSYSCALL_BASE vsyscall_ehdr
-#define VSYSCALL_END vsyscall_end
-
-/*
- * This is the range that is readable by user mode, and things
- * acting like user mode such as get_user_pages.
- */
-#define FIXADDR_USER_START      VSYSCALL_BASE
-#define FIXADDR_USER_END        VSYSCALL_END
-
-#define __HAVE_ARCH_GATE_AREA 1
-
-/*
- * Architecture-neutral AT_ values in 0-17, leave some room
- * for more of them, start the x86-specific ones at 32.
- */
-#define AT_SYSINFO		32
-#define AT_SYSINFO_EHDR		33
-
-#define ARCH_DLINFO						\
-do {								\
-	if ( vsyscall_ehdr ) {					\
-		NEW_AUX_ENT(AT_SYSINFO,	__kernel_vsyscall);	\
-		NEW_AUX_ENT(AT_SYSINFO_EHDR, vsyscall_ehdr);	\
-	}							\
-} while (0)
-
-#endif
diff --git a/arch/um/sys-i386/asm/module.h b/arch/um/sys-i386/asm/module.h
deleted file mode 100644
index 5ead4a0..0000000
--- a/arch/um/sys-i386/asm/module.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __UM_MODULE_I386_H
-#define __UM_MODULE_I386_H
-
-/* UML is simple */
-struct mod_arch_specific
-{
-};
-
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-
-#endif
diff --git a/arch/um/sys-i386/atomic64_cx8_32.S b/arch/um/sys-i386/atomic64_cx8_32.S
deleted file mode 100644
index 1e901d3..0000000
--- a/arch/um/sys-i386/atomic64_cx8_32.S
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * atomic64_t for 586+
- *
- * Copied from arch/x86/lib/atomic64_cx8_32.S
- *
- * Copyright © 2010  Luca Barbieri
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-
-#include <linux/linkage.h>
-#include <asm/alternative-asm.h>
-#include <asm/dwarf2.h>
-
-.macro SAVE reg
-	pushl_cfi %\reg
-	CFI_REL_OFFSET \reg, 0
-.endm
-
-.macro RESTORE reg
-	popl_cfi %\reg
-	CFI_RESTORE \reg
-.endm
-
-.macro read64 reg
-	movl %ebx, %eax
-	movl %ecx, %edx
-/* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */
-	LOCK_PREFIX
-	cmpxchg8b (\reg)
-.endm
-
-ENTRY(atomic64_read_cx8)
-	CFI_STARTPROC
-
-	read64 %ecx
-	ret
-	CFI_ENDPROC
-ENDPROC(atomic64_read_cx8)
-
-ENTRY(atomic64_set_cx8)
-	CFI_STARTPROC
-
-1:
-/* we don't need LOCK_PREFIX since aligned 64-bit writes
- * are atomic on 586 and newer */
-	cmpxchg8b (%esi)
-	jne 1b
-
-	ret
-	CFI_ENDPROC
-ENDPROC(atomic64_set_cx8)
-
-ENTRY(atomic64_xchg_cx8)
-	CFI_STARTPROC
-
-	movl %ebx, %eax
-	movl %ecx, %edx
-1:
-	LOCK_PREFIX
-	cmpxchg8b (%esi)
-	jne 1b
-
-	ret
-	CFI_ENDPROC
-ENDPROC(atomic64_xchg_cx8)
-
-.macro addsub_return func ins insc
-ENTRY(atomic64_\func\()_return_cx8)
-	CFI_STARTPROC
-	SAVE ebp
-	SAVE ebx
-	SAVE esi
-	SAVE edi
-
-	movl %eax, %esi
-	movl %edx, %edi
-	movl %ecx, %ebp
-
-	read64 %ebp
-1:
-	movl %eax, %ebx
-	movl %edx, %ecx
-	\ins\()l %esi, %ebx
-	\insc\()l %edi, %ecx
-	LOCK_PREFIX
-	cmpxchg8b (%ebp)
-	jne 1b
-
-10:
-	movl %ebx, %eax
-	movl %ecx, %edx
-	RESTORE edi
-	RESTORE esi
-	RESTORE ebx
-	RESTORE ebp
-	ret
-	CFI_ENDPROC
-ENDPROC(atomic64_\func\()_return_cx8)
-.endm
-
-addsub_return add add adc
-addsub_return sub sub sbb
-
-.macro incdec_return func ins insc
-ENTRY(atomic64_\func\()_return_cx8)
-	CFI_STARTPROC
-	SAVE ebx
-
-	read64 %esi
-1:
-	movl %eax, %ebx
-	movl %edx, %ecx
-	\ins\()l $1, %ebx
-	\insc\()l $0, %ecx
-	LOCK_PREFIX
-	cmpxchg8b (%esi)
-	jne 1b
-
-10:
-	movl %ebx, %eax
-	movl %ecx, %edx
-	RESTORE ebx
-	ret
-	CFI_ENDPROC
-ENDPROC(atomic64_\func\()_return_cx8)
-.endm
-
-incdec_return inc add adc
-incdec_return dec sub sbb
-
-ENTRY(atomic64_dec_if_positive_cx8)
-	CFI_STARTPROC
-	SAVE ebx
-
-	read64 %esi
-1:
-	movl %eax, %ebx
-	movl %edx, %ecx
-	subl $1, %ebx
-	sbb $0, %ecx
-	js 2f
-	LOCK_PREFIX
-	cmpxchg8b (%esi)
-	jne 1b
-
-2:
-	movl %ebx, %eax
-	movl %ecx, %edx
-	RESTORE ebx
-	ret
-	CFI_ENDPROC
-ENDPROC(atomic64_dec_if_positive_cx8)
-
-ENTRY(atomic64_add_unless_cx8)
-	CFI_STARTPROC
-	SAVE ebp
-	SAVE ebx
-/* these just push these two parameters on the stack */
-	SAVE edi
-	SAVE esi
-
-	movl %ecx, %ebp
-	movl %eax, %esi
-	movl %edx, %edi
-
-	read64 %ebp
-1:
-	cmpl %eax, 0(%esp)
-	je 4f
-2:
-	movl %eax, %ebx
-	movl %edx, %ecx
-	addl %esi, %ebx
-	adcl %edi, %ecx
-	LOCK_PREFIX
-	cmpxchg8b (%ebp)
-	jne 1b
-
-	movl $1, %eax
-3:
-	addl $8, %esp
-	CFI_ADJUST_CFA_OFFSET -8
-	RESTORE ebx
-	RESTORE ebp
-	ret
-4:
-	cmpl %edx, 4(%esp)
-	jne 2b
-	xorl %eax, %eax
-	jmp 3b
-	CFI_ENDPROC
-ENDPROC(atomic64_add_unless_cx8)
-
-ENTRY(atomic64_inc_not_zero_cx8)
-	CFI_STARTPROC
-	SAVE ebx
-
-	read64 %esi
-1:
-	testl %eax, %eax
-	je 4f
-2:
-	movl %eax, %ebx
-	movl %edx, %ecx
-	addl $1, %ebx
-	adcl $0, %ecx
-	LOCK_PREFIX
-	cmpxchg8b (%esi)
-	jne 1b
-
-	movl $1, %eax
-3:
-	RESTORE ebx
-	ret
-4:
-	testl %edx, %edx
-	jne 2b
-	jmp 3b
-	CFI_ENDPROC
-ENDPROC(atomic64_inc_not_zero_cx8)
diff --git a/arch/um/sys-i386/bug.c b/arch/um/sys-i386/bug.c
deleted file mode 100644
index 8d4f273..0000000
--- a/arch/um/sys-i386/bug.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL V2
- */
-
-#include <linux/uaccess.h>
-#include <asm/errno.h>
-
-/* Mostly copied from i386/x86_86 - eliminated the eip < PAGE_OFFSET because
- * that's not relevant in skas mode.
- */
-
-int is_valid_bugaddr(unsigned long eip)
-{
-	unsigned short ud2;
-
-	if (probe_kernel_address((unsigned short __user *)eip, ud2))
-		return 0;
-
-	return ud2 == 0x0b0f;
-}
diff --git a/arch/um/sys-i386/ksyms.c b/arch/um/sys-i386/ksyms.c
deleted file mode 100644
index bfbefd3..0000000
--- a/arch/um/sys-i386/ksyms.c
+++ /dev/null
@@ -1,5 +0,0 @@
-#include "linux/module.h"
-#include "asm/checksum.h"
-
-/* Networking helper routines. */
-EXPORT_SYMBOL(csum_partial);
diff --git a/arch/um/sys-i386/shared/sysdep/barrier.h b/arch/um/sys-i386/shared/sysdep/barrier.h
deleted file mode 100644
index b58d52c..0000000
--- a/arch/um/sys-i386/shared/sysdep/barrier.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __SYSDEP_I386_BARRIER_H
-#define __SYSDEP_I386_BARRIER_H
-
-/* Copied from include/asm-i386 for use by userspace.  i386 has the option
- * of using mfence, but I'm just using this, which works everywhere, for now.
- */
-#define mb() asm volatile("lock; addl $0,0(%esp)")
-
-#endif
diff --git a/arch/um/sys-i386/shared/sysdep/host_ldt.h b/arch/um/sys-i386/shared/sysdep/host_ldt.h
deleted file mode 100644
index 0953cc4..0000000
--- a/arch/um/sys-i386/shared/sysdep/host_ldt.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef __ASM_HOST_LDT_I386_H
-#define __ASM_HOST_LDT_I386_H
-
-#include <asm/ldt.h>
-
-/*
- * macros stolen from include/asm-i386/desc.h
- */
-#define LDT_entry_a(info) \
-	((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-
-#define LDT_entry_b(info) \
-	(((info)->base_addr & 0xff000000) | \
-	(((info)->base_addr & 0x00ff0000) >> 16) | \
-	((info)->limit & 0xf0000) | \
-	(((info)->read_exec_only ^ 1) << 9) | \
-	((info)->contents << 10) | \
-	(((info)->seg_not_present ^ 1) << 15) | \
-	((info)->seg_32bit << 22) | \
-	((info)->limit_in_pages << 23) | \
-	((info)->useable << 20) | \
-	0x7000)
-
-#define LDT_empty(info) (\
-	(info)->base_addr	== 0	&& \
-	(info)->limit		== 0	&& \
-	(info)->contents	== 0	&& \
-	(info)->read_exec_only	== 1	&& \
-	(info)->seg_32bit	== 0	&& \
-	(info)->limit_in_pages	== 0	&& \
-	(info)->seg_not_present	== 1	&& \
-	(info)->useable		== 0	)
-
-#endif
diff --git a/arch/um/sys-i386/shared/sysdep/ptrace.h b/arch/um/sys-i386/shared/sysdep/ptrace.h
deleted file mode 100644
index c398a50..0000000
--- a/arch/um/sys-i386/shared/sysdep/ptrace.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_I386_PTRACE_H
-#define __SYSDEP_I386_PTRACE_H
-
-#include "user_constants.h"
-#include "sysdep/faultinfo.h"
-
-#define MAX_REG_NR (UM_FRAME_SIZE / sizeof(unsigned long))
-#define MAX_REG_OFFSET (UM_FRAME_SIZE)
-
-static inline void update_debugregs(int seq) {}
-
-/* syscall emulation path in ptrace */
-
-#ifndef PTRACE_SYSEMU
-#define PTRACE_SYSEMU 31
-#endif
-
-void set_using_sysemu(int value);
-int get_using_sysemu(void);
-extern int sysemu_supported;
-
-#include "skas_ptregs.h"
-
-#define REGS_IP(r) ((r)[HOST_IP])
-#define REGS_SP(r) ((r)[HOST_SP])
-#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
-#define REGS_EAX(r) ((r)[HOST_EAX])
-#define REGS_EBX(r) ((r)[HOST_EBX])
-#define REGS_ECX(r) ((r)[HOST_ECX])
-#define REGS_EDX(r) ((r)[HOST_EDX])
-#define REGS_ESI(r) ((r)[HOST_ESI])
-#define REGS_EDI(r) ((r)[HOST_EDI])
-#define REGS_EBP(r) ((r)[HOST_EBP])
-#define REGS_CS(r) ((r)[HOST_CS])
-#define REGS_SS(r) ((r)[HOST_SS])
-#define REGS_DS(r) ((r)[HOST_DS])
-#define REGS_ES(r) ((r)[HOST_ES])
-#define REGS_FS(r) ((r)[HOST_FS])
-#define REGS_GS(r) ((r)[HOST_GS])
-
-#define REGS_SET_SYSCALL_RETURN(r, res) REGS_EAX(r) = (res)
-
-#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
-
-#ifndef PTRACE_SYSEMU_SINGLESTEP
-#define PTRACE_SYSEMU_SINGLESTEP 32
-#endif
-
-struct uml_pt_regs {
-	unsigned long gp[MAX_REG_NR];
-	unsigned long fp[HOST_FPX_SIZE];
-	struct faultinfo faultinfo;
-	long syscall;
-	int is_user;
-};
-
-#define EMPTY_UML_PT_REGS { }
-
-#define UPT_IP(r) REGS_IP((r)->gp)
-#define UPT_SP(r) REGS_SP((r)->gp)
-#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
-#define UPT_EAX(r) REGS_EAX((r)->gp)
-#define UPT_EBX(r) REGS_EBX((r)->gp)
-#define UPT_ECX(r) REGS_ECX((r)->gp)
-#define UPT_EDX(r) REGS_EDX((r)->gp)
-#define UPT_ESI(r) REGS_ESI((r)->gp)
-#define UPT_EDI(r) REGS_EDI((r)->gp)
-#define UPT_EBP(r) REGS_EBP((r)->gp)
-#define UPT_ORIG_EAX(r) ((r)->syscall)
-#define UPT_CS(r) REGS_CS((r)->gp)
-#define UPT_SS(r) REGS_SS((r)->gp)
-#define UPT_DS(r) REGS_DS((r)->gp)
-#define UPT_ES(r) REGS_ES((r)->gp)
-#define UPT_FS(r) REGS_FS((r)->gp)
-#define UPT_GS(r) REGS_GS((r)->gp)
-
-#define UPT_SYSCALL_ARG1(r) UPT_EBX(r)
-#define UPT_SYSCALL_ARG2(r) UPT_ECX(r)
-#define UPT_SYSCALL_ARG3(r) UPT_EDX(r)
-#define UPT_SYSCALL_ARG4(r) UPT_ESI(r)
-#define UPT_SYSCALL_ARG5(r) UPT_EDI(r)
-#define UPT_SYSCALL_ARG6(r) UPT_EBP(r)
-
-extern int user_context(unsigned long sp);
-
-#define UPT_IS_USER(r) ((r)->is_user)
-
-struct syscall_args {
-	unsigned long args[6];
-};
-
-#define SYSCALL_ARGS(r) ((struct syscall_args) \
-			 { .args = { UPT_SYSCALL_ARG1(r),	\
-				     UPT_SYSCALL_ARG2(r),	\
-				     UPT_SYSCALL_ARG3(r),	\
-				     UPT_SYSCALL_ARG4(r),	\
-				     UPT_SYSCALL_ARG5(r),	\
-				     UPT_SYSCALL_ARG6(r) } } )
-
-#define UPT_REG(regs, reg) \
-	({	unsigned long val; \
-		switch(reg){ \
-		case EIP: val = UPT_IP(regs); break; \
-		case UESP: val = UPT_SP(regs); break; \
-		case EAX: val = UPT_EAX(regs); break; \
-		case EBX: val = UPT_EBX(regs); break; \
-		case ECX: val = UPT_ECX(regs); break; \
-		case EDX: val = UPT_EDX(regs); break; \
-		case ESI: val = UPT_ESI(regs); break; \
-		case EDI: val = UPT_EDI(regs); break; \
-		case EBP: val = UPT_EBP(regs); break; \
-		case ORIG_EAX: val = UPT_ORIG_EAX(regs); break; \
-		case CS: val = UPT_CS(regs); break; \
-		case SS: val = UPT_SS(regs); break; \
-		case DS: val = UPT_DS(regs); break; \
-		case ES: val = UPT_ES(regs); break; \
-		case FS: val = UPT_FS(regs); break; \
-		case GS: val = UPT_GS(regs); break; \
-		case EFL: val = UPT_EFLAGS(regs); break; \
-		default :  \
-			panic("Bad register in UPT_REG : %d\n", reg);  \
-			val = -1; \
-		} \
-	        val; \
-	})
-
-#define UPT_SET(regs, reg, val) \
-	do { \
-		switch(reg){ \
-		case EIP: UPT_IP(regs) = val; break; \
-		case UESP: UPT_SP(regs) = val; break; \
-		case EAX: UPT_EAX(regs) = val; break; \
-		case EBX: UPT_EBX(regs) = val; break; \
-		case ECX: UPT_ECX(regs) = val; break; \
-		case EDX: UPT_EDX(regs) = val; break; \
-		case ESI: UPT_ESI(regs) = val; break; \
-		case EDI: UPT_EDI(regs) = val; break; \
-		case EBP: UPT_EBP(regs) = val; break; \
-		case ORIG_EAX: UPT_ORIG_EAX(regs) = val; break; \
-		case CS: UPT_CS(regs) = val; break; \
-		case SS: UPT_SS(regs) = val; break; \
-		case DS: UPT_DS(regs) = val; break; \
-		case ES: UPT_ES(regs) = val; break; \
-		case FS: UPT_FS(regs) = val; break; \
-		case GS: UPT_GS(regs) = val; break; \
-		case EFL: UPT_EFLAGS(regs) = val; break; \
-		default :  \
-			panic("Bad register in UPT_SET : %d\n", reg);  \
-			break; \
-		} \
-	} while (0)
-
-#define UPT_SET_SYSCALL_RETURN(r, res) \
-	REGS_SET_SYSCALL_RETURN((r)->regs, (res))
-
-#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
-
-#define UPT_ORIG_SYSCALL(r) UPT_EAX(r)
-#define UPT_SYSCALL_NR(r) UPT_ORIG_EAX(r)
-#define UPT_SYSCALL_RET(r) UPT_EAX(r)
-
-#define UPT_FAULTINFO(r) (&(r)->faultinfo)
-
-extern void arch_init_registers(int pid);
-
-#endif
diff --git a/arch/um/sys-i386/shared/sysdep/ptrace_user.h b/arch/um/sys-i386/shared/sysdep/ptrace_user.h
deleted file mode 100644
index ef56247..0000000
--- a/arch/um/sys-i386/shared/sysdep/ptrace_user.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_I386_PTRACE_USER_H__
-#define __SYSDEP_I386_PTRACE_USER_H__
-
-#include <sys/ptrace.h>
-#include <linux/ptrace.h>
-#include <asm/ptrace.h>
-#include "user_constants.h"
-
-#define PT_OFFSET(r) ((r) * sizeof(long))
-
-#define PT_SYSCALL_NR(regs) ((regs)[ORIG_EAX])
-#define PT_SYSCALL_NR_OFFSET PT_OFFSET(ORIG_EAX)
-
-#define PT_SYSCALL_ARG1_OFFSET PT_OFFSET(EBX)
-#define PT_SYSCALL_ARG2_OFFSET PT_OFFSET(ECX)
-#define PT_SYSCALL_ARG3_OFFSET PT_OFFSET(EDX)
-#define PT_SYSCALL_ARG4_OFFSET PT_OFFSET(ESI)
-#define PT_SYSCALL_ARG5_OFFSET PT_OFFSET(EDI)
-#define PT_SYSCALL_ARG6_OFFSET PT_OFFSET(EBP)
-
-#define PT_SYSCALL_RET_OFFSET PT_OFFSET(EAX)
-
-#define REGS_SYSCALL_NR EAX /* This is used before a system call */
-#define REGS_SYSCALL_ARG1 EBX
-#define REGS_SYSCALL_ARG2 ECX
-#define REGS_SYSCALL_ARG3 EDX
-#define REGS_SYSCALL_ARG4 ESI
-#define REGS_SYSCALL_ARG5 EDI
-#define REGS_SYSCALL_ARG6 EBP
-
-#define REGS_IP_INDEX EIP
-#define REGS_SP_INDEX UESP
-
-#define PT_IP_OFFSET PT_OFFSET(EIP)
-#define PT_IP(regs) ((regs)[EIP])
-#define PT_SP_OFFSET PT_OFFSET(UESP)
-#define PT_SP(regs) ((regs)[UESP])
-
-#define FP_SIZE ((HOST_FPX_SIZE > HOST_FP_SIZE) ? HOST_FPX_SIZE : HOST_FP_SIZE)
-
-#ifndef FRAME_SIZE
-#define FRAME_SIZE (17)
-#endif
-
-#endif
diff --git a/arch/um/sys-i386/shared/sysdep/sc.h b/arch/um/sys-i386/shared/sysdep/sc.h
deleted file mode 100644
index c57d178..0000000
--- a/arch/um/sys-i386/shared/sysdep/sc.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef __SYSDEP_I386_SC_H
-#define __SYSDEP_I386_SC_H
-
-#include <user_constants.h>
-
-#define SC_OFFSET(sc, field) \
-	*((unsigned long *) &(((char *) (sc))[HOST_##field]))
-#define SC_FP_OFFSET(sc, field) \
-	*((unsigned long *) &(((char *) (SC_FPSTATE(sc)))[HOST_##field]))
-#define SC_FP_OFFSET_PTR(sc, field, type) \
-	((type *) &(((char *) (SC_FPSTATE(sc)))[HOST_##field]))
-
-#define SC_IP(sc) SC_OFFSET(sc, SC_IP)
-#define SC_SP(sc) SC_OFFSET(sc, SC_SP)
-#define SC_FS(sc) SC_OFFSET(sc, SC_FS)
-#define SC_GS(sc) SC_OFFSET(sc, SC_GS)
-#define SC_DS(sc) SC_OFFSET(sc, SC_DS)
-#define SC_ES(sc) SC_OFFSET(sc, SC_ES)
-#define SC_SS(sc) SC_OFFSET(sc, SC_SS)
-#define SC_CS(sc) SC_OFFSET(sc, SC_CS)
-#define SC_EFLAGS(sc) SC_OFFSET(sc, SC_EFLAGS)
-#define SC_EAX(sc) SC_OFFSET(sc, SC_EAX)
-#define SC_EBX(sc) SC_OFFSET(sc, SC_EBX)
-#define SC_ECX(sc) SC_OFFSET(sc, SC_ECX)
-#define SC_EDX(sc) SC_OFFSET(sc, SC_EDX)
-#define SC_EDI(sc) SC_OFFSET(sc, SC_EDI)
-#define SC_ESI(sc) SC_OFFSET(sc, SC_ESI)
-#define SC_EBP(sc) SC_OFFSET(sc, SC_EBP)
-#define SC_TRAPNO(sc) SC_OFFSET(sc, SC_TRAPNO)
-#define SC_ERR(sc) SC_OFFSET(sc, SC_ERR)
-#define SC_CR2(sc) SC_OFFSET(sc, SC_CR2)
-#define SC_FPSTATE(sc) SC_OFFSET(sc, SC_FPSTATE)
-#define SC_SIGMASK(sc) SC_OFFSET(sc, SC_SIGMASK)
-#define SC_FP_CW(sc) SC_FP_OFFSET(sc, SC_FP_CW)
-#define SC_FP_SW(sc) SC_FP_OFFSET(sc, SC_FP_SW)
-#define SC_FP_TAG(sc) SC_FP_OFFSET(sc, SC_FP_TAG)
-#define SC_FP_IPOFF(sc) SC_FP_OFFSET(sc, SC_FP_IPOFF)
-#define SC_FP_CSSEL(sc) SC_FP_OFFSET(sc, SC_FP_CSSEL)
-#define SC_FP_DATAOFF(sc) SC_FP_OFFSET(sc, SC_FP_DATAOFF)
-#define SC_FP_DATASEL(sc) SC_FP_OFFSET(sc, SC_FP_DATASEL)
-#define SC_FP_ST(sc) SC_FP_OFFSET_PTR(sc, SC_FP_ST, struct _fpstate)
-#define SC_FXSR_ENV(sc) SC_FP_OFFSET_PTR(sc, SC_FXSR_ENV, void)
-
-#endif
diff --git a/arch/um/sys-i386/shared/sysdep/sigcontext.h b/arch/um/sys-i386/shared/sysdep/sigcontext.h
deleted file mode 100644
index f583c87..0000000
--- a/arch/um/sys-i386/shared/sysdep/sigcontext.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* 
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYS_SIGCONTEXT_I386_H
-#define __SYS_SIGCONTEXT_I386_H
-
-#include "sysdep/sc.h"
-
-#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
-
-#define GET_FAULTINFO_FROM_SC(fi, sc) \
-	{ \
-		(fi).cr2 = SC_CR2(sc); \
-		(fi).error_code = SC_ERR(sc); \
-		(fi).trap_no = SC_TRAPNO(sc); \
-	}
-
-/* This is Page Fault */
-#define SEGV_IS_FIXABLE(fi)	((fi)->trap_no == 14)
-
-/* SKAS3 has no trap_no on i386, but get_skas_faultinfo() sets it to 0. */
-#define SEGV_MAYBE_FIXABLE(fi)	((fi)->trap_no == 0 && ptrace_faultinfo)
-
-#endif
diff --git a/arch/um/sys-i386/shared/sysdep/vm-flags.h b/arch/um/sys-i386/shared/sysdep/vm-flags.h
deleted file mode 100644
index e0d24c5..0000000
--- a/arch/um/sys-i386/shared/sysdep/vm-flags.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL
- */
-
-#ifndef __VM_FLAGS_I386_H
-#define __VM_FLAGS_I386_H
-
-#define VM_DATA_DEFAULT_FLAGS \
-	(VM_READ | VM_WRITE | \
-	((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
-		 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#endif
diff --git a/arch/um/sys-i386/stub_segv.c b/arch/um/sys-i386/stub_segv.c
deleted file mode 100644
index 28ccf73..0000000
--- a/arch/um/sys-i386/stub_segv.c
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include "sysdep/stub.h"
-#include "sysdep/sigcontext.h"
-
-void __attribute__ ((__section__ (".__syscall_stub")))
-stub_segv_handler(int sig)
-{
-	struct sigcontext *sc = (struct sigcontext *) (&sig + 1);
-
-	GET_FAULTINFO_FROM_SC(*((struct faultinfo *) STUB_DATA), sc);
-
-	trap_myself();
-}
diff --git a/arch/um/sys-i386/user-offsets.c b/arch/um/sys-i386/user-offsets.c
deleted file mode 100644
index 5f883bf..0000000
--- a/arch/um/sys-i386/user-offsets.c
+++ /dev/null
@@ -1,53 +0,0 @@
-#include <stdio.h>
-#include <stddef.h>
-#include <signal.h>
-#include <sys/poll.h>
-#include <sys/user.h>
-#include <sys/mman.h>
-#include <asm/ptrace.h>
-
-#define DEFINE(sym, val) \
-	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define DEFINE_LONGS(sym, val) \
-	asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long)))
-
-#define OFFSET(sym, str, mem) \
-	DEFINE(sym, offsetof(struct str, mem));
-
-void foo(void)
-{
-	OFFSET(HOST_SC_TRAPNO, sigcontext, trapno);
-	OFFSET(HOST_SC_ERR, sigcontext, err);
-	OFFSET(HOST_SC_CR2, sigcontext, cr2);
-
-	DEFINE_LONGS(HOST_FP_SIZE, sizeof(struct user_fpregs_struct));
-	DEFINE_LONGS(HOST_FPX_SIZE, sizeof(struct user_fpxregs_struct));
-
-	DEFINE(HOST_IP, EIP);
-	DEFINE(HOST_SP, UESP);
-	DEFINE(HOST_EFLAGS, EFL);
-	DEFINE(HOST_EAX, EAX);
-	DEFINE(HOST_EBX, EBX);
-	DEFINE(HOST_ECX, ECX);
-	DEFINE(HOST_EDX, EDX);
-	DEFINE(HOST_ESI, ESI);
-	DEFINE(HOST_EDI, EDI);
-	DEFINE(HOST_EBP, EBP);
-	DEFINE(HOST_CS, CS);
-	DEFINE(HOST_SS, SS);
-	DEFINE(HOST_DS, DS);
-	DEFINE(HOST_FS, FS);
-	DEFINE(HOST_ES, ES);
-	DEFINE(HOST_GS, GS);
-	DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct));
-
-	/* XXX Duplicated between i386 and x86_64 */
-	DEFINE(UM_POLLIN, POLLIN);
-	DEFINE(UM_POLLPRI, POLLPRI);
-	DEFINE(UM_POLLOUT, POLLOUT);
-
-	DEFINE(UM_PROT_READ, PROT_READ);
-	DEFINE(UM_PROT_WRITE, PROT_WRITE);
-	DEFINE(UM_PROT_EXEC, PROT_EXEC);
-}
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
deleted file mode 100644
index bd4d1d3..0000000
--- a/arch/um/sys-x86_64/Makefile
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# Copyright 2003 PathScale, Inc.
-#
-# Licensed under the GPL
-#
-
-obj-y = bug.o bugs.o delay.o fault.o ldt.o ptrace.o ptrace_user.o mem.o \
-	setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \
-	sysrq.o ksyms.o tls.o
-
-obj-y += vdso/
-
-subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o \
-		lib/rwsem.o
-subarch-obj-$(CONFIG_MODULES) += kernel/module.o
-
-ldt-y = ../sys-i386/ldt.o
-
-USER_OBJS := ptrace_user.o
-
-USER_OBJS += user-offsets.s
-extra-y += user-offsets.s
-
-UNPROFILE_OBJS := stub_segv.o
-CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
-
-include arch/um/scripts/Makefile.rules
diff --git a/arch/um/sys-x86_64/asm/archparam.h b/arch/um/sys-x86_64/asm/archparam.h
deleted file mode 100644
index 6c08366..0000000
--- a/arch/um/sys-x86_64/asm/archparam.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#ifndef __UM_ARCHPARAM_X86_64_H
-#define __UM_ARCHPARAM_X86_64_H
-
-
-/* No user-accessible fixmap addresses, i.e. vsyscall */
-#define FIXADDR_USER_START	0
-#define FIXADDR_USER_END	0
-
-#endif
-
diff --git a/arch/um/sys-x86_64/asm/elf.h b/arch/um/sys-x86_64/asm/elf.h
deleted file mode 100644
index 11a2bfb..0000000
--- a/arch/um/sys-x86_64/asm/elf.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- *
- * Licensed under the GPL
- */
-#ifndef __UM_ELF_X86_64_H
-#define __UM_ELF_X86_64_H
-
-#include <asm/user.h>
-#include "skas.h"
-
-/* x86-64 relocation types, taken from asm-x86_64/elf.h */
-#define R_X86_64_NONE		0	/* No reloc */
-#define R_X86_64_64		1	/* Direct 64 bit  */
-#define R_X86_64_PC32		2	/* PC relative 32 bit signed */
-#define R_X86_64_GOT32		3	/* 32 bit GOT entry */
-#define R_X86_64_PLT32		4	/* 32 bit PLT address */
-#define R_X86_64_COPY		5	/* Copy symbol at runtime */
-#define R_X86_64_GLOB_DAT	6	/* Create GOT entry */
-#define R_X86_64_JUMP_SLOT	7	/* Create PLT entry */
-#define R_X86_64_RELATIVE	8	/* Adjust by program base */
-#define R_X86_64_GOTPCREL	9	/* 32 bit signed pc relative
-					   offset to GOT */
-#define R_X86_64_32		10	/* Direct 32 bit zero extended */
-#define R_X86_64_32S		11	/* Direct 32 bit sign extended */
-#define R_X86_64_16		12	/* Direct 16 bit zero extended */
-#define R_X86_64_PC16		13	/* 16 bit sign extended pc relative */
-#define R_X86_64_8		14	/* Direct 8 bit sign extended  */
-#define R_X86_64_PC8		15	/* 8 bit sign extended pc relative */
-
-#define R_X86_64_NUM		16
-
-typedef unsigned long elf_greg_t;
-
-#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-typedef struct user_i387_struct elf_fpregset_t;
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
-#define elf_check_arch(x) \
-	((x)->e_machine == EM_X86_64)
-
-#define ELF_CLASS	ELFCLASS64
-#define ELF_DATA        ELFDATA2LSB
-#define ELF_ARCH        EM_X86_64
-
-#define ELF_PLAT_INIT(regs, load_addr)    do { \
-	PT_REGS_RBX(regs) = 0; \
-	PT_REGS_RCX(regs) = 0; \
-	PT_REGS_RDX(regs) = 0; \
-	PT_REGS_RSI(regs) = 0; \
-	PT_REGS_RDI(regs) = 0; \
-	PT_REGS_RBP(regs) = 0; \
-	PT_REGS_RAX(regs) = 0; \
-	PT_REGS_R8(regs) = 0; \
-	PT_REGS_R9(regs) = 0; \
-	PT_REGS_R10(regs) = 0; \
-	PT_REGS_R11(regs) = 0; \
-	PT_REGS_R12(regs) = 0; \
-	PT_REGS_R13(regs) = 0; \
-	PT_REGS_R14(regs) = 0; \
-	PT_REGS_R15(regs) = 0; \
-} while (0)
-
-#define ELF_CORE_COPY_REGS(pr_reg, _regs)		\
-	(pr_reg)[0] = (_regs)->regs.gp[0];			\
-	(pr_reg)[1] = (_regs)->regs.gp[1];			\
-	(pr_reg)[2] = (_regs)->regs.gp[2];			\
-	(pr_reg)[3] = (_regs)->regs.gp[3];			\
-	(pr_reg)[4] = (_regs)->regs.gp[4];			\
-	(pr_reg)[5] = (_regs)->regs.gp[5];			\
-	(pr_reg)[6] = (_regs)->regs.gp[6];			\
-	(pr_reg)[7] = (_regs)->regs.gp[7];			\
-	(pr_reg)[8] = (_regs)->regs.gp[8];			\
-	(pr_reg)[9] = (_regs)->regs.gp[9];			\
-	(pr_reg)[10] = (_regs)->regs.gp[10];			\
-	(pr_reg)[11] = (_regs)->regs.gp[11];			\
-	(pr_reg)[12] = (_regs)->regs.gp[12];			\
-	(pr_reg)[13] = (_regs)->regs.gp[13];			\
-	(pr_reg)[14] = (_regs)->regs.gp[14];			\
-	(pr_reg)[15] = (_regs)->regs.gp[15];			\
-	(pr_reg)[16] = (_regs)->regs.gp[16];			\
-	(pr_reg)[17] = (_regs)->regs.gp[17];			\
-	(pr_reg)[18] = (_regs)->regs.gp[18];			\
-	(pr_reg)[19] = (_regs)->regs.gp[19];			\
-	(pr_reg)[20] = (_regs)->regs.gp[20];			\
-	(pr_reg)[21] = current->thread.arch.fs;			\
-	(pr_reg)[22] = 0;					\
-	(pr_reg)[23] = 0;					\
-	(pr_reg)[24] = 0;					\
-	(pr_reg)[25] = 0;					\
-	(pr_reg)[26] = 0;
-
-#define task_pt_regs(t) (&(t)->thread.regs)
-
-struct task_struct;
-
-extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu);
-
-#define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu)
-
-#ifdef TIF_IA32 /* XXX */
-#error XXX, indeed
-        clear_thread_flag(TIF_IA32);
-#endif
-
-#define ELF_EXEC_PAGESIZE 4096
-
-#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3)
-
-extern long elf_aux_hwcap;
-#define ELF_HWCAP (elf_aux_hwcap)
-
-#define ELF_PLATFORM "x86_64"
-
-#define SET_PERSONALITY(ex) do ; while(0)
-
-#define __HAVE_ARCH_GATE_AREA 1
-#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
-struct linux_binprm;
-extern int arch_setup_additional_pages(struct linux_binprm *bprm,
-	int uses_interp);
-
-extern unsigned long um_vdso_addr;
-#define AT_SYSINFO_EHDR 33
-#define ARCH_DLINFO	NEW_AUX_ENT(AT_SYSINFO_EHDR, um_vdso_addr)
-
-#endif
diff --git a/arch/um/sys-x86_64/asm/module.h b/arch/um/sys-x86_64/asm/module.h
deleted file mode 100644
index 8eb79c2..0000000
--- a/arch/um/sys-x86_64/asm/module.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#ifndef __UM_MODULE_X86_64_H
-#define __UM_MODULE_X86_64_H
-
-/* UML is simple */
-struct mod_arch_specific
-{
-};
-
-#define Elf_Shdr Elf64_Shdr
-#define Elf_Sym Elf64_Sym
-#define Elf_Ehdr Elf64_Ehdr
-
-#endif
-
diff --git a/arch/um/sys-x86_64/delay.c b/arch/um/sys-x86_64/delay.c
deleted file mode 100644
index f3fe1a6..0000000
--- a/arch/um/sys-x86_64/delay.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
- * Mostly copied from arch/x86/lib/delay.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <asm/param.h>
-
-void __delay(unsigned long loops)
-{
-	asm volatile(
-		"test %0,%0\n"
-		"jz 3f\n"
-		"jmp 1f\n"
-
-		".align 16\n"
-		"1: jmp 2f\n"
-
-		".align 16\n"
-		"2: dec %0\n"
-		" jnz 2b\n"
-		"3: dec %0\n"
-
-		: /* we don't need output */
-		: "a" (loops)
-	);
-}
-EXPORT_SYMBOL(__delay);
-
-inline void __const_udelay(unsigned long xloops)
-{
-	int d0;
-
-	xloops *= 4;
-	asm("mull %%edx"
-		: "=d" (xloops), "=&a" (d0)
-		: "1" (xloops), "0"
-		(loops_per_jiffy * (HZ/4)));
-
-	__delay(++xloops);
-}
-EXPORT_SYMBOL(__const_udelay);
-
-void __udelay(unsigned long usecs)
-{
-	__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
-}
-EXPORT_SYMBOL(__udelay);
-
-void __ndelay(unsigned long nsecs)
-{
-	__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
-}
-EXPORT_SYMBOL(__ndelay);
diff --git a/arch/um/sys-x86_64/fault.c b/arch/um/sys-x86_64/fault.c
deleted file mode 100644
index ce85117..0000000
--- a/arch/um/sys-x86_64/fault.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#include "sysdep/ptrace.h"
-
-/* These two are from asm-um/uaccess.h and linux/module.h, check them. */
-struct exception_table_entry
-{
-	unsigned long insn;
-	unsigned long fixup;
-};
-
-const struct exception_table_entry *search_exception_tables(unsigned long add);
-
-int arch_fixup(unsigned long address, struct uml_pt_regs *regs)
-{
-	const struct exception_table_entry *fixup;
-
-	fixup = search_exception_tables(address);
-	if (fixup != 0) {
-		UPT_IP(regs) = fixup->fixup;
-		return 1;
-	}
-	return 0;
-}
diff --git a/arch/um/sys-x86_64/ptrace_user.c b/arch/um/sys-x86_64/ptrace_user.c
deleted file mode 100644
index c57a496..0000000
--- a/arch/um/sys-x86_64/ptrace_user.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#include <errno.h>
-#include "ptrace_user.h"
-
-int ptrace_getregs(long pid, unsigned long *regs_out)
-{
-	if (ptrace(PTRACE_GETREGS, pid, 0, regs_out) < 0)
-		return -errno;
-	return(0);
-}
-
-int ptrace_setregs(long pid, unsigned long *regs_out)
-{
-	if (ptrace(PTRACE_SETREGS, pid, 0, regs_out) < 0)
-		return -errno;
-	return(0);
-}
diff --git a/arch/um/sys-x86_64/shared/sysdep/barrier.h b/arch/um/sys-x86_64/shared/sysdep/barrier.h
deleted file mode 100644
index 7b610be..0000000
--- a/arch/um/sys-x86_64/shared/sysdep/barrier.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __SYSDEP_X86_64_BARRIER_H
-#define __SYSDEP_X86_64_BARRIER_H
-
-/* Copied from include/asm-x86_64 for use by userspace. */
-#define mb() 	asm volatile("mfence":::"memory")
-
-#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/host_ldt.h b/arch/um/sys-x86_64/shared/sysdep/host_ldt.h
deleted file mode 100644
index e8b1be1..0000000
--- a/arch/um/sys-x86_64/shared/sysdep/host_ldt.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef __ASM_HOST_LDT_X86_64_H
-#define __ASM_HOST_LDT_X86_64_H
-
-#include <asm/ldt.h>
-
-/*
- * macros stolen from include/asm-x86_64/desc.h
- */
-#define LDT_entry_a(info) \
-	((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-
-/* Don't allow setting of the lm bit. It is useless anyways because
- * 64bit system calls require __USER_CS. */
-#define LDT_entry_b(info) \
-	(((info)->base_addr & 0xff000000) | \
-	(((info)->base_addr & 0x00ff0000) >> 16) | \
-	((info)->limit & 0xf0000) | \
-	(((info)->read_exec_only ^ 1) << 9) | \
-	((info)->contents << 10) | \
-	(((info)->seg_not_present ^ 1) << 15) | \
-	((info)->seg_32bit << 22) | \
-	((info)->limit_in_pages << 23) | \
-	((info)->useable << 20) | \
-	/* ((info)->lm << 21) | */ \
-	0x7000)
-
-#define LDT_empty(info) (\
-	(info)->base_addr	== 0	&& \
-	(info)->limit		== 0	&& \
-	(info)->contents	== 0	&& \
-	(info)->read_exec_only	== 1	&& \
-	(info)->seg_32bit	== 0	&& \
-	(info)->limit_in_pages	== 0	&& \
-	(info)->seg_not_present	== 1	&& \
-	(info)->useable		== 0	&& \
-	(info)->lm              == 0)
-
-#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/kernel-offsets.h b/arch/um/sys-x86_64/shared/sysdep/kernel-offsets.h
deleted file mode 100644
index a307237..0000000
--- a/arch/um/sys-x86_64/shared/sysdep/kernel-offsets.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#include <linux/stddef.h>
-#include <linux/sched.h>
-#include <linux/time.h>
-#include <linux/elf.h>
-#include <linux/crypto.h>
-#include <asm/page.h>
-#include <asm/mman.h>
-
-#define DEFINE(sym, val) \
-	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define DEFINE_STR1(x) #x
-#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " DEFINE_STR1(val) " " #val: : )
-
-#define BLANK() asm volatile("\n->" : : )
-
-#define OFFSET(sym, str, mem) \
-	DEFINE(sym, offsetof(struct str, mem));
-
-void foo(void)
-{
-#include <common-offsets.h>
-}
diff --git a/arch/um/sys-x86_64/shared/sysdep/ptrace.h b/arch/um/sys-x86_64/shared/sysdep/ptrace.h
deleted file mode 100644
index 8ee8f8e..0000000
--- a/arch/um/sys-x86_64/shared/sysdep/ptrace.h
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- *
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_X86_64_PTRACE_H
-#define __SYSDEP_X86_64_PTRACE_H
-
-#include "user_constants.h"
-#include "sysdep/faultinfo.h"
-
-#define MAX_REG_OFFSET (UM_FRAME_SIZE)
-#define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long))
-
-#include "skas_ptregs.h"
-
-#define REGS_IP(r) ((r)[HOST_IP])
-#define REGS_SP(r) ((r)[HOST_SP])
-
-#define REGS_RBX(r) ((r)[HOST_RBX])
-#define REGS_RCX(r) ((r)[HOST_RCX])
-#define REGS_RDX(r) ((r)[HOST_RDX])
-#define REGS_RSI(r) ((r)[HOST_RSI])
-#define REGS_RDI(r) ((r)[HOST_RDI])
-#define REGS_RBP(r) ((r)[HOST_RBP])
-#define REGS_RAX(r) ((r)[HOST_RAX])
-#define REGS_R8(r) ((r)[HOST_R8])
-#define REGS_R9(r) ((r)[HOST_R9])
-#define REGS_R10(r) ((r)[HOST_R10])
-#define REGS_R11(r) ((r)[HOST_R11])
-#define REGS_R12(r) ((r)[HOST_R12])
-#define REGS_R13(r) ((r)[HOST_R13])
-#define REGS_R14(r) ((r)[HOST_R14])
-#define REGS_R15(r) ((r)[HOST_R15])
-#define REGS_CS(r) ((r)[HOST_CS])
-#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
-#define REGS_SS(r) ((r)[HOST_SS])
-
-#define HOST_FS_BASE 21
-#define HOST_GS_BASE 22
-#define HOST_DS 23
-#define HOST_ES 24
-#define HOST_FS 25
-#define HOST_GS 26
-
-/* Also defined in asm/ptrace-x86_64.h, but not in libc headers.  So, these
- * are already defined for kernel code, but not for userspace code.
- */
-#ifndef FS_BASE
-/* These aren't defined in ptrace.h, but exist in struct user_regs_struct,
- * which is what x86_64 ptrace actually uses.
- */
-#define FS_BASE (HOST_FS_BASE * sizeof(long))
-#define GS_BASE (HOST_GS_BASE * sizeof(long))
-#define DS (HOST_DS * sizeof(long))
-#define ES (HOST_ES * sizeof(long))
-#define FS (HOST_FS * sizeof(long))
-#define GS (HOST_GS * sizeof(long))
-#endif
-
-#define REGS_FS_BASE(r) ((r)[HOST_FS_BASE])
-#define REGS_GS_BASE(r) ((r)[HOST_GS_BASE])
-#define REGS_DS(r) ((r)[HOST_DS])
-#define REGS_ES(r) ((r)[HOST_ES])
-#define REGS_FS(r) ((r)[HOST_FS])
-#define REGS_GS(r) ((r)[HOST_GS])
-
-#define REGS_ORIG_RAX(r) ((r)[HOST_ORIG_RAX])
-
-#define REGS_SET_SYSCALL_RETURN(r, res) REGS_RAX(r) = (res)
-
-#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
-
-#define REGS_SEGV_IS_FIXABLE(r) SEGV_IS_FIXABLE((r)->trap_type)
-
-#define REGS_FAULT_ADDR(r) ((r)->fault_addr)
-
-#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type)
-
-#define REGS_TRAP(r) ((r)->trap_type)
-
-#define REGS_ERR(r) ((r)->fault_type)
-
-struct uml_pt_regs {
-	unsigned long gp[MAX_REG_NR];
-	unsigned long fp[HOST_FP_SIZE];
-	struct faultinfo faultinfo;
-	long syscall;
-	int is_user;
-};
-
-#define EMPTY_UML_PT_REGS { }
-
-#define UPT_RBX(r) REGS_RBX((r)->gp)
-#define UPT_RCX(r) REGS_RCX((r)->gp)
-#define UPT_RDX(r) REGS_RDX((r)->gp)
-#define UPT_RSI(r) REGS_RSI((r)->gp)
-#define UPT_RDI(r) REGS_RDI((r)->gp)
-#define UPT_RBP(r) REGS_RBP((r)->gp)
-#define UPT_RAX(r) REGS_RAX((r)->gp)
-#define UPT_R8(r) REGS_R8((r)->gp)
-#define UPT_R9(r) REGS_R9((r)->gp)
-#define UPT_R10(r) REGS_R10((r)->gp)
-#define UPT_R11(r) REGS_R11((r)->gp)
-#define UPT_R12(r) REGS_R12((r)->gp)
-#define UPT_R13(r) REGS_R13((r)->gp)
-#define UPT_R14(r) REGS_R14((r)->gp)
-#define UPT_R15(r) REGS_R15((r)->gp)
-#define UPT_CS(r) REGS_CS((r)->gp)
-#define UPT_FS_BASE(r) REGS_FS_BASE((r)->gp)
-#define UPT_FS(r) REGS_FS((r)->gp)
-#define UPT_GS_BASE(r) REGS_GS_BASE((r)->gp)
-#define UPT_GS(r) REGS_GS((r)->gp)
-#define UPT_DS(r) REGS_DS((r)->gp)
-#define UPT_ES(r) REGS_ES((r)->gp)
-#define UPT_CS(r) REGS_CS((r)->gp)
-#define UPT_SS(r) REGS_SS((r)->gp)
-#define UPT_ORIG_RAX(r) REGS_ORIG_RAX((r)->gp)
-
-#define UPT_IP(r) REGS_IP((r)->gp)
-#define UPT_SP(r) REGS_SP((r)->gp)
-
-#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
-#define UPT_SYSCALL_NR(r) ((r)->syscall)
-#define UPT_SYSCALL_RET(r) UPT_RAX(r)
-
-extern int user_context(unsigned long sp);
-
-#define UPT_IS_USER(r) ((r)->is_user)
-
-#define UPT_SYSCALL_ARG1(r) UPT_RDI(r)
-#define UPT_SYSCALL_ARG2(r) UPT_RSI(r)
-#define UPT_SYSCALL_ARG3(r) UPT_RDX(r)
-#define UPT_SYSCALL_ARG4(r) UPT_R10(r)
-#define UPT_SYSCALL_ARG5(r) UPT_R8(r)
-#define UPT_SYSCALL_ARG6(r) UPT_R9(r)
-
-struct syscall_args {
-	unsigned long args[6];
-};
-
-#define SYSCALL_ARGS(r) ((struct syscall_args) \
-			 { .args = { UPT_SYSCALL_ARG1(r),	 \
-				     UPT_SYSCALL_ARG2(r),	 \
-				     UPT_SYSCALL_ARG3(r),	 \
-				     UPT_SYSCALL_ARG4(r),	 \
-				     UPT_SYSCALL_ARG5(r),	 \
-				     UPT_SYSCALL_ARG6(r) } } )
-
-#define UPT_REG(regs, reg) \
-	({      unsigned long val;		\
-		switch(reg){						\
-		case R8: val = UPT_R8(regs); break;			\
-		case R9: val = UPT_R9(regs); break;			\
-		case R10: val = UPT_R10(regs); break;			\
-		case R11: val = UPT_R11(regs); break;			\
-		case R12: val = UPT_R12(regs); break;			\
-		case R13: val = UPT_R13(regs); break;			\
-		case R14: val = UPT_R14(regs); break;			\
-		case R15: val = UPT_R15(regs); break;			\
-		case RIP: val = UPT_IP(regs); break;			\
-		case RSP: val = UPT_SP(regs); break;			\
-		case RAX: val = UPT_RAX(regs); break;			\
-		case RBX: val = UPT_RBX(regs); break;			\
-		case RCX: val = UPT_RCX(regs); break;			\
-		case RDX: val = UPT_RDX(regs); break;			\
-		case RSI: val = UPT_RSI(regs); break;			\
-		case RDI: val = UPT_RDI(regs); break;			\
-		case RBP: val = UPT_RBP(regs); break;			\
-		case ORIG_RAX: val = UPT_ORIG_RAX(regs); break;		\
-		case CS: val = UPT_CS(regs); break;			\
-		case SS: val = UPT_SS(regs); break;			\
-		case FS_BASE: val = UPT_FS_BASE(regs); break;		\
-		case GS_BASE: val = UPT_GS_BASE(regs); break;		\
-		case DS: val = UPT_DS(regs); break;			\
-		case ES: val = UPT_ES(regs); break;			\
-		case FS : val = UPT_FS (regs); break;			\
-		case GS: val = UPT_GS(regs); break;			\
-		case EFLAGS: val = UPT_EFLAGS(regs); break;		\
-		default :						\
-			panic("Bad register in UPT_REG : %d\n", reg);	\
-			val = -1;					\
-		}							\
-		val;							\
-	})
-
-
-#define UPT_SET(regs, reg, val) \
-	({      unsigned long __upt_val = val;	\
-		switch(reg){						\
-		case R8: UPT_R8(regs) = __upt_val; break;		\
-		case R9: UPT_R9(regs) = __upt_val; break;		\
-		case R10: UPT_R10(regs) = __upt_val; break;		\
-		case R11: UPT_R11(regs) = __upt_val; break;		\
-		case R12: UPT_R12(regs) = __upt_val; break;		\
-		case R13: UPT_R13(regs) = __upt_val; break;		\
-		case R14: UPT_R14(regs) = __upt_val; break;		\
-		case R15: UPT_R15(regs) = __upt_val; break;		\
-		case RIP: UPT_IP(regs) = __upt_val; break;		\
-		case RSP: UPT_SP(regs) = __upt_val; break;		\
-		case RAX: UPT_RAX(regs) = __upt_val; break;		\
-		case RBX: UPT_RBX(regs) = __upt_val; break;		\
-		case RCX: UPT_RCX(regs) = __upt_val; break;		\
-		case RDX: UPT_RDX(regs) = __upt_val; break;		\
-		case RSI: UPT_RSI(regs) = __upt_val; break;		\
-		case RDI: UPT_RDI(regs) = __upt_val; break;		\
-		case RBP: UPT_RBP(regs) = __upt_val; break;		\
-		case ORIG_RAX: UPT_ORIG_RAX(regs) = __upt_val; break;	\
-		case CS: UPT_CS(regs) = __upt_val; break;		\
-		case SS: UPT_SS(regs) = __upt_val; break;		\
-		case FS_BASE: UPT_FS_BASE(regs) = __upt_val; break;	\
-		case GS_BASE: UPT_GS_BASE(regs) = __upt_val; break;	\
-		case DS: UPT_DS(regs) = __upt_val; break;		\
-		case ES: UPT_ES(regs) = __upt_val; break;		\
-		case FS: UPT_FS(regs) = __upt_val; break;		\
-		case GS: UPT_GS(regs) = __upt_val; break;		\
-		case EFLAGS: UPT_EFLAGS(regs) = __upt_val; break;	\
-		default :						\
-			panic("Bad register in UPT_SET : %d\n", reg);	\
-			break;						\
-		}							\
-		__upt_val;						\
-	})
-
-#define UPT_SET_SYSCALL_RETURN(r, res) \
-	REGS_SET_SYSCALL_RETURN((r)->regs, (res))
-
-#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
-
-#define UPT_SEGV_IS_FIXABLE(r) REGS_SEGV_IS_FIXABLE(&r->skas)
-
-#define UPT_FAULTINFO(r) (&(r)->faultinfo)
-
-static inline void arch_init_registers(int pid)
-{
-}
-
-#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/ptrace_user.h b/arch/um/sys-x86_64/shared/sysdep/ptrace_user.h
deleted file mode 100644
index 4dbccdb..0000000
--- a/arch/um/sys-x86_64/shared/sysdep/ptrace_user.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_X86_64_PTRACE_USER_H__
-#define __SYSDEP_X86_64_PTRACE_USER_H__
-
-#define __FRAME_OFFSETS
-#include <sys/ptrace.h>
-#include <linux/ptrace.h>
-#include <asm/ptrace.h>
-#undef __FRAME_OFFSETS
-#include "user_constants.h"
-
-#define PT_INDEX(off) ((off) / sizeof(unsigned long))
-
-#define PT_SYSCALL_NR(regs) ((regs)[PT_INDEX(ORIG_RAX)])
-#define PT_SYSCALL_NR_OFFSET (ORIG_RAX)
-
-#define PT_SYSCALL_ARG1(regs) (((unsigned long *) (regs))[PT_INDEX(RDI)])
-#define PT_SYSCALL_ARG1_OFFSET (RDI)
-
-#define PT_SYSCALL_ARG2(regs) (((unsigned long *) (regs))[PT_INDEX(RSI)])
-#define PT_SYSCALL_ARG2_OFFSET (RSI)
-
-#define PT_SYSCALL_ARG3(regs) (((unsigned long *) (regs))[PT_INDEX(RDX)])
-#define PT_SYSCALL_ARG3_OFFSET (RDX)
-
-#define PT_SYSCALL_ARG4(regs) (((unsigned long *) (regs))[PT_INDEX(RCX)])
-#define PT_SYSCALL_ARG4_OFFSET (RCX)
-
-#define PT_SYSCALL_ARG5(regs) (((unsigned long *) (regs))[PT_INDEX(R8)])
-#define PT_SYSCALL_ARG5_OFFSET (R8)
-
-#define PT_SYSCALL_ARG6(regs) (((unsigned long *) (regs))[PT_INDEX(R9)])
-#define PT_SYSCALL_ARG6_OFFSET (R9)
-
-#define PT_SYSCALL_RET_OFFSET (RAX)
-
-#define PT_IP_OFFSET (RIP)
-#define PT_IP(regs) ((regs)[PT_INDEX(RIP)])
-
-#define PT_SP_OFFSET (RSP)
-#define PT_SP(regs) ((regs)[PT_INDEX(RSP)])
-
-#define PT_ORIG_RAX_OFFSET (ORIG_RAX)
-#define PT_ORIG_RAX(regs) ((regs)[PT_INDEX(ORIG_RAX)])
-
-/*
- * x86_64 FC3 doesn't define this in /usr/include/linux/ptrace.h even though
- * it's defined in the kernel's include/linux/ptrace.h. Additionally, use the
- * 2.4 name and value for 2.4 host compatibility.
- */
-#ifndef PTRACE_OLDSETOPTIONS
-#define PTRACE_OLDSETOPTIONS 21
-#endif
-
-/*
- * These are before the system call, so the system call number is RAX
- * rather than ORIG_RAX, and arg4 is R10 rather than RCX
- */
-#define REGS_SYSCALL_NR PT_INDEX(RAX)
-#define REGS_SYSCALL_ARG1 PT_INDEX(RDI)
-#define REGS_SYSCALL_ARG2 PT_INDEX(RSI)
-#define REGS_SYSCALL_ARG3 PT_INDEX(RDX)
-#define REGS_SYSCALL_ARG4 PT_INDEX(R10)
-#define REGS_SYSCALL_ARG5 PT_INDEX(R8)
-#define REGS_SYSCALL_ARG6 PT_INDEX(R9)
-
-#define REGS_IP_INDEX PT_INDEX(RIP)
-#define REGS_SP_INDEX PT_INDEX(RSP)
-
-#define FP_SIZE (HOST_FP_SIZE)
-
-#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/sc.h b/arch/um/sys-x86_64/shared/sysdep/sc.h
deleted file mode 100644
index 8aee45b..0000000
--- a/arch/um/sys-x86_64/shared/sysdep/sc.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef __SYSDEP_X86_64_SC_H
-#define __SYSDEP_X86_64_SC_H
-
-/* Copyright (C) 2003 - 2004 PathScale, Inc
- * Released under the GPL
- */
-
-#include <user_constants.h>
-
-#define SC_OFFSET(sc, field) \
-	 *((unsigned long *) &(((char *) (sc))[HOST_##field]))
-
-#define SC_RBX(sc) SC_OFFSET(sc, SC_RBX)
-#define SC_RCX(sc) SC_OFFSET(sc, SC_RCX)
-#define SC_RDX(sc) SC_OFFSET(sc, SC_RDX)
-#define SC_RSI(sc) SC_OFFSET(sc, SC_RSI)
-#define SC_RDI(sc) SC_OFFSET(sc, SC_RDI)
-#define SC_RBP(sc) SC_OFFSET(sc, SC_RBP)
-#define SC_RAX(sc) SC_OFFSET(sc, SC_RAX)
-#define SC_R8(sc) SC_OFFSET(sc, SC_R8)
-#define SC_R9(sc) SC_OFFSET(sc, SC_R9)
-#define SC_R10(sc) SC_OFFSET(sc, SC_R10)
-#define SC_R11(sc) SC_OFFSET(sc, SC_R11)
-#define SC_R12(sc) SC_OFFSET(sc, SC_R12)
-#define SC_R13(sc) SC_OFFSET(sc, SC_R13)
-#define SC_R14(sc) SC_OFFSET(sc, SC_R14)
-#define SC_R15(sc) SC_OFFSET(sc, SC_R15)
-#define SC_IP(sc) SC_OFFSET(sc, SC_IP)
-#define SC_SP(sc) SC_OFFSET(sc, SC_SP)
-#define SC_CR2(sc) SC_OFFSET(sc, SC_CR2)
-#define SC_ERR(sc) SC_OFFSET(sc, SC_ERR)
-#define SC_TRAPNO(sc) SC_OFFSET(sc, SC_TRAPNO)
-#define SC_CS(sc) SC_OFFSET(sc, SC_CS)
-#define SC_FS(sc) SC_OFFSET(sc, SC_FS)
-#define SC_GS(sc) SC_OFFSET(sc, SC_GS)
-#define SC_EFLAGS(sc) SC_OFFSET(sc, SC_EFLAGS)
-#define SC_SIGMASK(sc) SC_OFFSET(sc, SC_SIGMASK)
-#define SC_SS(sc) SC_OFFSET(sc, SC_SS)
-#if 0
-#define SC_ORIG_RAX(sc) SC_OFFSET(sc, SC_ORIG_RAX)
-#define SC_DS(sc) SC_OFFSET(sc, SC_DS)
-#define SC_ES(sc) SC_OFFSET(sc, SC_ES)
-#endif
-
-#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/sigcontext.h b/arch/um/sys-x86_64/shared/sysdep/sigcontext.h
deleted file mode 100644
index 0155133..0000000
--- a/arch/um/sys-x86_64/shared/sysdep/sigcontext.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_X86_64_SIGCONTEXT_H
-#define __SYSDEP_X86_64_SIGCONTEXT_H
-
-#include <sysdep/sc.h>
-
-#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
-
-#define GET_FAULTINFO_FROM_SC(fi, sc) \
-	{ \
-		(fi).cr2 = SC_CR2(sc); \
-		(fi).error_code = SC_ERR(sc); \
-		(fi).trap_no = SC_TRAPNO(sc); \
-	}
-
-/* This is Page Fault */
-#define SEGV_IS_FIXABLE(fi)	((fi)->trap_no == 14)
-
-/* No broken SKAS API, which doesn't pass trap_no, here. */
-#define SEGV_MAYBE_FIXABLE(fi)	0
-
-#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/skas_ptrace.h b/arch/um/sys-x86_64/shared/sysdep/skas_ptrace.h
deleted file mode 100644
index 95db4be7..0000000
--- a/arch/um/sys-x86_64/shared/sysdep/skas_ptrace.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_X86_64_SKAS_PTRACE_H
-#define __SYSDEP_X86_64_SKAS_PTRACE_H
-
-struct ptrace_faultinfo {
-        int is_write;
-        unsigned long addr;
-};
-
-struct ptrace_ldt {
-        int func;
-        void *ptr;
-        unsigned long bytecount;
-};
-
-#define PTRACE_LDT 54
-
-#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/system.h b/arch/um/sys-x86_64/shared/sysdep/system.h
deleted file mode 100644
index d1b93c4..0000000
--- a/arch/um/sys-x86_64/shared/sysdep/system.h
+++ /dev/null
@@ -1,132 +0,0 @@
-#ifndef _ASM_X86_SYSTEM_H_
-#define _ASM_X86_SYSTEM_H_
-
-#include <asm/asm.h>
-#include <asm/segment.h>
-#include <asm/cpufeature.h>
-#include <asm/cmpxchg.h>
-#include <asm/nops.h>
-
-#include <linux/kernel.h>
-#include <linux/irqflags.h>
-
-/* entries in ARCH_DLINFO: */
-#ifdef CONFIG_IA32_EMULATION
-# define AT_VECTOR_SIZE_ARCH 2
-#else
-# define AT_VECTOR_SIZE_ARCH 1
-#endif
-
-extern unsigned long arch_align_stack(unsigned long sp);
-
-void default_idle(void);
-
-/*
- * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
- * to devices.
- */
-#ifdef CONFIG_X86_32
-/*
- * Some non-Intel clones support out of order store. wmb() ceases to be a
- * nop for these.
- */
-#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
-#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
-#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
-#else
-#define mb() 	asm volatile("mfence":::"memory")
-#define rmb()	asm volatile("lfence":::"memory")
-#define wmb()	asm volatile("sfence" ::: "memory")
-#endif
-
-/**
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier.  All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads.  This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies.  See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- *	CPU 0				CPU 1
- *
- *	b = 2;
- *	memory_barrier();
- *	p = &b;				q = p;
- *					read_barrier_depends();
- *					d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends().  However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- *	CPU 0				CPU 1
- *
- *	a = 2;
- *	memory_barrier();
- *	b = 3;				y = b;
- *					read_barrier_depends();
- *					x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b".  Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
- * in cases like this where there are no data dependencies.
- **/
-
-#define read_barrier_depends()	do { } while (0)
-
-#ifdef CONFIG_SMP
-#define smp_mb()	mb()
-#ifdef CONFIG_X86_PPRO_FENCE
-# define smp_rmb()	rmb()
-#else
-# define smp_rmb()	barrier()
-#endif
-#ifdef CONFIG_X86_OOSTORE
-# define smp_wmb() 	wmb()
-#else
-# define smp_wmb()	barrier()
-#endif
-#define smp_read_barrier_depends()	read_barrier_depends()
-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#else
-#define smp_mb()	barrier()
-#define smp_rmb()	barrier()
-#define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	do { } while (0)
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
-#endif
-
-/*
- * Stop RDTSC speculation. This is needed when you need to use RDTSC
- * (or get_cycles or vread that possibly accesses the TSC) in a defined
- * code region.
- *
- * (Could use an alternative three way for this if there was one.)
- */
-static inline void rdtsc_barrier(void)
-{
-	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
-	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
-}
-
-#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/tls.h b/arch/um/sys-x86_64/shared/sysdep/tls.h
deleted file mode 100644
index 18c000d..0000000
--- a/arch/um/sys-x86_64/shared/sysdep/tls.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef _SYSDEP_TLS_H
-#define _SYSDEP_TLS_H
-
-# ifndef __KERNEL__
-
-/* Change name to avoid conflicts with the original one from <asm/ldt.h>, which
- * may be named user_desc (but in 2.4 and in header matching its API was named
- * modify_ldt_ldt_s). */
-
-typedef struct um_dup_user_desc {
-	unsigned int  entry_number;
-	unsigned int  base_addr;
-	unsigned int  limit;
-	unsigned int  seg_32bit:1;
-	unsigned int  contents:2;
-	unsigned int  read_exec_only:1;
-	unsigned int  limit_in_pages:1;
-	unsigned int  seg_not_present:1;
-	unsigned int  useable:1;
-	unsigned int  lm:1;
-} user_desc_t;
-
-# else /* __KERNEL__ */
-
-#  include <ldt.h>
-typedef struct user_desc user_desc_t;
-
-# endif /* __KERNEL__ */
-#endif /* _SYSDEP_TLS_H */
diff --git a/arch/um/sys-x86_64/shared/sysdep/vm-flags.h b/arch/um/sys-x86_64/shared/sysdep/vm-flags.h
deleted file mode 100644
index 3978e55..0000000
--- a/arch/um/sys-x86_64/shared/sysdep/vm-flags.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
- * Copyright 2003 PathScale, Inc.
- * Licensed under the GPL
- */
-
-#ifndef __VM_FLAGS_X86_64_H
-#define __VM_FLAGS_X86_64_H
-
-#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
-	VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | \
-	VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#endif
diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c
deleted file mode 100644
index b6b65c7..0000000
--- a/arch/um/sys-x86_64/signal.c
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * Copyright (C) 2003 PathScale, Inc.
- * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <linux/personality.h>
-#include <linux/ptrace.h>
-#include <linux/kernel.h>
-#include <asm/unistd.h>
-#include <asm/uaccess.h>
-#include <asm/ucontext.h>
-#include "frame_kern.h"
-#include "skas.h"
-
-void copy_sc(struct uml_pt_regs *regs, void *from)
-{
-	struct sigcontext *sc = from;
-
-#define GETREG(regs, regno, sc, regname)				\
-	(regs)->gp[(regno) / sizeof(unsigned long)] = (sc)->regname
-
-	GETREG(regs, R8, sc, r8);
-	GETREG(regs, R9, sc, r9);
-	GETREG(regs, R10, sc, r10);
-	GETREG(regs, R11, sc, r11);
-	GETREG(regs, R12, sc, r12);
-	GETREG(regs, R13, sc, r13);
-	GETREG(regs, R14, sc, r14);
-	GETREG(regs, R15, sc, r15);
-	GETREG(regs, RDI, sc, di);
-	GETREG(regs, RSI, sc, si);
-	GETREG(regs, RBP, sc, bp);
-	GETREG(regs, RBX, sc, bx);
-	GETREG(regs, RDX, sc, dx);
-	GETREG(regs, RAX, sc, ax);
-	GETREG(regs, RCX, sc, cx);
-	GETREG(regs, RSP, sc, sp);
-	GETREG(regs, RIP, sc, ip);
-	GETREG(regs, EFLAGS, sc, flags);
-	GETREG(regs, CS, sc, cs);
-
-#undef GETREG
-}
-
-static int copy_sc_from_user(struct pt_regs *regs,
-			     struct sigcontext __user *from,
-			     struct _fpstate __user *fpp)
-{
-	struct user_i387_struct fp;
-	int err = 0;
-
-#define GETREG(regs, regno, sc, regname)				\
-	__get_user((regs)->regs.gp[(regno) / sizeof(unsigned long)],	\
-		   &(sc)->regname)
-
-	err |= GETREG(regs, R8, from, r8);
-	err |= GETREG(regs, R9, from, r9);
-	err |= GETREG(regs, R10, from, r10);
-	err |= GETREG(regs, R11, from, r11);
-	err |= GETREG(regs, R12, from, r12);
-	err |= GETREG(regs, R13, from, r13);
-	err |= GETREG(regs, R14, from, r14);
-	err |= GETREG(regs, R15, from, r15);
-	err |= GETREG(regs, RDI, from, di);
-	err |= GETREG(regs, RSI, from, si);
-	err |= GETREG(regs, RBP, from, bp);
-	err |= GETREG(regs, RBX, from, bx);
-	err |= GETREG(regs, RDX, from, dx);
-	err |= GETREG(regs, RAX, from, ax);
-	err |= GETREG(regs, RCX, from, cx);
-	err |= GETREG(regs, RSP, from, sp);
-	err |= GETREG(regs, RIP, from, ip);
-	err |= GETREG(regs, EFLAGS, from, flags);
-	err |= GETREG(regs, CS, from, cs);
-	if (err)
-		return 1;
-
-#undef GETREG
-
-	err = copy_from_user(&fp, fpp, sizeof(struct user_i387_struct));
-	if (err)
-		return 1;
-
-	err = restore_fp_registers(userspace_pid[current_thread_info()->cpu],
-				   (unsigned long *) &fp);
-	if (err < 0) {
-		printk(KERN_ERR "copy_sc_from_user - "
-		       "restore_fp_registers failed, errno = %d\n",
-		       -err);
-		return 1;
-	}
-
-	return 0;
-}
-
-static int copy_sc_to_user(struct sigcontext __user *to,
-			   struct _fpstate __user *to_fp, struct pt_regs *regs,
-			   unsigned long mask, unsigned long sp)
-{
-	struct faultinfo * fi = &current->thread.arch.faultinfo;
-	struct user_i387_struct fp;
-	int err = 0;
-
-	err |= __put_user(0, &to->gs);
-	err |= __put_user(0, &to->fs);
-
-#define PUTREG(regs, regno, sc, regname)				\
-	__put_user((regs)->regs.gp[(regno) / sizeof(unsigned long)],	\
-		   &(sc)->regname)
-
-	err |= PUTREG(regs, RDI, to, di);
-	err |= PUTREG(regs, RSI, to, si);
-	err |= PUTREG(regs, RBP, to, bp);
-	/*
-	 * Must use original RSP, which is passed in, rather than what's in
-	 * the pt_regs, because that's already been updated to point at the
-	 * signal frame.
-	 */
-	err |= __put_user(sp, &to->sp);
-	err |= PUTREG(regs, RBX, to, bx);
-	err |= PUTREG(regs, RDX, to, dx);
-	err |= PUTREG(regs, RCX, to, cx);
-	err |= PUTREG(regs, RAX, to, ax);
-	err |= PUTREG(regs, R8, to, r8);
-	err |= PUTREG(regs, R9, to, r9);
-	err |= PUTREG(regs, R10, to, r10);
-	err |= PUTREG(regs, R11, to, r11);
-	err |= PUTREG(regs, R12, to, r12);
-	err |= PUTREG(regs, R13, to, r13);
-	err |= PUTREG(regs, R14, to, r14);
-	err |= PUTREG(regs, R15, to, r15);
-	err |= PUTREG(regs, CS, to, cs); /* XXX x86_64 doesn't do this */
-
-	err |= __put_user(fi->cr2, &to->cr2);
-	err |= __put_user(fi->error_code, &to->err);
-	err |= __put_user(fi->trap_no, &to->trapno);
-
-	err |= PUTREG(regs, RIP, to, ip);
-	err |= PUTREG(regs, EFLAGS, to, flags);
-#undef PUTREG
-
-	err |= __put_user(mask, &to->oldmask);
-	if (err)
-		return 1;
-
-	err = save_fp_registers(userspace_pid[current_thread_info()->cpu],
-				(unsigned long *) &fp);
-	if (err < 0) {
-		printk(KERN_ERR "copy_sc_from_user - restore_fp_registers "
-		       "failed, errno = %d\n", -err);
-		return 1;
-	}
-
-	if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct)))
-		return 1;
-
-	return err;
-}
-
-struct rt_sigframe
-{
-	char __user *pretcode;
-	struct ucontext uc;
-	struct siginfo info;
-	struct _fpstate fpstate;
-};
-
-int setup_signal_stack_si(unsigned long stack_top, int sig,
-			  struct k_sigaction *ka, struct pt_regs * regs,
-			  siginfo_t *info, sigset_t *set)
-{
-	struct rt_sigframe __user *frame;
-	unsigned long save_sp = PT_REGS_RSP(regs);
-	int err = 0;
-	struct task_struct *me = current;
-
-	frame = (struct rt_sigframe __user *)
-		round_down(stack_top - sizeof(struct rt_sigframe), 16);
-	/* Subtract 128 for a red zone and 8 for proper alignment */
-	frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8);
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto out;
-
-	if (ka->sa.sa_flags & SA_SIGINFO) {
-		err |= copy_siginfo_to_user(&frame->info, info);
-		if (err)
-			goto out;
-	}
-
-	/*
-	 * Update SP now because the page fault handler refuses to extend
-	 * the stack if the faulting address is too far below the current
-	 * SP, which frame now certainly is.  If there's an error, the original
-	 * value is restored on the way out.
-	 * When writing the sigcontext to the stack, we have to write the
-	 * original value, so that's passed to copy_sc_to_user, which does
-	 * the right thing with it.
-	 */
-	PT_REGS_RSP(regs) = (unsigned long) frame;
-
-	/* Create the ucontext.  */
-	err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(0, &frame->uc.uc_link);
-	err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(save_sp),
-			  &frame->uc.uc_stack.ss_flags);
-	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs,
-			       set->sig[0], save_sp);
-	err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate);
-	if (sizeof(*set) == 16) {
-		__put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
-		__put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
-	}
-	else
-		err |= __copy_to_user(&frame->uc.uc_sigmask, set,
-				      sizeof(*set));
-
-	/*
-	 * Set up to return from userspace.  If provided, use a stub
-	 * already in userspace.
-	 */
-	/* x86-64 should always use SA_RESTORER. */
-	if (ka->sa.sa_flags & SA_RESTORER)
-		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
-	else
-		/* could use a vstub here */
-		goto restore_sp;
-
-	if (err)
-		goto restore_sp;
-
-	/* Set up registers for signal handler */
-	{
-		struct exec_domain *ed = current_thread_info()->exec_domain;
-		if (unlikely(ed && ed->signal_invmap && sig < 32))
-			sig = ed->signal_invmap[sig];
-	}
-
-	PT_REGS_RDI(regs) = sig;
-	/* In case the signal handler was declared without prototypes */
-	PT_REGS_RAX(regs) = 0;
-
-	/*
-	 * This also works for non SA_SIGINFO handlers because they expect the
-	 * next argument after the signal number on the stack.
-	 */
-	PT_REGS_RSI(regs) = (unsigned long) &frame->info;
-	PT_REGS_RDX(regs) = (unsigned long) &frame->uc;
-	PT_REGS_RIP(regs) = (unsigned long) ka->sa.sa_handler;
- out:
-	return err;
-
-restore_sp:
-	PT_REGS_RSP(regs) = save_sp;
-	return err;
-}
-
-long sys_rt_sigreturn(struct pt_regs *regs)
-{
-	unsigned long sp = PT_REGS_SP(&current->thread.regs);
-	struct rt_sigframe __user *frame =
-		(struct rt_sigframe __user *)(sp - 8);
-	struct ucontext __user *uc = &frame->uc;
-	sigset_t set;
-
-	if (copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
-		goto segfault;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	if (copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext,
-			      &frame->fpstate))
-		goto segfault;
-
-	/* Avoid ERESTART handling */
-	PT_REGS_SYSCALL_NR(&current->thread.regs) = -1;
-	return PT_REGS_SYSCALL_RET(&current->thread.regs);
-
- segfault:
-	force_sig(SIGSEGV, current);
-	return 0;
-}
diff --git a/arch/um/sys-x86_64/stub_segv.c b/arch/um/sys-x86_64/stub_segv.c
deleted file mode 100644
index ced051a..0000000
--- a/arch/um/sys-x86_64/stub_segv.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <signal.h>
-#include "as-layout.h"
-#include "sysdep/stub.h"
-#include "sysdep/faultinfo.h"
-#include "sysdep/sigcontext.h"
-
-void __attribute__ ((__section__ (".__syscall_stub")))
-stub_segv_handler(int sig)
-{
-	struct ucontext *uc;
-
-	__asm__ __volatile__("movq %%rdx, %0" : "=g" (uc) :);
-	GET_FAULTINFO_FROM_SC(*((struct faultinfo *) STUB_DATA),
-			      &uc->uc_mcontext);
-	trap_myself();
-}
-
diff --git a/arch/um/sys-x86_64/user-offsets.c b/arch/um/sys-x86_64/user-offsets.c
deleted file mode 100644
index 9735854..0000000
--- a/arch/um/sys-x86_64/user-offsets.c
+++ /dev/null
@@ -1,65 +0,0 @@
-#include <stdio.h>
-#include <stddef.h>
-#include <signal.h>
-#include <sys/poll.h>
-#include <sys/mman.h>
-#include <sys/user.h>
-#define __FRAME_OFFSETS
-#include <asm/ptrace.h>
-#include <asm/types.h>
-
-#define DEFINE(sym, val) \
-        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define DEFINE_LONGS(sym, val) \
-        asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long)))
-
-#define OFFSET(sym, str, mem) \
-	DEFINE(sym, offsetof(struct str, mem));
-
-void foo(void)
-{
-	OFFSET(HOST_SC_CR2, sigcontext, cr2);
-	OFFSET(HOST_SC_ERR, sigcontext, err);
-	OFFSET(HOST_SC_TRAPNO, sigcontext, trapno);
-
-	DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long));
-	DEFINE_LONGS(HOST_RBX, RBX);
-	DEFINE_LONGS(HOST_RCX, RCX);
-	DEFINE_LONGS(HOST_RDI, RDI);
-	DEFINE_LONGS(HOST_RSI, RSI);
-	DEFINE_LONGS(HOST_RDX, RDX);
-	DEFINE_LONGS(HOST_RBP, RBP);
-	DEFINE_LONGS(HOST_RAX, RAX);
-	DEFINE_LONGS(HOST_R8, R8);
-	DEFINE_LONGS(HOST_R9, R9);
-	DEFINE_LONGS(HOST_R10, R10);
-	DEFINE_LONGS(HOST_R11, R11);
-	DEFINE_LONGS(HOST_R12, R12);
-	DEFINE_LONGS(HOST_R13, R13);
-	DEFINE_LONGS(HOST_R14, R14);
-	DEFINE_LONGS(HOST_R15, R15);
-	DEFINE_LONGS(HOST_ORIG_RAX, ORIG_RAX);
-	DEFINE_LONGS(HOST_CS, CS);
-	DEFINE_LONGS(HOST_SS, SS);
-	DEFINE_LONGS(HOST_EFLAGS, EFLAGS);
-#if 0
-	DEFINE_LONGS(HOST_FS, FS);
-	DEFINE_LONGS(HOST_GS, GS);
-	DEFINE_LONGS(HOST_DS, DS);
-	DEFINE_LONGS(HOST_ES, ES);
-#endif
-
-	DEFINE_LONGS(HOST_IP, RIP);
-	DEFINE_LONGS(HOST_SP, RSP);
-	DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct));
-
-	/* XXX Duplicated between i386 and x86_64 */
-	DEFINE(UM_POLLIN, POLLIN);
-	DEFINE(UM_POLLPRI, POLLPRI);
-	DEFINE(UM_POLLOUT, POLLOUT);
-
-	DEFINE(UM_PROT_READ, PROT_READ);
-	DEFINE(UM_PROT_WRITE, PROT_WRITE);
-	DEFINE(UM_PROT_EXEC, PROT_EXEC);
-}
diff --git a/arch/um/Makefile-i386 b/arch/x86/Makefile.um
similarity index 64%
rename from arch/um/Makefile-i386
rename to arch/x86/Makefile.um
index 302cbe5..36ddec6 100644
--- a/arch/um/Makefile-i386
+++ b/arch/x86/Makefile.um
@@ -1,14 +1,11 @@
-core-y += arch/um/sys-i386/ arch/x86/crypto/
+core-y += arch/x86/crypto/
 
-TOP_ADDR := $(CONFIG_TOP_ADDR)
-
+ifeq ($(CONFIG_X86_32),y)
 START := 0x8048000
 
 LDFLAGS			+= -m elf_i386
-ELF_ARCH		:= $(SUBARCH)
-ELF_FORMAT 		:= elf32-$(SUBARCH)
-OBJCOPYFLAGS  		:= -O binary -R .note -R .comment -S
-HEADER_ARCH		:= x86
+ELF_ARCH		:= i386
+ELF_FORMAT 		:= elf32-i386
 CHECKFLAGS	+= -D__i386__
 
 ifeq ("$(origin SUBARCH)", "command line")
@@ -16,9 +13,8 @@
 KBUILD_CFLAGS		+= $(call cc-option,-m32)
 KBUILD_AFLAGS		+= $(call cc-option,-m32)
 LINK-y			+= $(call cc-option,-m32)
-UML_OBJCOPYFLAGS	+= -F $(ELF_FORMAT)
 
-export LDFLAGS HOSTCFLAGS HOSTLDFLAGS UML_OBJCOPYFLAGS
+export LDFLAGS
 endif
 endif
 
@@ -40,3 +36,26 @@
 			else echo $(call cc-option,-funit-at-a-time); fi ;)
 
 KBUILD_CFLAGS += $(cflags-y)
+
+else
+
+START := 0x60000000
+
+KBUILD_CFLAGS += -fno-builtin -m64 
+
+CHECKFLAGS  += -m64 -D__x86_64__
+KBUILD_AFLAGS += -m64
+LDFLAGS += -m elf_x86_64
+KBUILD_CPPFLAGS += -m64
+
+ELF_ARCH := i386:x86-64
+ELF_FORMAT := elf64-x86-64
+
+# Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example.
+
+LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64
+LINK-y += -m64
+
+# Do unit-at-a-time unconditionally on x86_64, following the host
+KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
+endif
diff --git a/arch/um/Kconfig.x86 b/arch/x86/um/Kconfig
similarity index 100%
rename from arch/um/Kconfig.x86
rename to arch/x86/um/Kconfig
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
new file mode 100644
index 0000000..8fb5840
--- /dev/null
+++ b/arch/x86/um/Makefile
@@ -0,0 +1,45 @@
+#
+# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+#
+
+ifeq ($(CONFIG_X86_32),y)
+	BITS := 32
+else
+	BITS := 64
+endif
+
+obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \
+	ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
+	stub_$(BITS).o stub_segv.o syscalls_$(BITS).o \
+	sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
+	mem_$(BITS).o subarch.o os-$(OS)/
+
+ifeq ($(CONFIG_X86_32),y)
+
+obj-y += checksum_32.o
+obj-$(CONFIG_BINFMT_ELF) += elfcore.o
+
+subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
+subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o
+subarch-$(CONFIG_HIGHMEM) += ../mm/highmem_32.o
+
+else
+
+obj-y += vdso/
+
+subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../lib/thunk_64.o \
+		../lib/rwsem.o
+
+endif
+
+subarch-$(CONFIG_MODULES) += ../kernel/module.o
+
+USER_OBJS := bugs_$(BITS).o ptrace_user.o fault.o
+
+extra-y += user-offsets.s
+$(obj)/user-offsets.s: c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS)
+
+UNPROFILE_OBJS := stub_segv.o
+CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
+
+include arch/um/scripts/Makefile.rules
diff --git a/arch/um/include/asm/apic.h b/arch/x86/um/asm/apic.h
similarity index 100%
rename from arch/um/include/asm/apic.h
rename to arch/x86/um/asm/apic.h
diff --git a/arch/um/include/asm/arch_hweight.h b/arch/x86/um/asm/arch_hweight.h
similarity index 100%
rename from arch/um/include/asm/arch_hweight.h
rename to arch/x86/um/asm/arch_hweight.h
diff --git a/arch/um/sys-i386/asm/archparam.h b/arch/x86/um/asm/archparam.h
similarity index 62%
rename from arch/um/sys-i386/asm/archparam.h
rename to arch/x86/um/asm/archparam.h
index 2a18a88..c17cf68 100644
--- a/arch/um/sys-i386/asm/archparam.h
+++ b/arch/x86/um/asm/archparam.h
@@ -1,10 +1,13 @@
 /* 
  * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Copyright 2003 PathScale, Inc.
  * Licensed under the GPL
  */
 
-#ifndef __UM_ARCHPARAM_I386_H
-#define __UM_ARCHPARAM_I386_H
+#ifndef __UM_ARCHPARAM_H
+#define __UM_ARCHPARAM_H
+
+#ifdef CONFIG_X86_32
 
 #ifdef CONFIG_X86_PAE
 #define LAST_PKMAP 512
@@ -14,3 +17,4 @@
 
 #endif
 
+#endif
diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h
new file mode 100644
index 0000000..b6efe23
--- /dev/null
+++ b/arch/x86/um/asm/checksum.h
@@ -0,0 +1,10 @@
+#ifndef __UM_CHECKSUM_H
+#define __UM_CHECKSUM_H
+
+#ifdef CONFIG_X86_32
+# include "checksum_32.h"
+#else
+# include "checksum_64.h"
+#endif
+
+#endif
diff --git a/arch/um/sys-i386/shared/sysdep/checksum.h b/arch/x86/um/asm/checksum_32.h
similarity index 99%
rename from arch/um/sys-i386/shared/sysdep/checksum.h
rename to arch/x86/um/asm/checksum_32.h
index ed47445..caab742 100644
--- a/arch/um/sys-i386/shared/sysdep/checksum.h
+++ b/arch/x86/um/asm/checksum_32.h
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed under the GPL
  */
 
diff --git a/arch/um/sys-x86_64/shared/sysdep/checksum.h b/arch/x86/um/asm/checksum_64.h
similarity index 100%
rename from arch/um/sys-x86_64/shared/sysdep/checksum.h
rename to arch/x86/um/asm/checksum_64.h
diff --git a/arch/um/include/asm/desc.h b/arch/x86/um/asm/desc.h
similarity index 100%
rename from arch/um/include/asm/desc.h
rename to arch/x86/um/asm/desc.h
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
new file mode 100644
index 0000000..f3b0633
--- /dev/null
+++ b/arch/x86/um/asm/elf.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+#ifndef __UM_ELF_X86_H
+#define __UM_ELF_X86_H
+
+#include <asm/user.h>
+#include "skas.h"
+
+#ifdef CONFIG_X86_32
+
+#define R_386_NONE	0
+#define R_386_32	1
+#define R_386_PC32	2
+#define R_386_GOT32	3
+#define R_386_PLT32	4
+#define R_386_COPY	5
+#define R_386_GLOB_DAT	6
+#define R_386_JMP_SLOT	7
+#define R_386_RELATIVE	8
+#define R_386_GOTOFF	9
+#define R_386_GOTPC	10
+#define R_386_NUM	11
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) \
+	(((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
+
+#define ELF_CLASS	ELFCLASS32
+#define ELF_DATA        ELFDATA2LSB
+#define ELF_ARCH        EM_386
+
+#define ELF_PLAT_INIT(regs, load_addr) do { \
+	PT_REGS_EBX(regs) = 0; \
+	PT_REGS_ECX(regs) = 0; \
+	PT_REGS_EDX(regs) = 0; \
+	PT_REGS_ESI(regs) = 0; \
+	PT_REGS_EDI(regs) = 0; \
+	PT_REGS_EBP(regs) = 0; \
+	PT_REGS_EAX(regs) = 0; \
+} while (0)
+
+/* Shamelessly stolen from include/asm-i386/elf.h */
+
+#define ELF_CORE_COPY_REGS(pr_reg, regs) do {	\
+	pr_reg[0] = PT_REGS_EBX(regs);		\
+	pr_reg[1] = PT_REGS_ECX(regs);		\
+	pr_reg[2] = PT_REGS_EDX(regs);		\
+	pr_reg[3] = PT_REGS_ESI(regs);		\
+	pr_reg[4] = PT_REGS_EDI(regs);		\
+	pr_reg[5] = PT_REGS_EBP(regs);		\
+	pr_reg[6] = PT_REGS_EAX(regs);		\
+	pr_reg[7] = PT_REGS_DS(regs);		\
+	pr_reg[8] = PT_REGS_ES(regs);		\
+	/* fake once used fs and gs selectors? */	\
+	pr_reg[9] = PT_REGS_DS(regs);		\
+	pr_reg[10] = PT_REGS_DS(regs);		\
+	pr_reg[11] = PT_REGS_SYSCALL_NR(regs);	\
+	pr_reg[12] = PT_REGS_IP(regs);		\
+	pr_reg[13] = PT_REGS_CS(regs);		\
+	pr_reg[14] = PT_REGS_EFLAGS(regs);	\
+	pr_reg[15] = PT_REGS_SP(regs);		\
+	pr_reg[16] = PT_REGS_SS(regs);		\
+} while (0);
+
+extern char * elf_aux_platform;
+#define ELF_PLATFORM (elf_aux_platform)
+
+extern unsigned long vsyscall_ehdr;
+extern unsigned long vsyscall_end;
+extern unsigned long __kernel_vsyscall;
+
+/*
+ * This is the range that is readable by user mode, and things
+ * acting like user mode such as get_user_pages.
+ */
+#define FIXADDR_USER_START      vsyscall_ehdr
+#define FIXADDR_USER_END        vsyscall_end
+
+
+/*
+ * Architecture-neutral AT_ values in 0-17, leave some room
+ * for more of them, start the x86-specific ones at 32.
+ */
+#define AT_SYSINFO		32
+#define AT_SYSINFO_EHDR		33
+
+#define ARCH_DLINFO						\
+do {								\
+	if ( vsyscall_ehdr ) {					\
+		NEW_AUX_ENT(AT_SYSINFO,	__kernel_vsyscall);	\
+		NEW_AUX_ENT(AT_SYSINFO_EHDR, vsyscall_ehdr);	\
+	}							\
+} while (0)
+
+#else
+
+/* x86-64 relocation types, taken from asm-x86_64/elf.h */
+#define R_X86_64_NONE		0	/* No reloc */
+#define R_X86_64_64		1	/* Direct 64 bit  */
+#define R_X86_64_PC32		2	/* PC relative 32 bit signed */
+#define R_X86_64_GOT32		3	/* 32 bit GOT entry */
+#define R_X86_64_PLT32		4	/* 32 bit PLT address */
+#define R_X86_64_COPY		5	/* Copy symbol at runtime */
+#define R_X86_64_GLOB_DAT	6	/* Create GOT entry */
+#define R_X86_64_JUMP_SLOT	7	/* Create PLT entry */
+#define R_X86_64_RELATIVE	8	/* Adjust by program base */
+#define R_X86_64_GOTPCREL	9	/* 32 bit signed pc relative
+					   offset to GOT */
+#define R_X86_64_32		10	/* Direct 32 bit zero extended */
+#define R_X86_64_32S		11	/* Direct 32 bit sign extended */
+#define R_X86_64_16		12	/* Direct 16 bit zero extended */
+#define R_X86_64_PC16		13	/* 16 bit sign extended pc relative */
+#define R_X86_64_8		14	/* Direct 8 bit sign extended  */
+#define R_X86_64_PC8		15	/* 8 bit sign extended pc relative */
+
+#define R_X86_64_NUM		16
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) \
+	((x)->e_machine == EM_X86_64)
+
+#define ELF_CLASS	ELFCLASS64
+#define ELF_DATA        ELFDATA2LSB
+#define ELF_ARCH        EM_X86_64
+
+#define ELF_PLAT_INIT(regs, load_addr)    do { \
+	PT_REGS_RBX(regs) = 0; \
+	PT_REGS_RCX(regs) = 0; \
+	PT_REGS_RDX(regs) = 0; \
+	PT_REGS_RSI(regs) = 0; \
+	PT_REGS_RDI(regs) = 0; \
+	PT_REGS_RBP(regs) = 0; \
+	PT_REGS_RAX(regs) = 0; \
+	PT_REGS_R8(regs) = 0; \
+	PT_REGS_R9(regs) = 0; \
+	PT_REGS_R10(regs) = 0; \
+	PT_REGS_R11(regs) = 0; \
+	PT_REGS_R12(regs) = 0; \
+	PT_REGS_R13(regs) = 0; \
+	PT_REGS_R14(regs) = 0; \
+	PT_REGS_R15(regs) = 0; \
+} while (0)
+
+#define ELF_CORE_COPY_REGS(pr_reg, _regs)		\
+	(pr_reg)[0] = (_regs)->regs.gp[0];			\
+	(pr_reg)[1] = (_regs)->regs.gp[1];			\
+	(pr_reg)[2] = (_regs)->regs.gp[2];			\
+	(pr_reg)[3] = (_regs)->regs.gp[3];			\
+	(pr_reg)[4] = (_regs)->regs.gp[4];			\
+	(pr_reg)[5] = (_regs)->regs.gp[5];			\
+	(pr_reg)[6] = (_regs)->regs.gp[6];			\
+	(pr_reg)[7] = (_regs)->regs.gp[7];			\
+	(pr_reg)[8] = (_regs)->regs.gp[8];			\
+	(pr_reg)[9] = (_regs)->regs.gp[9];			\
+	(pr_reg)[10] = (_regs)->regs.gp[10];			\
+	(pr_reg)[11] = (_regs)->regs.gp[11];			\
+	(pr_reg)[12] = (_regs)->regs.gp[12];			\
+	(pr_reg)[13] = (_regs)->regs.gp[13];			\
+	(pr_reg)[14] = (_regs)->regs.gp[14];			\
+	(pr_reg)[15] = (_regs)->regs.gp[15];			\
+	(pr_reg)[16] = (_regs)->regs.gp[16];			\
+	(pr_reg)[17] = (_regs)->regs.gp[17];			\
+	(pr_reg)[18] = (_regs)->regs.gp[18];			\
+	(pr_reg)[19] = (_regs)->regs.gp[19];			\
+	(pr_reg)[20] = (_regs)->regs.gp[20];			\
+	(pr_reg)[21] = current->thread.arch.fs;			\
+	(pr_reg)[22] = 0;					\
+	(pr_reg)[23] = 0;					\
+	(pr_reg)[24] = 0;					\
+	(pr_reg)[25] = 0;					\
+	(pr_reg)[26] = 0;
+
+#define ELF_PLATFORM "x86_64"
+
+/* No user-accessible fixmap addresses, i.e. vsyscall */
+#define FIXADDR_USER_START      0
+#define FIXADDR_USER_END        0
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+	int uses_interp);
+
+extern unsigned long um_vdso_addr;
+#define AT_SYSINFO_EHDR 33
+#define ARCH_DLINFO	NEW_AUX_ENT(AT_SYSINFO_EHDR, um_vdso_addr)
+
+#endif
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef struct user_i387_struct elf_fpregset_t;
+
+#define task_pt_regs(t) (&(t)->thread.regs)
+
+struct task_struct;
+
+extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu);
+
+#define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu)
+
+#define ELF_EXEC_PAGESIZE 4096
+
+#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3)
+
+extern long elf_aux_hwcap;
+#define ELF_HWCAP (elf_aux_hwcap)
+
+#define SET_PERSONALITY(ex) do ; while(0)
+#define __HAVE_ARCH_GATE_AREA 1
+
+#endif
diff --git a/arch/um/include/asm/irq_vectors.h b/arch/x86/um/asm/irq_vectors.h
similarity index 100%
rename from arch/um/include/asm/irq_vectors.h
rename to arch/x86/um/asm/irq_vectors.h
diff --git a/arch/x86/um/asm/mm_context.h b/arch/x86/um/asm/mm_context.h
new file mode 100644
index 0000000..4a73d63
--- /dev/null
+++ b/arch/x86/um/asm/mm_context.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2004 Fujitsu Siemens Computers GmbH
+ * Licensed under the GPL
+ *
+ * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
+ */
+
+#ifndef __ASM_LDT_H
+#define __ASM_LDT_H
+
+#include <linux/mutex.h>
+#include <asm/ldt.h>
+
+extern void ldt_host_info(void);
+
+#define LDT_PAGES_MAX \
+	((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE)
+#define LDT_ENTRIES_PER_PAGE \
+	(PAGE_SIZE/LDT_ENTRY_SIZE)
+#define LDT_DIRECT_ENTRIES \
+	((LDT_PAGES_MAX*sizeof(void *))/LDT_ENTRY_SIZE)
+
+struct ldt_entry {
+	__u32 a;
+	__u32 b;
+};
+
+typedef struct uml_ldt {
+	int entry_count;
+	struct mutex lock;
+	union {
+		struct ldt_entry * pages[LDT_PAGES_MAX];
+		struct ldt_entry entries[LDT_DIRECT_ENTRIES];
+	} u;
+} uml_ldt_t;
+
+#define LDT_entry_a(info) \
+	((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
+
+#define LDT_entry_b(info) \
+	(((info)->base_addr & 0xff000000) | \
+	(((info)->base_addr & 0x00ff0000) >> 16) | \
+	((info)->limit & 0xf0000) | \
+	(((info)->read_exec_only ^ 1) << 9) | \
+	((info)->contents << 10) | \
+	(((info)->seg_not_present ^ 1) << 15) | \
+	((info)->seg_32bit << 22) | \
+	((info)->limit_in_pages << 23) | \
+	((info)->useable << 20) | \
+	0x7000)
+
+#define _LDT_empty(info) (\
+	(info)->base_addr	== 0	&& \
+	(info)->limit		== 0	&& \
+	(info)->contents	== 0	&& \
+	(info)->read_exec_only	== 1	&& \
+	(info)->seg_32bit	== 0	&& \
+	(info)->limit_in_pages	== 0	&& \
+	(info)->seg_not_present	== 1	&& \
+	(info)->useable		== 0	)
+
+#ifdef CONFIG_X86_64
+#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
+#else
+#define LDT_empty(info) (_LDT_empty(info))
+#endif
+
+struct uml_arch_mm_context {
+	uml_ldt_t ldt;
+};
+
+#endif
diff --git a/arch/x86/um/asm/module.h b/arch/x86/um/asm/module.h
new file mode 100644
index 0000000..61af80e
--- /dev/null
+++ b/arch/x86/um/asm/module.h
@@ -0,0 +1,23 @@
+#ifndef __UM_MODULE_H
+#define __UM_MODULE_H
+
+/* UML is simple */
+struct mod_arch_specific
+{
+};
+
+#ifdef CONFIG_X86_32
+
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Sym Elf32_Sym
+#define Elf_Ehdr Elf32_Ehdr
+
+#else
+
+#define Elf_Shdr Elf64_Shdr
+#define Elf_Sym Elf64_Sym
+#define Elf_Ehdr Elf64_Ehdr
+
+#endif
+
+#endif
diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h
new file mode 100644
index 0000000..118c143
--- /dev/null
+++ b/arch/x86/um/asm/processor.h
@@ -0,0 +1,22 @@
+#ifndef __UM_PROCESSOR_H
+#define __UM_PROCESSOR_H
+
+/* include faultinfo structure */
+#include <sysdep/faultinfo.h>
+
+#ifdef CONFIG_X86_32
+# include "processor_32.h"
+#else
+# include "processor_64.h"
+#endif
+
+#define KSTK_EIP(tsk) KSTK_REG(tsk, HOST_IP)
+#define KSTK_ESP(tsk) KSTK_REG(tsk, HOST_IP)
+#define KSTK_EBP(tsk) KSTK_REG(tsk, HOST_BP)
+
+#define ARCH_IS_STACKGROW(address) \
+       (address + 65536 + 32 * sizeof(unsigned long) >= UPT_SP(&current->thread.regs.regs))
+
+#include <asm/processor-generic.h>
+
+#endif
diff --git a/arch/um/sys-i386/asm/processor.h b/arch/x86/um/asm/processor_32.h
similarity index 77%
rename from arch/um/sys-i386/asm/processor.h
rename to arch/x86/um/asm/processor_32.h
index 82a9061..018f732 100644
--- a/arch/um/sys-i386/asm/processor.h
+++ b/arch/x86/um/asm/processor_32.h
@@ -6,15 +6,12 @@
 #ifndef __UM_PROCESSOR_I386_H
 #define __UM_PROCESSOR_I386_H
 
-#include "linux/string.h"
-#include <sysdep/host_ldt.h>
-#include "asm/segment.h"
+#include <linux/string.h>
+#include <asm/segment.h>
+#include <asm/ldt.h>
 
 extern int host_has_cmov;
 
-/* include faultinfo structure */
-#include "sysdep/faultinfo.h"
-
 struct uml_tls_struct {
 	struct user_desc tls;
 	unsigned flushed:1;
@@ -66,13 +63,4 @@
 #define current_text_addr() \
 	({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
 
-#define ARCH_IS_STACKGROW(address) \
-       (address + 32 >= UPT_SP(&current->thread.regs.regs))
-
-#define KSTK_EIP(tsk) KSTK_REG(tsk, EIP)
-#define KSTK_ESP(tsk) KSTK_REG(tsk, UESP)
-#define KSTK_EBP(tsk) KSTK_REG(tsk, EBP)
-
-#include "asm/processor-generic.h"
-
 #endif
diff --git a/arch/um/sys-x86_64/asm/processor.h b/arch/x86/um/asm/processor_64.h
similarity index 76%
rename from arch/um/sys-x86_64/asm/processor.h
rename to arch/x86/um/asm/processor_64.h
index 875a26a..61de92d 100644
--- a/arch/um/sys-x86_64/asm/processor.h
+++ b/arch/x86/um/asm/processor_64.h
@@ -7,9 +7,6 @@
 #ifndef __UM_PROCESSOR_X86_64_H
 #define __UM_PROCESSOR_X86_64_H
 
-/* include faultinfo structure */
-#include "sysdep/faultinfo.h"
-
 struct arch_thread {
         unsigned long debugregs[8];
         int debugregs_seq;
@@ -45,12 +42,4 @@
 #define current_text_addr() \
 	({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; })
 
-#define ARCH_IS_STACKGROW(address) \
-        (address + 128 >= UPT_SP(&current->thread.regs.regs))
-
-#define KSTK_EIP(tsk) KSTK_REG(tsk, RIP)
-#define KSTK_ESP(tsk) KSTK_REG(tsk, RSP)
-
-#include "asm/processor-generic.h"
-
 #endif
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
new file mode 100644
index 0000000..c8aca8c
--- /dev/null
+++ b/arch/x86/um/asm/ptrace.h
@@ -0,0 +1,5 @@
+#ifdef CONFIG_X86_32
+# include "ptrace_32.h"
+#else
+# include "ptrace_64.h"
+#endif
diff --git a/arch/um/sys-i386/asm/ptrace.h b/arch/x86/um/asm/ptrace_32.h
similarity index 100%
rename from arch/um/sys-i386/asm/ptrace.h
rename to arch/x86/um/asm/ptrace_32.h
diff --git a/arch/um/sys-x86_64/asm/ptrace.h b/arch/x86/um/asm/ptrace_64.h
similarity index 97%
rename from arch/um/sys-x86_64/asm/ptrace.h
rename to arch/x86/um/asm/ptrace_64.h
index 83d8c47..706a0d8 100644
--- a/arch/um/sys-x86_64/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace_64.h
@@ -40,7 +40,7 @@
 
 #define PT_REGS_ORIG_RAX(r) UPT_ORIG_RAX(&(r)->regs)
 #define PT_REGS_RIP(r) UPT_IP(&(r)->regs)
-#define PT_REGS_RSP(r) UPT_SP(&(r)->regs)
+#define PT_REGS_SP(r) UPT_SP(&(r)->regs)
 
 #define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs)
 
diff --git a/arch/um/include/asm/required-features.h b/arch/x86/um/asm/required-features.h
similarity index 100%
rename from arch/um/include/asm/required-features.h
rename to arch/x86/um/asm/required-features.h
diff --git a/arch/um/include/asm/segment.h b/arch/x86/um/asm/segment.h
similarity index 100%
rename from arch/um/include/asm/segment.h
rename to arch/x86/um/asm/segment.h
diff --git a/arch/um/sys-i386/shared/sysdep/system.h b/arch/x86/um/asm/system.h
similarity index 96%
rename from arch/um/sys-i386/shared/sysdep/system.h
rename to arch/x86/um/asm/system.h
index d1b93c4..a459fd9 100644
--- a/arch/um/sys-i386/shared/sysdep/system.h
+++ b/arch/x86/um/asm/system.h
@@ -129,4 +129,7 @@
 	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
 }
 
+extern void *_switch_to(void *prev, void *next, void *last);
+#define switch_to(prev, next, last) prev = _switch_to(prev, next, last)
+
 #endif
diff --git a/arch/x86/um/asm/vm-flags.h b/arch/x86/um/asm/vm-flags.h
new file mode 100644
index 0000000..7c297e9
--- /dev/null
+++ b/arch/x86/um/asm/vm-flags.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Copyright 2003 PathScale, Inc.
+ * Licensed under the GPL
+ */
+
+#ifndef __VM_FLAGS_X86_H
+#define __VM_FLAGS_X86_H
+
+#ifdef CONFIG_X86_32
+
+#define VM_DATA_DEFAULT_FLAGS \
+	(VM_READ | VM_WRITE | \
+	((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
+		 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#else
+
+#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
+	VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | \
+	VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#endif
+#endif
diff --git a/arch/um/sys-x86_64/bug.c b/arch/x86/um/bug.c
similarity index 100%
rename from arch/um/sys-x86_64/bug.c
rename to arch/x86/um/bug.c
diff --git a/arch/um/sys-i386/bugs.c b/arch/x86/um/bugs_32.c
similarity index 93%
rename from arch/um/sys-i386/bugs.c
rename to arch/x86/um/bugs_32.c
index 2c6d0d7..a1fba5f 100644
--- a/arch/um/sys-i386/bugs.c
+++ b/arch/x86/um/bugs_32.c
@@ -4,17 +4,17 @@
  */
 
 #include <signal.h>
-#include "kern_constants.h"
 #include "kern_util.h"
 #include "longjmp.h"
-#include "task.h"
-#include "user.h"
 #include "sysdep/ptrace.h"
+#include <generated/asm-offsets.h>
 
 /* Set during early boot */
 static int host_has_cmov = 1;
 static jmp_buf cmov_test_return;
 
+#define TASK_PID(task) *((int *) &(((char *) (task))[HOST_TASK_PID]))
+
 static void cmov_sigill_test_handler(int sig)
 {
 	host_has_cmov = 0;
diff --git a/arch/um/sys-x86_64/bugs.c b/arch/x86/um/bugs_64.c
similarity index 100%
rename from arch/um/sys-x86_64/bugs.c
rename to arch/x86/um/bugs_64.c
diff --git a/arch/um/sys-i386/checksum.S b/arch/x86/um/checksum_32.S
similarity index 100%
rename from arch/um/sys-i386/checksum.S
rename to arch/x86/um/checksum_32.S
diff --git a/arch/um/sys-i386/delay.c b/arch/x86/um/delay.c
similarity index 100%
rename from arch/um/sys-i386/delay.c
rename to arch/x86/um/delay.c
diff --git a/arch/um/sys-i386/elfcore.c b/arch/x86/um/elfcore.c
similarity index 100%
rename from arch/um/sys-i386/elfcore.c
rename to arch/x86/um/elfcore.c
diff --git a/arch/um/sys-i386/fault.c b/arch/x86/um/fault.c
similarity index 100%
rename from arch/um/sys-i386/fault.c
rename to arch/x86/um/fault.c
diff --git a/arch/um/sys-x86_64/ksyms.c b/arch/x86/um/ksyms.c
similarity index 90%
rename from arch/um/sys-x86_64/ksyms.c
rename to arch/x86/um/ksyms.c
index 1db2fce..2e8f43e 100644
--- a/arch/um/sys-x86_64/ksyms.c
+++ b/arch/x86/um/ksyms.c
@@ -2,10 +2,12 @@
 #include <asm/string.h>
 #include <asm/checksum.h>
 
+#ifndef CONFIG_X86_32
 /*XXX: we need them because they would be exported by x86_64 */
 #if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
 EXPORT_SYMBOL(memcpy);
 #else
 EXPORT_SYMBOL(__memcpy);
 #endif
+#endif
 EXPORT_SYMBOL(csum_partial);
diff --git a/arch/um/sys-i386/ldt.c b/arch/x86/um/ldt.c
similarity index 92%
rename from arch/um/sys-i386/ldt.c
rename to arch/x86/um/ldt.c
index 3f2bf20..26b0e39 100644
--- a/arch/um/sys-i386/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -137,7 +137,7 @@
 {
 	int i, err = 0;
 	unsigned long size;
-	uml_ldt_t * ldt = &current->mm->context.ldt;
+	uml_ldt_t *ldt = &current->mm->context.arch.ldt;
 
 	if (!ldt->entry_count)
 		goto out;
@@ -205,7 +205,7 @@
 
 static int write_ldt(void __user * ptr, unsigned long bytecount, int func)
 {
-	uml_ldt_t * ldt = &current->mm->context.ldt;
+	uml_ldt_t *ldt = &current->mm->context.arch.ldt;
 	struct mm_id * mm_idp = &current->mm->context.id;
 	int i, err;
 	struct user_desc ldt_info;
@@ -397,7 +397,7 @@
 
 
 	if (!ptrace_ldt)
-		mutex_init(&new_mm->ldt.lock);
+		mutex_init(&new_mm->arch.ldt.lock);
 
 	if (!from_mm) {
 		memset(&desc, 0, sizeof(desc));
@@ -429,7 +429,7 @@
 					break;
 			}
 		}
-		new_mm->ldt.entry_count = 0;
+		new_mm->arch.ldt.entry_count = 0;
 
 		goto out;
 	}
@@ -457,26 +457,26 @@
 		 * i.e., we have to use the stub for modify_ldt, which
 		 * can't handle the big read buffer of up to 64kB.
 		 */
-		mutex_lock(&from_mm->ldt.lock);
-		if (from_mm->ldt.entry_count <= LDT_DIRECT_ENTRIES)
-			memcpy(new_mm->ldt.u.entries, from_mm->ldt.u.entries,
-			       sizeof(new_mm->ldt.u.entries));
+		mutex_lock(&from_mm->arch.ldt.lock);
+		if (from_mm->arch.ldt.entry_count <= LDT_DIRECT_ENTRIES)
+			memcpy(new_mm->arch.ldt.u.entries, from_mm->arch.ldt.u.entries,
+			       sizeof(new_mm->arch.ldt.u.entries));
 		else {
-			i = from_mm->ldt.entry_count / LDT_ENTRIES_PER_PAGE;
+			i = from_mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
 			while (i-->0) {
 				page = __get_free_page(GFP_KERNEL|__GFP_ZERO);
 				if (!page) {
 					err = -ENOMEM;
 					break;
 				}
-				new_mm->ldt.u.pages[i] =
+				new_mm->arch.ldt.u.pages[i] =
 					(struct ldt_entry *) page;
-				memcpy(new_mm->ldt.u.pages[i],
-				       from_mm->ldt.u.pages[i], PAGE_SIZE);
+				memcpy(new_mm->arch.ldt.u.pages[i],
+				       from_mm->arch.ldt.u.pages[i], PAGE_SIZE);
 			}
 		}
-		new_mm->ldt.entry_count = from_mm->ldt.entry_count;
-		mutex_unlock(&from_mm->ldt.lock);
+		new_mm->arch.ldt.entry_count = from_mm->arch.ldt.entry_count;
+		mutex_unlock(&from_mm->arch.ldt.lock);
 	}
 
     out:
@@ -488,12 +488,12 @@
 {
 	int i;
 
-	if (!ptrace_ldt && mm->ldt.entry_count > LDT_DIRECT_ENTRIES) {
-		i = mm->ldt.entry_count / LDT_ENTRIES_PER_PAGE;
+	if (!ptrace_ldt && mm->arch.ldt.entry_count > LDT_DIRECT_ENTRIES) {
+		i = mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
 		while (i-- > 0)
-			free_page((long) mm->ldt.u.pages[i]);
+			free_page((long) mm->arch.ldt.u.pages[i]);
 	}
-	mm->ldt.entry_count = 0;
+	mm->arch.ldt.entry_count = 0;
 }
 
 int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
diff --git a/arch/um/sys-i386/mem.c b/arch/x86/um/mem_32.c
similarity index 100%
rename from arch/um/sys-i386/mem.c
rename to arch/x86/um/mem_32.c
diff --git a/arch/um/sys-x86_64/mem.c b/arch/x86/um/mem_64.c
similarity index 100%
rename from arch/um/sys-x86_64/mem.c
rename to arch/x86/um/mem_64.c
diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/x86/um/os-Linux/Makefile
similarity index 61%
rename from arch/um/os-Linux/sys-i386/Makefile
rename to arch/x86/um/os-Linux/Makefile
index b4bc6ac..253bfb8 100644
--- a/arch/um/os-Linux/sys-i386/Makefile
+++ b/arch/x86/um/os-Linux/Makefile
@@ -3,7 +3,10 @@
 # Licensed under the GPL
 #
 
-obj-y = registers.o signal.o task_size.o tls.o
+obj-y = registers.o task_size.o mcontext.o
+
+obj-$(CONFIG_X86_32) += tls.o
+obj-$(CONFIG_64BIT) += prctl.o
 
 USER_OBJS := $(obj-y)
 
diff --git a/arch/x86/um/os-Linux/mcontext.c b/arch/x86/um/os-Linux/mcontext.c
new file mode 100644
index 0000000..1d33d72
--- /dev/null
+++ b/arch/x86/um/os-Linux/mcontext.c
@@ -0,0 +1,31 @@
+#include <sys/ucontext.h>
+#define __FRAME_OFFSETS
+#include <asm/ptrace.h>
+#include <sysdep/ptrace.h>
+
+void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
+{
+#ifdef __i386__
+#define COPY2(X,Y) regs->gp[X] = mc->gregs[REG_##Y]
+#define COPY(X) regs->gp[X] = mc->gregs[REG_##X]
+#define COPY_SEG(X) regs->gp[X] = mc->gregs[REG_##X] & 0xffff;
+#define COPY_SEG_CPL3(X) regs->gp[X] = (mc->gregs[REG_##X] & 0xffff) | 3;
+	COPY_SEG(GS); COPY_SEG(FS); COPY_SEG(ES); COPY_SEG(DS);
+	COPY(EDI); COPY(ESI); COPY(EBP);
+	COPY2(UESP, ESP); /* sic */
+	COPY(EBX); COPY(EDX); COPY(ECX); COPY(EAX);
+	COPY(EIP); COPY_SEG_CPL3(CS); COPY(EFL); COPY_SEG_CPL3(SS);
+#else
+#define COPY2(X,Y) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##Y]
+#define COPY(X) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##X]
+	COPY(R8); COPY(R9); COPY(R10); COPY(R11);
+	COPY(R12); COPY(R13); COPY(R14); COPY(R15);
+	COPY(RDI); COPY(RSI); COPY(RBP); COPY(RBX);
+	COPY(RDX); COPY(RAX); COPY(RCX); COPY(RSP);
+	COPY(RIP);
+	COPY2(EFLAGS, EFL);
+	COPY2(CS, CSGSFS);
+	regs->gp[CS / sizeof(unsigned long)] &= 0xffff;
+	regs->gp[CS / sizeof(unsigned long)] |= 3;
+#endif
+}
diff --git a/arch/um/os-Linux/sys-x86_64/prctl.c b/arch/x86/um/os-Linux/prctl.c
similarity index 100%
rename from arch/um/os-Linux/sys-x86_64/prctl.c
rename to arch/x86/um/os-Linux/prctl.c
diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/x86/um/os-Linux/registers.c
similarity index 79%
rename from arch/um/os-Linux/sys-i386/registers.c
rename to arch/x86/um/os-Linux/registers.c
index 229f7a5..0cdbb86 100644
--- a/arch/um/os-Linux/sys-i386/registers.c
+++ b/arch/x86/um/os-Linux/registers.c
@@ -6,10 +6,10 @@
 
 #include <errno.h>
 #include <sys/ptrace.h>
+#ifdef __i386__
 #include <sys/user.h>
-#include "kern_constants.h"
+#endif
 #include "longjmp.h"
-#include "user.h"
 #include "sysdep/ptrace_user.h"
 
 int save_fp_registers(int pid, unsigned long *fp_regs)
@@ -26,6 +26,8 @@
 	return 0;
 }
 
+#ifdef __i386__
+int have_fpx_regs = 1;
 int save_fpx_registers(int pid, unsigned long *fp_regs)
 {
 	if (ptrace(PTRACE_GETFPXREGS, pid, 0, fp_regs) < 0)
@@ -40,24 +42,6 @@
 	return 0;
 }
 
-unsigned long get_thread_reg(int reg, jmp_buf *buf)
-{
-	switch (reg) {
-	case EIP:
-		return buf[0]->__eip;
-	case UESP:
-		return buf[0]->__esp;
-	case EBP:
-		return buf[0]->__ebp;
-	default:
-		printk(UM_KERN_ERR "get_thread_regs - unknown register %d\n",
-		       reg);
-		return 0;
-	}
-}
-
-int have_fpx_regs = 1;
-
 int get_fp_registers(int pid, unsigned long *regs)
 {
 	if (have_fpx_regs)
@@ -89,3 +73,41 @@
 
 	have_fpx_regs = 0;
 }
+#else
+
+int get_fp_registers(int pid, unsigned long *regs)
+{
+	return save_fp_registers(pid, regs);
+}
+
+int put_fp_registers(int pid, unsigned long *regs)
+{
+	return restore_fp_registers(pid, regs);
+}
+
+#endif
+
+unsigned long get_thread_reg(int reg, jmp_buf *buf)
+{
+	switch (reg) {
+#ifdef __i386__
+	case HOST_IP:
+		return buf[0]->__eip;
+	case HOST_SP:
+		return buf[0]->__esp;
+	case HOST_BP:
+		return buf[0]->__ebp;
+#else
+	case HOST_IP:
+		return buf[0]->__rip;
+	case HOST_SP:
+		return buf[0]->__rsp;
+	case HOST_BP:
+		return buf[0]->__rbp;
+#endif
+	default:
+		printk(UM_KERN_ERR "get_thread_regs - unknown register %d\n",
+		       reg);
+		return 0;
+	}
+}
diff --git a/arch/um/os-Linux/sys-i386/task_size.c b/arch/x86/um/os-Linux/task_size.c
similarity index 95%
rename from arch/um/os-Linux/sys-i386/task_size.c
rename to arch/x86/um/os-Linux/task_size.c
index be04c1e..efb16c5 100644
--- a/arch/um/os-Linux/sys-i386/task_size.c
+++ b/arch/x86/um/os-Linux/task_size.c
@@ -3,7 +3,8 @@
 #include <signal.h>
 #include <sys/mman.h>
 #include "longjmp.h"
-#include "kern_constants.h"
+
+#ifdef __i386__
 
 static jmp_buf buf;
 
@@ -137,3 +138,13 @@
 
 	return top;
 }
+
+#else
+
+unsigned long os_get_top_address(void)
+{
+	/* The old value of CONFIG_TOP_ADDR */
+	return 0x7fc0000000;
+}
+
+#endif
diff --git a/arch/x86/um/os-Linux/tls.c b/arch/x86/um/os-Linux/tls.c
new file mode 100644
index 0000000..82276b6
--- /dev/null
+++ b/arch/x86/um/os-Linux/tls.c
@@ -0,0 +1,67 @@
+#include <errno.h>
+#include <linux/unistd.h>
+
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "sysdep/tls.h"
+
+#ifndef PTRACE_GET_THREAD_AREA
+#define PTRACE_GET_THREAD_AREA 25
+#endif
+
+#ifndef PTRACE_SET_THREAD_AREA
+#define PTRACE_SET_THREAD_AREA 26
+#endif
+
+/* Checks whether host supports TLS, and sets *tls_min according to the value
+ * valid on the host.
+ * i386 host have it == 6; x86_64 host have it == 12, for i386 emulation. */
+void check_host_supports_tls(int *supports_tls, int *tls_min)
+{
+	/* Values for x86 and x86_64.*/
+	int val[] = {GDT_ENTRY_TLS_MIN_I386, GDT_ENTRY_TLS_MIN_X86_64};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(val); i++) {
+		user_desc_t info;
+		info.entry_number = val[i];
+
+		if (syscall(__NR_get_thread_area, &info) == 0) {
+			*tls_min = val[i];
+			*supports_tls = 1;
+			return;
+		} else {
+			if (errno == EINVAL)
+				continue;
+			else if (errno == ENOSYS)
+				*supports_tls = 0;
+				return;
+		}
+	}
+
+	*supports_tls = 0;
+}
+
+int os_set_thread_area(user_desc_t *info, int pid)
+{
+	int ret;
+
+	ret = ptrace(PTRACE_SET_THREAD_AREA, pid, info->entry_number,
+		     (unsigned long) info);
+	if (ret < 0)
+		ret = -errno;
+	return ret;
+}
+
+int os_get_thread_area(user_desc_t *info, int pid)
+{
+	int ret;
+
+	ret = ptrace(PTRACE_GET_THREAD_AREA, pid, info->entry_number,
+		     (unsigned long) info);
+	if (ret < 0)
+		ret = -errno;
+	return ret;
+}
diff --git a/arch/um/sys-i386/ptrace.c b/arch/x86/um/ptrace_32.c
similarity index 82%
rename from arch/um/sys-i386/ptrace.c
rename to arch/x86/um/ptrace_32.c
index 3375c27..3b949daa 100644
--- a/arch/um/sys-i386/ptrace.c
+++ b/arch/x86/um/ptrace_32.c
@@ -50,20 +50,47 @@
 /* 1 = access 0 = no access */
 #define FLAG_MASK 0x00044dd5
 
+static const int reg_offsets[] = {
+	[EBX] = HOST_BX,
+	[ECX] = HOST_CX,
+	[EDX] = HOST_DX,
+	[ESI] = HOST_SI,
+	[EDI] = HOST_DI,
+	[EBP] = HOST_BP,
+	[EAX] = HOST_AX,
+	[DS] = HOST_DS,
+	[ES] = HOST_ES,
+	[FS] = HOST_FS,
+	[GS] = HOST_GS,
+	[EIP] = HOST_IP,
+	[CS] = HOST_CS,
+	[EFL] = HOST_EFLAGS,
+	[UESP] = HOST_SP,
+	[SS] = HOST_SS,
+};
+
 int putreg(struct task_struct *child, int regno, unsigned long value)
 {
 	regno >>= 2;
 	switch (regno) {
+	case EBX:
+	case ECX:
+	case EDX:
+	case ESI:
+	case EDI:
+	case EBP:
+	case EAX:
+	case EIP:
+	case UESP:
+		break;
 	case FS:
 		if (value && (value & 3) != 3)
 			return -EIO;
-		PT_REGS_FS(&child->thread.regs) = value;
-		return 0;
+		break;
 	case GS:
 		if (value && (value & 3) != 3)
 			return -EIO;
-		PT_REGS_GS(&child->thread.regs) = value;
-		return 0;
+		break;
 	case DS:
 	case ES:
 		if (value && (value & 3) != 3)
@@ -78,10 +105,15 @@
 		break;
 	case EFL:
 		value &= FLAG_MASK;
-		value |= PT_REGS_EFLAGS(&child->thread.regs);
-		break;
+		child->thread.regs.regs.gp[HOST_EFLAGS] |= value;
+		return 0;
+	case ORIG_EAX:
+		child->thread.regs.regs.syscall = value;
+		return 0;
+	default :
+		panic("Bad register in putreg() : %d\n", regno);
 	}
-	PT_REGS_SET(&child->thread.regs, regno, value);
+	child->thread.regs.regs.gp[reg_offsets[regno]] = value;
 	return 0;
 }
 
@@ -106,22 +138,35 @@
 
 unsigned long getreg(struct task_struct *child, int regno)
 {
-	unsigned long retval = ~0UL;
+	unsigned long mask = ~0UL;
 
 	regno >>= 2;
 	switch (regno) {
+	case ORIG_EAX:
+		return child->thread.regs.regs.syscall;
 	case FS:
 	case GS:
 	case DS:
 	case ES:
 	case SS:
 	case CS:
-		retval = 0xffff;
-		/* fall through */
+		mask = 0xffff;
+		break;
+	case EIP:
+	case UESP:
+	case EAX:
+	case EBX:
+	case ECX:
+	case EDX:
+	case ESI:
+	case EDI:
+	case EBP:
+	case EFL:
+		break;
 	default:
-		retval &= PT_REG(&child->thread.regs, regno);
+		panic("Bad register in getreg() : %d\n", regno);
 	}
-	return retval;
+	return mask & child->thread.regs.regs.gp[reg_offsets[regno]];
 }
 
 /* read the word at location addr in the USER area. */
diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/x86/um/ptrace_64.c
similarity index 72%
rename from arch/um/sys-x86_64/ptrace.c
rename to arch/x86/um/ptrace_64.c
index 4005506..3b52bf0 100644
--- a/arch/um/sys-x86_64/ptrace.c
+++ b/arch/x86/um/ptrace_64.c
@@ -18,10 +18,39 @@
  */
 #define FLAG_MASK 0x44dd5UL
 
+static const int reg_offsets[] =
+{
+	[R8 >> 3] = HOST_R8,
+	[R9 >> 3] = HOST_R9,
+	[R10 >> 3] = HOST_R10,
+	[R11 >> 3] = HOST_R11,
+	[R12 >> 3] = HOST_R12,
+	[R13 >> 3] = HOST_R13,
+	[R14 >> 3] = HOST_R14,
+	[R15 >> 3] = HOST_R15,
+	[RIP >> 3] = HOST_IP,
+	[RSP >> 3] = HOST_SP,
+	[RAX >> 3] = HOST_AX,
+	[RBX >> 3] = HOST_BX,
+	[RCX >> 3] = HOST_CX,
+	[RDX >> 3] = HOST_DX,
+	[RSI >> 3] = HOST_SI,
+	[RDI >> 3] = HOST_DI,
+	[RBP >> 3] = HOST_BP,
+	[CS >> 3] = HOST_CS,
+	[SS >> 3] = HOST_SS,
+	[FS_BASE >> 3] = HOST_FS_BASE,
+	[GS_BASE >> 3] = HOST_GS_BASE,
+	[DS >> 3] = HOST_DS,
+	[ES >> 3] = HOST_ES,
+	[FS >> 3] = HOST_FS,
+	[GS >> 3] = HOST_GS,
+	[EFLAGS >> 3] = HOST_EFLAGS,
+	[ORIG_RAX >> 3] = HOST_ORIG_AX,
+};
+
 int putreg(struct task_struct *child, int regno, unsigned long value)
 {
-	unsigned long tmp;
-
 #ifdef TIF_IA32
 	/*
 	 * Some code in the 64bit emulation may not be 64bit clean.
@@ -31,6 +60,26 @@
 		value &= 0xffffffff;
 #endif
 	switch (regno) {
+	case R8:
+	case R9:
+	case R10:
+	case R11:
+	case R12:
+	case R13:
+	case R14:
+	case R15:
+	case RIP:
+	case RSP:
+	case RAX:
+	case RBX:
+	case RCX:
+	case RDX:
+	case RSI:
+	case RDI:
+	case RBP:
+	case ORIG_RAX:
+		break;
+
 	case FS:
 	case GS:
 	case DS:
@@ -50,12 +99,14 @@
 
 	case EFLAGS:
 		value &= FLAG_MASK;
-		tmp = PT_REGS_EFLAGS(&child->thread.regs) & ~FLAG_MASK;
-		value |= tmp;
-		break;
+		child->thread.regs.regs.gp[HOST_EFLAGS] |= value;
+		return 0;
+
+	default:
+		panic("Bad register in putreg(): %d\n", regno);
 	}
 
-	PT_REGS_SET(&child->thread.regs, regno, value);
+	child->thread.regs.regs.gp[reg_offsets[regno >> 3]] = value;
 	return 0;
 }
 
@@ -80,24 +131,46 @@
 
 unsigned long getreg(struct task_struct *child, int regno)
 {
-	unsigned long retval = ~0UL;
+	unsigned long mask = ~0UL;
+#ifdef TIF_IA32
+	if (test_tsk_thread_flag(child, TIF_IA32))
+		mask = 0xffffffff;
+#endif
 	switch (regno) {
+	case R8:
+	case R9:
+	case R10:
+	case R11:
+	case R12:
+	case R13:
+	case R14:
+	case R15:
+	case RIP:
+	case RSP:
+	case RAX:
+	case RBX:
+	case RCX:
+	case RDX:
+	case RSI:
+	case RDI:
+	case RBP:
+	case ORIG_RAX:
+	case EFLAGS:
+	case FS_BASE:
+	case GS_BASE:
+		break;
 	case FS:
 	case GS:
 	case DS:
 	case ES:
 	case SS:
 	case CS:
-		retval = 0xffff;
-		/* fall through */
+		mask = 0xffff;
+		break;
 	default:
-		retval &= PT_REG(&child->thread.regs, regno);
-#ifdef TIF_IA32
-		if (test_tsk_thread_flag(child, TIF_IA32))
-			retval &= 0xffffffff;
-#endif
+		panic("Bad register in getreg: %d\n", regno);
 	}
-	return retval;
+	return mask & child->thread.regs.regs.gp[reg_offsets[regno >> 3]];
 }
 
 int peek_user(struct task_struct *child, long addr, long data)
diff --git a/arch/um/sys-i386/ptrace_user.c b/arch/x86/um/ptrace_user.c
similarity index 93%
rename from arch/um/sys-i386/ptrace_user.c
rename to arch/x86/um/ptrace_user.c
index 0b10c3e..3960ca1 100644
--- a/arch/um/sys-i386/ptrace_user.c
+++ b/arch/x86/um/ptrace_user.c
@@ -4,7 +4,7 @@
  */
 
 #include <errno.h>
-#include <sys/ptrace.h>
+#include "ptrace_user.h"
 
 int ptrace_getregs(long pid, unsigned long *regs_out)
 {
diff --git a/arch/um/sys-i386/setjmp.S b/arch/x86/um/setjmp_32.S
similarity index 100%
rename from arch/um/sys-i386/setjmp.S
rename to arch/x86/um/setjmp_32.S
diff --git a/arch/um/sys-x86_64/setjmp.S b/arch/x86/um/setjmp_64.S
similarity index 100%
rename from arch/um/sys-x86_64/setjmp.S
rename to arch/x86/um/setjmp_64.S
diff --git a/arch/x86/um/shared/sysdep/archsetjmp.h b/arch/x86/um/shared/sysdep/archsetjmp.h
new file mode 100644
index 0000000..ff7766d
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/archsetjmp.h
@@ -0,0 +1,5 @@
+#ifdef __i386__
+#include "archsetjmp_32.h"
+#else
+#include "archsetjmp_64.h"
+#endif
diff --git a/arch/um/sys-i386/shared/sysdep/archsetjmp.h b/arch/x86/um/shared/sysdep/archsetjmp_32.h
similarity index 100%
rename from arch/um/sys-i386/shared/sysdep/archsetjmp.h
rename to arch/x86/um/shared/sysdep/archsetjmp_32.h
diff --git a/arch/um/sys-x86_64/shared/sysdep/archsetjmp.h b/arch/x86/um/shared/sysdep/archsetjmp_64.h
similarity index 100%
rename from arch/um/sys-x86_64/shared/sysdep/archsetjmp.h
rename to arch/x86/um/shared/sysdep/archsetjmp_64.h
diff --git a/arch/x86/um/shared/sysdep/faultinfo.h b/arch/x86/um/shared/sysdep/faultinfo.h
new file mode 100644
index 0000000..862ecb1
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/faultinfo.h
@@ -0,0 +1,5 @@
+#ifdef __i386__
+#include "faultinfo_32.h"
+#else
+#include "faultinfo_64.h"
+#endif
diff --git a/arch/um/sys-i386/shared/sysdep/faultinfo.h b/arch/x86/um/shared/sysdep/faultinfo_32.h
similarity index 79%
rename from arch/um/sys-i386/shared/sysdep/faultinfo.h
rename to arch/x86/um/shared/sysdep/faultinfo_32.h
index db437cc..a26086b 100644
--- a/arch/um/sys-i386/shared/sysdep/faultinfo.h
+++ b/arch/x86/um/shared/sysdep/faultinfo_32.h
@@ -24,6 +24,12 @@
 #define FAULT_WRITE(fi) ((fi).error_code & 2)
 #define FAULT_ADDRESS(fi) ((fi).cr2)
 
+/* This is Page Fault */
+#define SEGV_IS_FIXABLE(fi)	((fi)->trap_no == 14)
+
+/* SKAS3 has no trap_no on i386, but get_skas_faultinfo() sets it to 0. */
+#define SEGV_MAYBE_FIXABLE(fi)	((fi)->trap_no == 0 && ptrace_faultinfo)
+
 #define PTRACE_FULL_FAULTINFO 0
 
 #endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/faultinfo.h b/arch/x86/um/shared/sysdep/faultinfo_64.h
similarity index 83%
rename from arch/um/sys-x86_64/shared/sysdep/faultinfo.h
rename to arch/x86/um/shared/sysdep/faultinfo_64.h
index cb917b0..f811cbe 100644
--- a/arch/um/sys-x86_64/shared/sysdep/faultinfo.h
+++ b/arch/x86/um/shared/sysdep/faultinfo_64.h
@@ -24,6 +24,12 @@
 #define FAULT_WRITE(fi) ((fi).error_code & 2)
 #define FAULT_ADDRESS(fi) ((fi).cr2)
 
+/* This is Page Fault */
+#define SEGV_IS_FIXABLE(fi)	((fi)->trap_no == 14)
+
+/* No broken SKAS API, which doesn't pass trap_no, here. */
+#define SEGV_MAYBE_FIXABLE(fi)	0
+
 #define PTRACE_FULL_FAULTINFO 1
 
 #endif
diff --git a/arch/um/sys-i386/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h
similarity index 100%
rename from arch/um/sys-i386/shared/sysdep/kernel-offsets.h
rename to arch/x86/um/shared/sysdep/kernel-offsets.h
diff --git a/arch/x86/um/shared/sysdep/mcontext.h b/arch/x86/um/shared/sysdep/mcontext.h
new file mode 100644
index 0000000..b724c54
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/mcontext.h
@@ -0,0 +1,31 @@
+/* 
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __SYS_SIGCONTEXT_X86_H
+#define __SYS_SIGCONTEXT_X86_H
+
+extern void get_regs_from_mc(struct uml_pt_regs *, mcontext_t *);
+
+#ifdef __i386__
+
+#define GET_FAULTINFO_FROM_MC(fi, mc) \
+	{ \
+		(fi).cr2 = (mc)->cr2; \
+		(fi).error_code = (mc)->gregs[REG_ERR]; \
+		(fi).trap_no = (mc)->gregs[REG_TRAPNO]; \
+	}
+
+#else
+
+#define GET_FAULTINFO_FROM_MC(fi, mc) \
+	{ \
+		(fi).cr2 = (mc)->gregs[REG_CR2]; \
+		(fi).error_code = (mc)->gregs[REG_ERR]; \
+		(fi).trap_no = (mc)->gregs[REG_TRAPNO]; \
+	}
+
+#endif
+
+#endif
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
new file mode 100644
index 0000000..711b162
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/ptrace.h
@@ -0,0 +1,5 @@
+#ifdef __i386__
+#include "ptrace_32.h"
+#else
+#include "ptrace_64.h"
+#endif
diff --git a/arch/x86/um/shared/sysdep/ptrace_32.h b/arch/x86/um/shared/sysdep/ptrace_32.h
new file mode 100644
index 0000000..befd1df
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/ptrace_32.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __SYSDEP_I386_PTRACE_H
+#define __SYSDEP_I386_PTRACE_H
+
+#include <generated/user_constants.h>
+#include "sysdep/faultinfo.h"
+
+#define MAX_REG_NR (UM_FRAME_SIZE / sizeof(unsigned long))
+#define MAX_REG_OFFSET (UM_FRAME_SIZE)
+
+static inline void update_debugregs(int seq) {}
+
+/* syscall emulation path in ptrace */
+
+#ifndef PTRACE_SYSEMU
+#define PTRACE_SYSEMU 31
+#endif
+
+void set_using_sysemu(int value);
+int get_using_sysemu(void);
+extern int sysemu_supported;
+
+#define REGS_IP(r) ((r)[HOST_IP])
+#define REGS_SP(r) ((r)[HOST_SP])
+#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
+#define REGS_EAX(r) ((r)[HOST_AX])
+#define REGS_EBX(r) ((r)[HOST_BX])
+#define REGS_ECX(r) ((r)[HOST_CX])
+#define REGS_EDX(r) ((r)[HOST_DX])
+#define REGS_ESI(r) ((r)[HOST_SI])
+#define REGS_EDI(r) ((r)[HOST_DI])
+#define REGS_EBP(r) ((r)[HOST_BP])
+#define REGS_CS(r) ((r)[HOST_CS])
+#define REGS_SS(r) ((r)[HOST_SS])
+#define REGS_DS(r) ((r)[HOST_DS])
+#define REGS_ES(r) ((r)[HOST_ES])
+#define REGS_FS(r) ((r)[HOST_FS])
+#define REGS_GS(r) ((r)[HOST_GS])
+
+#define REGS_SET_SYSCALL_RETURN(r, res) REGS_EAX(r) = (res)
+
+#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
+#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
+
+#ifndef PTRACE_SYSEMU_SINGLESTEP
+#define PTRACE_SYSEMU_SINGLESTEP 32
+#endif
+
+struct uml_pt_regs {
+	unsigned long gp[MAX_REG_NR];
+	unsigned long fp[HOST_FPX_SIZE];
+	struct faultinfo faultinfo;
+	long syscall;
+	int is_user;
+};
+
+#define EMPTY_UML_PT_REGS { }
+
+#define UPT_IP(r) REGS_IP((r)->gp)
+#define UPT_SP(r) REGS_SP((r)->gp)
+#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
+#define UPT_EAX(r) REGS_EAX((r)->gp)
+#define UPT_EBX(r) REGS_EBX((r)->gp)
+#define UPT_ECX(r) REGS_ECX((r)->gp)
+#define UPT_EDX(r) REGS_EDX((r)->gp)
+#define UPT_ESI(r) REGS_ESI((r)->gp)
+#define UPT_EDI(r) REGS_EDI((r)->gp)
+#define UPT_EBP(r) REGS_EBP((r)->gp)
+#define UPT_ORIG_EAX(r) ((r)->syscall)
+#define UPT_CS(r) REGS_CS((r)->gp)
+#define UPT_SS(r) REGS_SS((r)->gp)
+#define UPT_DS(r) REGS_DS((r)->gp)
+#define UPT_ES(r) REGS_ES((r)->gp)
+#define UPT_FS(r) REGS_FS((r)->gp)
+#define UPT_GS(r) REGS_GS((r)->gp)
+
+#define UPT_SYSCALL_ARG1(r) UPT_EBX(r)
+#define UPT_SYSCALL_ARG2(r) UPT_ECX(r)
+#define UPT_SYSCALL_ARG3(r) UPT_EDX(r)
+#define UPT_SYSCALL_ARG4(r) UPT_ESI(r)
+#define UPT_SYSCALL_ARG5(r) UPT_EDI(r)
+#define UPT_SYSCALL_ARG6(r) UPT_EBP(r)
+
+extern int user_context(unsigned long sp);
+
+#define UPT_IS_USER(r) ((r)->is_user)
+
+struct syscall_args {
+	unsigned long args[6];
+};
+
+#define SYSCALL_ARGS(r) ((struct syscall_args) \
+			 { .args = { UPT_SYSCALL_ARG1(r),	\
+				     UPT_SYSCALL_ARG2(r),	\
+				     UPT_SYSCALL_ARG3(r),	\
+				     UPT_SYSCALL_ARG4(r),	\
+				     UPT_SYSCALL_ARG5(r),	\
+				     UPT_SYSCALL_ARG6(r) } } )
+
+#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
+
+#define UPT_ORIG_SYSCALL(r) UPT_EAX(r)
+#define UPT_SYSCALL_NR(r) UPT_ORIG_EAX(r)
+#define UPT_SYSCALL_RET(r) UPT_EAX(r)
+
+#define UPT_FAULTINFO(r) (&(r)->faultinfo)
+
+extern void arch_init_registers(int pid);
+
+#endif
diff --git a/arch/x86/um/shared/sysdep/ptrace_64.h b/arch/x86/um/shared/sysdep/ptrace_64.h
new file mode 100644
index 0000000..031edc5
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/ptrace_64.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright 2003 PathScale, Inc.
+ * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ *
+ * Licensed under the GPL
+ */
+
+#ifndef __SYSDEP_X86_64_PTRACE_H
+#define __SYSDEP_X86_64_PTRACE_H
+
+#include <generated/user_constants.h>
+#include "sysdep/faultinfo.h"
+
+#define MAX_REG_OFFSET (UM_FRAME_SIZE)
+#define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long))
+
+#define REGS_IP(r) ((r)[HOST_IP])
+#define REGS_SP(r) ((r)[HOST_SP])
+
+#define REGS_RBX(r) ((r)[HOST_BX])
+#define REGS_RCX(r) ((r)[HOST_CX])
+#define REGS_RDX(r) ((r)[HOST_DX])
+#define REGS_RSI(r) ((r)[HOST_SI])
+#define REGS_RDI(r) ((r)[HOST_DI])
+#define REGS_RBP(r) ((r)[HOST_BP])
+#define REGS_RAX(r) ((r)[HOST_AX])
+#define REGS_R8(r) ((r)[HOST_R8])
+#define REGS_R9(r) ((r)[HOST_R9])
+#define REGS_R10(r) ((r)[HOST_R10])
+#define REGS_R11(r) ((r)[HOST_R11])
+#define REGS_R12(r) ((r)[HOST_R12])
+#define REGS_R13(r) ((r)[HOST_R13])
+#define REGS_R14(r) ((r)[HOST_R14])
+#define REGS_R15(r) ((r)[HOST_R15])
+#define REGS_CS(r) ((r)[HOST_CS])
+#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
+#define REGS_SS(r) ((r)[HOST_SS])
+
+#define HOST_FS_BASE 21
+#define HOST_GS_BASE 22
+#define HOST_DS 23
+#define HOST_ES 24
+#define HOST_FS 25
+#define HOST_GS 26
+
+/* Also defined in asm/ptrace-x86_64.h, but not in libc headers.  So, these
+ * are already defined for kernel code, but not for userspace code.
+ */
+#ifndef FS_BASE
+/* These aren't defined in ptrace.h, but exist in struct user_regs_struct,
+ * which is what x86_64 ptrace actually uses.
+ */
+#define FS_BASE (HOST_FS_BASE * sizeof(long))
+#define GS_BASE (HOST_GS_BASE * sizeof(long))
+#define DS (HOST_DS * sizeof(long))
+#define ES (HOST_ES * sizeof(long))
+#define FS (HOST_FS * sizeof(long))
+#define GS (HOST_GS * sizeof(long))
+#endif
+
+#define REGS_FS_BASE(r) ((r)[HOST_FS_BASE])
+#define REGS_GS_BASE(r) ((r)[HOST_GS_BASE])
+#define REGS_DS(r) ((r)[HOST_DS])
+#define REGS_ES(r) ((r)[HOST_ES])
+#define REGS_FS(r) ((r)[HOST_FS])
+#define REGS_GS(r) ((r)[HOST_GS])
+
+#define REGS_ORIG_RAX(r) ((r)[HOST_ORIG_AX])
+
+#define REGS_SET_SYSCALL_RETURN(r, res) REGS_RAX(r) = (res)
+
+#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
+#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
+
+#define REGS_FAULT_ADDR(r) ((r)->fault_addr)
+
+#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type)
+
+#define REGS_TRAP(r) ((r)->trap_type)
+
+#define REGS_ERR(r) ((r)->fault_type)
+
+struct uml_pt_regs {
+	unsigned long gp[MAX_REG_NR];
+	unsigned long fp[HOST_FP_SIZE];
+	struct faultinfo faultinfo;
+	long syscall;
+	int is_user;
+};
+
+#define EMPTY_UML_PT_REGS { }
+
+#define UPT_RBX(r) REGS_RBX((r)->gp)
+#define UPT_RCX(r) REGS_RCX((r)->gp)
+#define UPT_RDX(r) REGS_RDX((r)->gp)
+#define UPT_RSI(r) REGS_RSI((r)->gp)
+#define UPT_RDI(r) REGS_RDI((r)->gp)
+#define UPT_RBP(r) REGS_RBP((r)->gp)
+#define UPT_RAX(r) REGS_RAX((r)->gp)
+#define UPT_R8(r) REGS_R8((r)->gp)
+#define UPT_R9(r) REGS_R9((r)->gp)
+#define UPT_R10(r) REGS_R10((r)->gp)
+#define UPT_R11(r) REGS_R11((r)->gp)
+#define UPT_R12(r) REGS_R12((r)->gp)
+#define UPT_R13(r) REGS_R13((r)->gp)
+#define UPT_R14(r) REGS_R14((r)->gp)
+#define UPT_R15(r) REGS_R15((r)->gp)
+#define UPT_CS(r) REGS_CS((r)->gp)
+#define UPT_FS_BASE(r) REGS_FS_BASE((r)->gp)
+#define UPT_FS(r) REGS_FS((r)->gp)
+#define UPT_GS_BASE(r) REGS_GS_BASE((r)->gp)
+#define UPT_GS(r) REGS_GS((r)->gp)
+#define UPT_DS(r) REGS_DS((r)->gp)
+#define UPT_ES(r) REGS_ES((r)->gp)
+#define UPT_CS(r) REGS_CS((r)->gp)
+#define UPT_SS(r) REGS_SS((r)->gp)
+#define UPT_ORIG_RAX(r) REGS_ORIG_RAX((r)->gp)
+
+#define UPT_IP(r) REGS_IP((r)->gp)
+#define UPT_SP(r) REGS_SP((r)->gp)
+
+#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
+#define UPT_SYSCALL_NR(r) ((r)->syscall)
+#define UPT_SYSCALL_RET(r) UPT_RAX(r)
+
+extern int user_context(unsigned long sp);
+
+#define UPT_IS_USER(r) ((r)->is_user)
+
+#define UPT_SYSCALL_ARG1(r) UPT_RDI(r)
+#define UPT_SYSCALL_ARG2(r) UPT_RSI(r)
+#define UPT_SYSCALL_ARG3(r) UPT_RDX(r)
+#define UPT_SYSCALL_ARG4(r) UPT_R10(r)
+#define UPT_SYSCALL_ARG5(r) UPT_R8(r)
+#define UPT_SYSCALL_ARG6(r) UPT_R9(r)
+
+struct syscall_args {
+	unsigned long args[6];
+};
+
+#define SYSCALL_ARGS(r) ((struct syscall_args) \
+			 { .args = { UPT_SYSCALL_ARG1(r),	 \
+				     UPT_SYSCALL_ARG2(r),	 \
+				     UPT_SYSCALL_ARG3(r),	 \
+				     UPT_SYSCALL_ARG4(r),	 \
+				     UPT_SYSCALL_ARG5(r),	 \
+				     UPT_SYSCALL_ARG6(r) } } )
+
+#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
+
+#define UPT_FAULTINFO(r) (&(r)->faultinfo)
+
+static inline void arch_init_registers(int pid)
+{
+}
+
+#endif
diff --git a/arch/x86/um/shared/sysdep/ptrace_user.h b/arch/x86/um/shared/sysdep/ptrace_user.h
new file mode 100644
index 0000000..16cd6b5
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/ptrace_user.h
@@ -0,0 +1,27 @@
+#include <generated/user_constants.h>
+
+#define PT_OFFSET(r) ((r) * sizeof(long))
+
+#define PT_SYSCALL_NR(regs) ((regs)[HOST_ORIG_AX])
+#define PT_SYSCALL_NR_OFFSET PT_OFFSET(HOST_ORIG_AX)
+
+#define PT_SYSCALL_RET_OFFSET PT_OFFSET(HOST_AX)
+
+#define REGS_IP_INDEX HOST_IP
+#define REGS_SP_INDEX HOST_SP
+
+#ifdef __i386__
+#define FP_SIZE ((HOST_FPX_SIZE > HOST_FP_SIZE) ? HOST_FPX_SIZE : HOST_FP_SIZE)
+#else
+#define FP_SIZE HOST_FP_SIZE
+
+/*
+ * x86_64 FC3 doesn't define this in /usr/include/linux/ptrace.h even though
+ * it's defined in the kernel's include/linux/ptrace.h. Additionally, use the
+ * 2.4 name and value for 2.4 host compatibility.
+ */
+#ifndef PTRACE_OLDSETOPTIONS
+#define PTRACE_OLDSETOPTIONS 21
+#endif
+
+#endif
diff --git a/arch/um/sys-i386/shared/sysdep/skas_ptrace.h b/arch/x86/um/shared/sysdep/skas_ptrace.h
similarity index 80%
rename from arch/um/sys-i386/shared/sysdep/skas_ptrace.h
rename to arch/x86/um/shared/sysdep/skas_ptrace.h
index e27b8a7..453febe 100644
--- a/arch/um/sys-i386/shared/sysdep/skas_ptrace.h
+++ b/arch/x86/um/shared/sysdep/skas_ptrace.h
@@ -3,8 +3,8 @@
  * Licensed under the GPL
  */
 
-#ifndef __SYSDEP_I386_SKAS_PTRACE_H
-#define __SYSDEP_I386_SKAS_PTRACE_H
+#ifndef __SYSDEP_X86_SKAS_PTRACE_H
+#define __SYSDEP_X86_SKAS_PTRACE_H
 
 struct ptrace_faultinfo {
         int is_write;
diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h
new file mode 100644
index 0000000..bd161e3
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/stub.h
@@ -0,0 +1,14 @@
+#include <asm/unistd.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include "as-layout.h"
+#include "stub-data.h"
+
+#ifdef __i386__
+#include "stub_32.h"
+#else
+#include "stub_64.h"
+#endif
+
+extern void stub_segv_handler(int, siginfo_t *, void *);
+extern void stub_clone_handler(void);
diff --git a/arch/um/sys-i386/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub_32.h
similarity index 90%
rename from arch/um/sys-i386/shared/sysdep/stub.h
rename to arch/x86/um/shared/sysdep/stub_32.h
index 977dedd..51fd256 100644
--- a/arch/um/sys-i386/shared/sysdep/stub.h
+++ b/arch/x86/um/shared/sysdep/stub_32.h
@@ -6,15 +6,7 @@
 #ifndef __SYSDEP_STUB_H
 #define __SYSDEP_STUB_H
 
-#include <sys/mman.h>
 #include <asm/ptrace.h>
-#include <asm/unistd.h>
-#include "as-layout.h"
-#include "stub-data.h"
-#include "kern_constants.h"
-
-extern void stub_segv_handler(int sig);
-extern void stub_clone_handler(void);
 
 #define STUB_SYSCALL_RET EAX
 #define STUB_MMAP_NR __NR_mmap2
diff --git a/arch/um/sys-x86_64/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub_64.h
similarity index 90%
rename from arch/um/sys-x86_64/shared/sysdep/stub.h
rename to arch/x86/um/shared/sysdep/stub_64.h
index 3432aa2..994df93 100644
--- a/arch/um/sys-x86_64/shared/sysdep/stub.h
+++ b/arch/x86/um/shared/sysdep/stub_64.h
@@ -6,15 +6,7 @@
 #ifndef __SYSDEP_STUB_H
 #define __SYSDEP_STUB_H
 
-#include <sys/mman.h>
-#include <asm/unistd.h>
 #include <sysdep/ptrace_user.h>
-#include "as-layout.h"
-#include "stub-data.h"
-#include "kern_constants.h"
-
-extern void stub_segv_handler(int sig);
-extern void stub_clone_handler(void);
 
 #define STUB_SYSCALL_RET PT_INDEX(RAX)
 #define STUB_MMAP_NR __NR_mmap
diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h
new file mode 100644
index 0000000..bd9a89b
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/syscalls.h
@@ -0,0 +1,5 @@
+#ifdef __i386__
+#include "syscalls_32.h"
+#else
+#include "syscalls_64.h"
+#endif
diff --git a/arch/um/sys-i386/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls_32.h
similarity index 100%
rename from arch/um/sys-i386/shared/sysdep/syscalls.h
rename to arch/x86/um/shared/sysdep/syscalls_32.h
diff --git a/arch/um/sys-x86_64/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls_64.h
similarity index 96%
rename from arch/um/sys-x86_64/shared/sysdep/syscalls.h
rename to arch/x86/um/shared/sysdep/syscalls_64.h
index 7cfb0b08..8a7d5e1 100644
--- a/arch/um/sys-x86_64/shared/sysdep/syscalls.h
+++ b/arch/x86/um/shared/sysdep/syscalls_64.h
@@ -9,7 +9,6 @@
 
 #include <linux/msg.h>
 #include <linux/shm.h>
-#include <kern_constants.h>
 
 typedef long syscall_handler_t(void);
 
diff --git a/arch/um/sys-i386/shared/sysdep/tls.h b/arch/x86/um/shared/sysdep/tls.h
similarity index 79%
rename from arch/um/sys-i386/shared/sysdep/tls.h
rename to arch/x86/um/shared/sysdep/tls.h
index 3455075..27cce00 100644
--- a/arch/um/sys-i386/shared/sysdep/tls.h
+++ b/arch/x86/um/shared/sysdep/tls.h
@@ -17,16 +17,23 @@
 	unsigned int  limit_in_pages:1;
 	unsigned int  seg_not_present:1;
 	unsigned int  useable:1;
+#ifdef __x86_64__
+	unsigned int  lm:1;
+#endif
 } user_desc_t;
 
 # else /* __KERNEL__ */
 
-#  include <ldt.h>
 typedef struct user_desc user_desc_t;
 
 # endif /* __KERNEL__ */
 
+extern int os_set_thread_area(user_desc_t *info, int pid);
+extern int os_get_thread_area(user_desc_t *info, int pid);
+
+#ifdef __i386__
 #define GDT_ENTRY_TLS_MIN_I386 6
 #define GDT_ENTRY_TLS_MIN_X86_64 12
+#endif
 
 #endif /* _SYSDEP_TLS_H */
diff --git a/arch/um/sys-i386/signal.c b/arch/x86/um/signal.c
similarity index 69%
rename from arch/um/sys-i386/signal.c
rename to arch/x86/um/signal.c
index 89a4662..4883b95 100644
--- a/arch/um/sys-i386/signal.c
+++ b/arch/x86/um/signal.c
@@ -1,36 +1,20 @@
 /*
- * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright (C) 2003 PathScale, Inc.
+ * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
+
+#include <linux/personality.h>
 #include <linux/ptrace.h>
+#include <linux/kernel.h>
 #include <asm/unistd.h>
 #include <asm/uaccess.h>
 #include <asm/ucontext.h>
 #include "frame_kern.h"
 #include "skas.h"
 
-void copy_sc(struct uml_pt_regs *regs, void *from)
-{
-	struct sigcontext *sc = from;
-
-	REGS_GS(regs->gp) = sc->gs;
-	REGS_FS(regs->gp) = sc->fs;
-	REGS_ES(regs->gp) = sc->es;
-	REGS_DS(regs->gp) = sc->ds;
-	REGS_EDI(regs->gp) = sc->di;
-	REGS_ESI(regs->gp) = sc->si;
-	REGS_EBP(regs->gp) = sc->bp;
-	REGS_SP(regs->gp) = sc->sp;
-	REGS_EBX(regs->gp) = sc->bx;
-	REGS_EDX(regs->gp) = sc->dx;
-	REGS_ECX(regs->gp) = sc->cx;
-	REGS_EAX(regs->gp) = sc->ax;
-	REGS_IP(regs->gp) = sc->ip;
-	REGS_CS(regs->gp) = sc->cs;
-	REGS_EFLAGS(regs->gp) = sc->flags;
-	REGS_SS(regs->gp) = sc->ss;
-}
+#ifdef CONFIG_X86_32
 
 /*
  * FPU tag word conversions.
@@ -164,6 +148,8 @@
 
 extern int have_fpx_regs;
 
+#endif
+
 static int copy_sc_from_user(struct pt_regs *regs,
 			     struct sigcontext __user *from)
 {
@@ -174,8 +160,45 @@
 	if (err)
 		return err;
 
+#define GETREG(regno, regname) regs->regs.gp[HOST_##regno] = sc.regname
+
+#ifdef CONFIG_X86_32
+	GETREG(GS, gs);
+	GETREG(FS, fs);
+	GETREG(ES, es);
+	GETREG(DS, ds);
+#endif
+	GETREG(DI, di);
+	GETREG(SI, si);
+	GETREG(BP, bp);
+	GETREG(SP, sp);
+	GETREG(BX, bx);
+	GETREG(DX, dx);
+	GETREG(CX, cx);
+	GETREG(AX, ax);
+	GETREG(IP, ip);
+
+#ifdef CONFIG_X86_64
+	GETREG(R8, r8);
+	GETREG(R9, r9);
+	GETREG(R10, r10);
+	GETREG(R11, r11);
+	GETREG(R12, r12);
+	GETREG(R13, r13);
+	GETREG(R14, r14);
+	GETREG(R15, r15);
+#endif
+
+	GETREG(CS, cs);
+	GETREG(EFLAGS, flags);
+#ifdef CONFIG_X86_32
+	GETREG(SS, ss);
+#endif
+
+#undef GETREG
+
 	pid = userspace_pid[current_thread_info()->cpu];
-	copy_sc(&regs->regs, &sc);
+#ifdef CONFIG_X86_32
 	if (have_fpx_regs) {
 		struct user_fxsr_struct fpx;
 
@@ -196,8 +219,9 @@
 			       -err);
 			return 1;
 		}
-	}
-	else {
+	} else
+#endif
+	{
 		struct user_i387_struct fp;
 
 		err = copy_from_user(&fp, sc.fpstate,
@@ -213,43 +237,66 @@
 			return 1;
 		}
 	}
-
 	return 0;
 }
 
 static int copy_sc_to_user(struct sigcontext __user *to,
 			   struct _fpstate __user *to_fp, struct pt_regs *regs,
-			   unsigned long sp)
+			   unsigned long mask)
 {
 	struct sigcontext sc;
 	struct faultinfo * fi = &current->thread.arch.faultinfo;
 	int err, pid;
+	memset(&sc, 0, sizeof(struct sigcontext));
 
-	sc.gs = REGS_GS(regs->regs.gp);
-	sc.fs = REGS_FS(regs->regs.gp);
-	sc.es = REGS_ES(regs->regs.gp);
-	sc.ds = REGS_DS(regs->regs.gp);
-	sc.di = REGS_EDI(regs->regs.gp);
-	sc.si = REGS_ESI(regs->regs.gp);
-	sc.bp = REGS_EBP(regs->regs.gp);
-	sc.sp = sp;
-	sc.bx = REGS_EBX(regs->regs.gp);
-	sc.dx = REGS_EDX(regs->regs.gp);
-	sc.cx = REGS_ECX(regs->regs.gp);
-	sc.ax = REGS_EAX(regs->regs.gp);
-	sc.ip = REGS_IP(regs->regs.gp);
-	sc.cs = REGS_CS(regs->regs.gp);
-	sc.flags = REGS_EFLAGS(regs->regs.gp);
-	sc.sp_at_signal = regs->regs.gp[UESP];
-	sc.ss = regs->regs.gp[SS];
+#define PUTREG(regno, regname) sc.regname = regs->regs.gp[HOST_##regno]
+
+#ifdef CONFIG_X86_32
+	PUTREG(GS, gs);
+	PUTREG(FS, fs);
+	PUTREG(ES, es);
+	PUTREG(DS, ds);
+#endif
+	PUTREG(DI, di);
+	PUTREG(SI, si);
+	PUTREG(BP, bp);
+	PUTREG(SP, sp);
+	PUTREG(BX, bx);
+	PUTREG(DX, dx);
+	PUTREG(CX, cx);
+	PUTREG(AX, ax);
+#ifdef CONFIG_X86_64
+	PUTREG(R8, r8);
+	PUTREG(R9, r9);
+	PUTREG(R10, r10);
+	PUTREG(R11, r11);
+	PUTREG(R12, r12);
+	PUTREG(R13, r13);
+	PUTREG(R14, r14);
+	PUTREG(R15, r15);
+#endif
+
 	sc.cr2 = fi->cr2;
 	sc.err = fi->error_code;
 	sc.trapno = fi->trap_no;
-
-	to_fp = (to_fp ? to_fp : (struct _fpstate __user *) (to + 1));
+	PUTREG(IP, ip);
+	PUTREG(CS, cs);
+	PUTREG(EFLAGS, flags);
+#ifdef CONFIG_X86_32
+	PUTREG(SP, sp_at_signal);
+	PUTREG(SS, ss);
+#endif
+#undef PUTREG
+	sc.oldmask = mask;
 	sc.fpstate = to_fp;
 
+	err = copy_to_user(to, &sc, sizeof(struct sigcontext));
+	if (err)
+		return 1;
+
 	pid = userspace_pid[current_thread_info()->cpu];
+
+#ifdef CONFIG_X86_32
 	if (have_fpx_regs) {
 		struct user_fxsr_struct fpx;
 
@@ -272,8 +319,9 @@
 		if (copy_to_user(&to_fp->_fxsr_env[0], &fpx,
 				 sizeof(struct user_fxsr_struct)))
 			return 1;
-	}
-	else {
+	} else
+#endif
+	{
 		struct user_i387_struct fp;
 
 		err = save_fp_registers(pid, (unsigned long *) &fp);
@@ -281,9 +329,10 @@
 			return 1;
 	}
 
-	return copy_to_user(to, &sc, sizeof(sc));
+	return 0;
 }
 
+#ifdef CONFIG_X86_32
 static int copy_ucontext_to_user(struct ucontext __user *uc,
 				 struct _fpstate __user *fp, sigset_t *set,
 				 unsigned long sp)
@@ -293,7 +342,7 @@
 	err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp);
 	err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags);
 	err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size);
-	err |= copy_sc_to_user(&uc->uc_mcontext, fp, &current->thread.regs, sp);
+	err |= copy_sc_to_user(&uc->uc_mcontext, fp, &current->thread.regs, 0);
 	err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set));
 	return err;
 }
@@ -326,7 +375,6 @@
 {
 	struct sigframe __user *frame;
 	void __user *restorer;
-	unsigned long save_sp = PT_REGS_SP(regs);
 	int err = 0;
 
 	/* This is the same calculation as i386 - ((sp + 4) & 15) == 0 */
@@ -339,20 +387,9 @@
 	if (ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
 
-	/* Update SP now because the page fault handler refuses to extend
-	 * the stack if the faulting address is too far below the current
-	 * SP, which frame now certainly is.  If there's an error, the original
-	 * value is restored on the way out.
-	 * When writing the sigcontext to the stack, we have to write the
-	 * original value, so that's passed to copy_sc_to_user, which does
-	 * the right thing with it.
-	 */
-	PT_REGS_SP(regs) = (unsigned long) frame;
-
 	err |= __put_user(restorer, &frame->pretcode);
 	err |= __put_user(sig, &frame->sig);
-	err |= copy_sc_to_user(&frame->sc, NULL, regs, save_sp);
-	err |= __put_user(mask->sig[0], &frame->sc.oldmask);
+	err |= copy_sc_to_user(&frame->sc, &frame->fpstate, regs, mask->sig[0]);
 	if (_NSIG_WORDS > 1)
 		err |= __copy_to_user(&frame->extramask, &mask->sig[1],
 				      sizeof(frame->extramask));
@@ -369,7 +406,7 @@
 	err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
 
 	if (err)
-		goto err;
+		return err;
 
 	PT_REGS_SP(regs) = (unsigned long) frame;
 	PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
@@ -380,10 +417,6 @@
 	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
 		ptrace_notify(SIGTRAP);
 	return 0;
-
-err:
-	PT_REGS_SP(regs) = save_sp;
-	return err;
 }
 
 int setup_signal_stack_si(unsigned long stack_top, int sig,
@@ -392,7 +425,6 @@
 {
 	struct rt_sigframe __user *frame;
 	void __user *restorer;
-	unsigned long save_sp = PT_REGS_SP(regs);
 	int err = 0;
 
 	stack_top &= -8UL;
@@ -404,16 +436,13 @@
 	if (ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
 
-	/* See comment above about why this is here */
-	PT_REGS_SP(regs) = (unsigned long) frame;
-
 	err |= __put_user(restorer, &frame->pretcode);
 	err |= __put_user(sig, &frame->sig);
 	err |= __put_user(&frame->info, &frame->pinfo);
 	err |= __put_user(&frame->uc, &frame->puc);
 	err |= copy_siginfo_to_user(&frame->info, info);
 	err |= copy_ucontext_to_user(&frame->uc, &frame->fpstate, mask,
-				     save_sp);
+					PT_REGS_SP(regs));
 
 	/*
 	 * This is movl $,%eax ; int $0x80
@@ -427,8 +456,9 @@
 	err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
 
 	if (err)
-		goto err;
+		return err;
 
+	PT_REGS_SP(regs) = (unsigned long) frame;
 	PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
 	PT_REGS_EAX(regs) = (unsigned long) sig;
 	PT_REGS_EDX(regs) = (unsigned long) &frame->info;
@@ -437,13 +467,9 @@
 	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
 		ptrace_notify(SIGTRAP);
 	return 0;
-
-err:
-	PT_REGS_SP(regs) = save_sp;
-	return err;
 }
 
-long sys_sigreturn(struct pt_regs regs)
+long sys_sigreturn(struct pt_regs *regs)
 {
 	unsigned long sp = PT_REGS_SP(&current->thread.regs);
 	struct sigframe __user *frame = (struct sigframe __user *)(sp - 8);
@@ -458,11 +484,7 @@
 		goto segfault;
 
 	sigdelsetmask(&set, ~_BLOCKABLE);
-
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	set_current_blocked(&set);
 
 	if (copy_sc_from_user(&current->thread.regs, sc))
 		goto segfault;
@@ -476,24 +498,107 @@
 	return 0;
 }
 
-long sys_rt_sigreturn(struct pt_regs regs)
+#else
+
+struct rt_sigframe
+{
+	char __user *pretcode;
+	struct ucontext uc;
+	struct siginfo info;
+	struct _fpstate fpstate;
+};
+
+int setup_signal_stack_si(unsigned long stack_top, int sig,
+			  struct k_sigaction *ka, struct pt_regs * regs,
+			  siginfo_t *info, sigset_t *set)
+{
+	struct rt_sigframe __user *frame;
+	int err = 0;
+	struct task_struct *me = current;
+
+	frame = (struct rt_sigframe __user *)
+		round_down(stack_top - sizeof(struct rt_sigframe), 16);
+	/* Subtract 128 for a red zone and 8 for proper alignment */
+	frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8);
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		goto out;
+
+	if (ka->sa.sa_flags & SA_SIGINFO) {
+		err |= copy_siginfo_to_user(&frame->info, info);
+		if (err)
+			goto out;
+	}
+
+	/* Create the ucontext.  */
+	err |= __put_user(0, &frame->uc.uc_flags);
+	err |= __put_user(0, &frame->uc.uc_link);
+	err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+	err |= __put_user(sas_ss_flags(PT_REGS_SP(regs)),
+			  &frame->uc.uc_stack.ss_flags);
+	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
+	err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs,
+			       set->sig[0]);
+	err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate);
+	if (sizeof(*set) == 16) {
+		__put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
+		__put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
+	}
+	else
+		err |= __copy_to_user(&frame->uc.uc_sigmask, set,
+				      sizeof(*set));
+
+	/*
+	 * Set up to return from userspace.  If provided, use a stub
+	 * already in userspace.
+	 */
+	/* x86-64 should always use SA_RESTORER. */
+	if (ka->sa.sa_flags & SA_RESTORER)
+		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
+	else
+		/* could use a vstub here */
+		return err;
+
+	if (err)
+		return err;
+
+	/* Set up registers for signal handler */
+	{
+		struct exec_domain *ed = current_thread_info()->exec_domain;
+		if (unlikely(ed && ed->signal_invmap && sig < 32))
+			sig = ed->signal_invmap[sig];
+	}
+
+	PT_REGS_SP(regs) = (unsigned long) frame;
+	PT_REGS_RDI(regs) = sig;
+	/* In case the signal handler was declared without prototypes */
+	PT_REGS_RAX(regs) = 0;
+
+	/*
+	 * This also works for non SA_SIGINFO handlers because they expect the
+	 * next argument after the signal number on the stack.
+	 */
+	PT_REGS_RSI(regs) = (unsigned long) &frame->info;
+	PT_REGS_RDX(regs) = (unsigned long) &frame->uc;
+	PT_REGS_RIP(regs) = (unsigned long) ka->sa.sa_handler;
+ out:
+	return err;
+}
+#endif
+
+long sys_rt_sigreturn(struct pt_regs *regs)
 {
 	unsigned long sp = PT_REGS_SP(&current->thread.regs);
 	struct rt_sigframe __user *frame =
-		(struct rt_sigframe __user *) (sp - 4);
-	sigset_t set;
+		(struct rt_sigframe __user *)(sp - sizeof(long));
 	struct ucontext __user *uc = &frame->uc;
-	int sig_size = _NSIG_WORDS * sizeof(unsigned long);
+	sigset_t set;
 
-	if (copy_from_user(&set, &uc->uc_sigmask, sig_size))
+	if (copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
 		goto segfault;
 
 	sigdelsetmask(&set, ~_BLOCKABLE);
-
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	set_current_blocked(&set);
 
 	if (copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext))
 		goto segfault;
@@ -506,3 +611,14 @@
 	force_sig(SIGSEGV, current);
 	return 0;
 }
+
+#ifdef CONFIG_X86_32
+long ptregs_sigreturn(void)
+{
+	return sys_sigreturn(NULL);
+}
+long ptregs_rt_sigreturn(void)
+{
+	return sys_rt_sigreturn(NULL);
+}
+#endif
diff --git a/arch/um/sys-i386/stub.S b/arch/x86/um/stub_32.S
similarity index 100%
rename from arch/um/sys-i386/stub.S
rename to arch/x86/um/stub_32.S
diff --git a/arch/um/sys-x86_64/stub.S b/arch/x86/um/stub_64.S
similarity index 100%
rename from arch/um/sys-x86_64/stub.S
rename to arch/x86/um/stub_64.S
diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c
new file mode 100644
index 0000000..b7450bd
--- /dev/null
+++ b/arch/x86/um/stub_segv.c
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include "sysdep/stub.h"
+#include "sysdep/faultinfo.h"
+#include "sysdep/mcontext.h"
+
+void __attribute__ ((__section__ (".__syscall_stub")))
+stub_segv_handler(int sig, siginfo_t *info, void *p)
+{
+	struct ucontext *uc = p;
+
+	GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA),
+			      &uc->uc_mcontext);
+	trap_myself();
+}
+
diff --git a/arch/um/sys-i386/sys_call_table.S b/arch/x86/um/sys_call_table_32.S
similarity index 81%
rename from arch/um/sys-i386/sys_call_table.S
rename to arch/x86/um/sys_call_table_32.S
index de274071..a7ca80d 100644
--- a/arch/um/sys-i386/sys_call_table.S
+++ b/arch/x86/um/sys_call_table_32.S
@@ -13,16 +13,14 @@
 #define ptregs_execve sys_execve
 #define ptregs_iopl sys_iopl
 #define ptregs_vm86old sys_vm86old
-#define ptregs_sigreturn sys_sigreturn
 #define ptregs_clone sys_clone
 #define ptregs_vm86 sys_vm86
-#define ptregs_rt_sigreturn sys_rt_sigreturn
 #define ptregs_sigaltstack sys_sigaltstack
 #define ptregs_vfork sys_vfork
 
 .section .rodata,"a"
 
-#include "../../x86/kernel/syscall_table_32.S"
+#include "../kernel/syscall_table_32.S"
 
 ENTRY(syscall_table_size)
 .long .-sys_call_table
diff --git a/arch/um/sys-x86_64/syscall_table.c b/arch/x86/um/sys_call_table_64.c
similarity index 95%
rename from arch/um/sys-x86_64/syscall_table.c
rename to arch/x86/um/sys_call_table_64.c
index 47d469e..99522f7 100644
--- a/arch/um/sys-x86_64/syscall_table.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -6,7 +6,6 @@
 #include <linux/linkage.h>
 #include <linux/sys.h>
 #include <linux/cache.h>
-#include <kern_constants.h>
 
 #define __NO_STUBS
 
@@ -59,7 +58,7 @@
  */
 
 sys_call_ptr_t sys_call_table[] __cacheline_aligned = {
-#include "../../x86/include/asm/unistd_64.h"
+#include <asm/unistd_64.h>
 };
 
 int syscall_table_size = sizeof(sys_call_table);
diff --git a/arch/um/sys-i386/syscalls.c b/arch/x86/um/syscalls_32.c
similarity index 100%
rename from arch/um/sys-i386/syscalls.c
rename to arch/x86/um/syscalls_32.c
diff --git a/arch/um/sys-x86_64/syscalls.c b/arch/x86/um/syscalls_64.c
similarity index 100%
rename from arch/um/sys-x86_64/syscalls.c
rename to arch/x86/um/syscalls_64.c
diff --git a/arch/um/sys-i386/sysrq.c b/arch/x86/um/sysrq_32.c
similarity index 100%
rename from arch/um/sys-i386/sysrq.c
rename to arch/x86/um/sysrq_32.c
diff --git a/arch/um/sys-x86_64/sysrq.c b/arch/x86/um/sysrq_64.c
similarity index 94%
rename from arch/um/sys-x86_64/sysrq.c
rename to arch/x86/um/sysrq_64.c
index f4f82be..e891343 100644
--- a/arch/um/sys-x86_64/sysrq.c
+++ b/arch/x86/um/sysrq_64.c
@@ -20,7 +20,7 @@
 		current->comm, print_tainted(), init_utsname()->release);
 	printk(KERN_INFO "RIP: %04lx:[<%016lx>]\n", PT_REGS_CS(regs) & 0xffff,
 	       PT_REGS_RIP(regs));
-	printk(KERN_INFO "RSP: %016lx  EFLAGS: %08lx\n", PT_REGS_RSP(regs),
+	printk(KERN_INFO "RSP: %016lx  EFLAGS: %08lx\n", PT_REGS_SP(regs),
 	       PT_REGS_EFLAGS(regs));
 	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
 	       PT_REGS_RAX(regs), PT_REGS_RBX(regs), PT_REGS_RCX(regs));
diff --git a/arch/um/sys-i386/tls.c b/arch/x86/um/tls_32.c
similarity index 100%
rename from arch/um/sys-i386/tls.c
rename to arch/x86/um/tls_32.c
diff --git a/arch/um/sys-x86_64/tls.c b/arch/x86/um/tls_64.c
similarity index 100%
rename from arch/um/sys-x86_64/tls.c
rename to arch/x86/um/tls_64.c
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
new file mode 100644
index 0000000..ca49be8
--- /dev/null
+++ b/arch/x86/um/user-offsets.c
@@ -0,0 +1,80 @@
+#include <stdio.h>
+#include <stddef.h>
+#include <signal.h>
+#include <sys/poll.h>
+#include <sys/mman.h>
+#include <sys/user.h>
+#define __FRAME_OFFSETS
+#include <asm/ptrace.h>
+#include <asm/types.h>
+
+#define DEFINE(sym, val) \
+	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define DEFINE_LONGS(sym, val) \
+	asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long)))
+
+void foo(void)
+{
+#ifdef __i386__
+	DEFINE_LONGS(HOST_FP_SIZE, sizeof(struct user_fpregs_struct));
+	DEFINE_LONGS(HOST_FPX_SIZE, sizeof(struct user_fpxregs_struct));
+
+	DEFINE(HOST_IP, EIP);
+	DEFINE(HOST_SP, UESP);
+	DEFINE(HOST_EFLAGS, EFL);
+	DEFINE(HOST_AX, EAX);
+	DEFINE(HOST_BX, EBX);
+	DEFINE(HOST_CX, ECX);
+	DEFINE(HOST_DX, EDX);
+	DEFINE(HOST_SI, ESI);
+	DEFINE(HOST_DI, EDI);
+	DEFINE(HOST_BP, EBP);
+	DEFINE(HOST_CS, CS);
+	DEFINE(HOST_SS, SS);
+	DEFINE(HOST_DS, DS);
+	DEFINE(HOST_FS, FS);
+	DEFINE(HOST_ES, ES);
+	DEFINE(HOST_GS, GS);
+	DEFINE(HOST_ORIG_AX, ORIG_EAX);
+#else
+	DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long));
+	DEFINE_LONGS(HOST_BX, RBX);
+	DEFINE_LONGS(HOST_CX, RCX);
+	DEFINE_LONGS(HOST_DI, RDI);
+	DEFINE_LONGS(HOST_SI, RSI);
+	DEFINE_LONGS(HOST_DX, RDX);
+	DEFINE_LONGS(HOST_BP, RBP);
+	DEFINE_LONGS(HOST_AX, RAX);
+	DEFINE_LONGS(HOST_R8, R8);
+	DEFINE_LONGS(HOST_R9, R9);
+	DEFINE_LONGS(HOST_R10, R10);
+	DEFINE_LONGS(HOST_R11, R11);
+	DEFINE_LONGS(HOST_R12, R12);
+	DEFINE_LONGS(HOST_R13, R13);
+	DEFINE_LONGS(HOST_R14, R14);
+	DEFINE_LONGS(HOST_R15, R15);
+	DEFINE_LONGS(HOST_ORIG_AX, ORIG_RAX);
+	DEFINE_LONGS(HOST_CS, CS);
+	DEFINE_LONGS(HOST_SS, SS);
+	DEFINE_LONGS(HOST_EFLAGS, EFLAGS);
+#if 0
+	DEFINE_LONGS(HOST_FS, FS);
+	DEFINE_LONGS(HOST_GS, GS);
+	DEFINE_LONGS(HOST_DS, DS);
+	DEFINE_LONGS(HOST_ES, ES);
+#endif
+
+	DEFINE_LONGS(HOST_IP, RIP);
+	DEFINE_LONGS(HOST_SP, RSP);
+#endif
+
+	DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct));
+	DEFINE(UM_POLLIN, POLLIN);
+	DEFINE(UM_POLLPRI, POLLPRI);
+	DEFINE(UM_POLLOUT, POLLOUT);
+
+	DEFINE(UM_PROT_READ, PROT_READ);
+	DEFINE(UM_PROT_WRITE, PROT_WRITE);
+	DEFINE(UM_PROT_EXEC, PROT_EXEC);
+}
diff --git a/arch/um/sys-x86_64/vdso/Makefile b/arch/x86/um/vdso/Makefile
similarity index 94%
rename from arch/um/sys-x86_64/vdso/Makefile
rename to arch/x86/um/vdso/Makefile
index 5dffe6d..6c803ca 100644
--- a/arch/um/sys-x86_64/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -46,8 +46,8 @@
 #
 # vDSO code runs in userspace and -pg doesn't help with profiling anyway.
 #
-CFLAGS_REMOVE_vdso-note.o = -pg
-CFLAGS_REMOVE_um_vdso.o = -pg
+CFLAGS_REMOVE_vdso-note.o = -pg -fprofile-arcs -ftest-coverage
+CFLAGS_REMOVE_um_vdso.o = -pg -fprofile-arcs -ftest-coverage
 
 targets += vdso-syms.lds
 obj-$(VDSO64-y)			+= vdso-syms.lds
diff --git a/arch/um/sys-x86_64/vdso/checkundef.sh b/arch/x86/um/vdso/checkundef.sh
similarity index 100%
rename from arch/um/sys-x86_64/vdso/checkundef.sh
rename to arch/x86/um/vdso/checkundef.sh
diff --git a/arch/um/sys-x86_64/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c
similarity index 100%
rename from arch/um/sys-x86_64/vdso/um_vdso.c
rename to arch/x86/um/vdso/um_vdso.c
diff --git a/arch/um/sys-x86_64/vdso/vdso-layout.lds.S b/arch/x86/um/vdso/vdso-layout.lds.S
similarity index 100%
rename from arch/um/sys-x86_64/vdso/vdso-layout.lds.S
rename to arch/x86/um/vdso/vdso-layout.lds.S
diff --git a/arch/um/sys-x86_64/vdso/vdso-note.S b/arch/x86/um/vdso/vdso-note.S
similarity index 100%
rename from arch/um/sys-x86_64/vdso/vdso-note.S
rename to arch/x86/um/vdso/vdso-note.S
diff --git a/arch/um/sys-x86_64/vdso/vdso.S b/arch/x86/um/vdso/vdso.S
similarity index 69%
rename from arch/um/sys-x86_64/vdso/vdso.S
rename to arch/x86/um/vdso/vdso.S
index ec82c16..1cb468a 100644
--- a/arch/um/sys-x86_64/vdso/vdso.S
+++ b/arch/x86/um/vdso/vdso.S
@@ -4,7 +4,7 @@
 
 	.globl vdso_start, vdso_end
 vdso_start:
-	.incbin "arch/um/sys-x86_64/vdso/vdso.so"
+	.incbin "arch/x86/um/vdso/vdso.so"
 vdso_end:
 
 __FINIT
diff --git a/arch/um/sys-x86_64/vdso/vdso.lds.S b/arch/x86/um/vdso/vdso.lds.S
similarity index 100%
rename from arch/um/sys-x86_64/vdso/vdso.lds.S
rename to arch/x86/um/vdso/vdso.lds.S
diff --git a/arch/um/sys-x86_64/vdso/vma.c b/arch/x86/um/vdso/vma.c
similarity index 95%
rename from arch/um/sys-x86_64/vdso/vma.c
rename to arch/x86/um/vdso/vma.c
index 9495c8d..91f4ec9 100644
--- a/arch/um/sys-x86_64/vdso/vma.c
+++ b/arch/x86/um/vdso/vma.c
@@ -28,7 +28,7 @@
 
 	um_vdso_addr = task_size - PAGE_SIZE;
 
-	vdsop = kmalloc(GFP_KERNEL, sizeof(struct page *));
+	vdsop = kmalloc(sizeof(struct page *), GFP_KERNEL);
 	if (!vdsop)
 		goto oom;
 
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 423fd56..4364303 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -298,7 +298,7 @@
 config RTC
 	tristate "Enhanced Real Time Clock Support (legacy PC RTC driver)"
 	depends on !PPC && !PARISC && !IA64 && !M68K && !SPARC && !FRV \
-			&& !ARM && !SUPERH && !S390 && !AVR32 && !BLACKFIN
+			&& !ARM && !SUPERH && !S390 && !AVR32 && !BLACKFIN && !UML
 	---help---
 	  If you say Y here and create a character special file /dev/rtc with
 	  major number 10 and minor number 135 using mknod ("man mknod"), you
@@ -346,7 +346,7 @@
 
 config GEN_RTC
 	tristate "Generic /dev/rtc emulation"
-	depends on RTC!=y && !IA64 && !ARM && !M32R && !MIPS && !SPARC && !FRV && !S390 && !SUPERH && !AVR32 && !BLACKFIN
+	depends on RTC!=y && !IA64 && !ARM && !M32R && !MIPS && !SPARC && !FRV && !S390 && !SUPERH && !AVR32 && !BLACKFIN && !UML
 	---help---
 	  If you say Y here and create a character special file /dev/rtc with
 	  major number 10 and minor number 135 using mknod ("man mknod"), you
@@ -490,7 +490,7 @@
 
 config PC8736x_GPIO
 	tristate "NatSemi PC8736x GPIO Support"
-	depends on X86_32
+	depends on X86_32 && !UML
 	default SCx200_GPIO	# mostly N
 	select NSC_GPIO		# needed for support routines
 	help
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index e013587..0689bf6 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -235,3 +235,18 @@
 	 module will be called ppc4xx-rng.
 
 	 If unsure, say N.
+
+config UML_RANDOM
+	depends on UML
+	tristate "Hardware random number generator"
+	help
+	  This option enables UML's "hardware" random number generator.  It
+	  attaches itself to the host's /dev/random, supplying as much entropy
+	  as the host has, rather than the small amount the UML gets from its
+	  own drivers.  It registers itself as a standard hardware random number
+	  generator, major 10, minor 183, and the canonical device name is
+	  /dev/hwrng.
+	  The way to make use of this is to install the rng-tools package
+	  (check your distro, or download from
+	  http://sourceforge.net/projects/gkernel/).  rngd periodically reads
+	  /dev/hwrng and injects the entropy into /dev/random.
diff --git a/drivers/char/ttyprintk.c b/drivers/char/ttyprintk.c
index a1f68af..f228615 100644
--- a/drivers/char/ttyprintk.c
+++ b/drivers/char/ttyprintk.c
@@ -170,7 +170,7 @@
 	.ioctl = tpk_ioctl,
 };
 
-struct tty_port_operations null_ops = { };
+static struct tty_port_operations null_ops = { };
 
 static struct tty_driver *ttyprintk_driver;
 
diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig
index 23e82e46..001b147 100644
--- a/drivers/input/Kconfig
+++ b/drivers/input/Kconfig
@@ -3,7 +3,7 @@
 #
 
 menu "Input device support"
-	depends on !S390
+	depends on !S390 && !UML
 
 config INPUT
 	tristate "Generic input layer (needed for keyboard, mouse, ...)" if EXPERT
diff --git a/drivers/isdn/Kconfig b/drivers/isdn/Kconfig
index 4fb6016..a233ed5 100644
--- a/drivers/isdn/Kconfig
+++ b/drivers/isdn/Kconfig
@@ -5,7 +5,7 @@
 menuconfig ISDN
 	bool "ISDN support"
 	depends on NET
-	depends on !S390
+	depends on !S390 && !UML
 	---help---
 	  ISDN ("Integrated Services Digital Network", called RNIS in France)
 	  is a fully digital telephone service that can be used for voice and
diff --git a/drivers/net/wireless/ath/Kconfig b/drivers/net/wireless/ath/Kconfig
index 0735488..0960224 100644
--- a/drivers/net/wireless/ath/Kconfig
+++ b/drivers/net/wireless/ath/Kconfig
@@ -1,6 +1,6 @@
 menuconfig ATH_COMMON
 	tristate "Atheros Wireless Cards"
-	depends on CFG80211
+	depends on CFG80211 && (!UML || BROKEN)
 	---help---
 	  This will enable the support for the Atheros wireless drivers.
 	  ath5k, ath9k, ath9k_htc and ar9170 drivers share some common code, this option
diff --git a/drivers/net/wireless/rtlwifi/Kconfig b/drivers/net/wireless/rtlwifi/Kconfig
index 45e1476..d6c42e6 100644
--- a/drivers/net/wireless/rtlwifi/Kconfig
+++ b/drivers/net/wireless/rtlwifi/Kconfig
@@ -12,7 +12,7 @@
 
 config RTL8192SE
 	tristate "Realtek RTL8192SE/RTL8191SE PCIe Wireless Network Adapter"
-	depends on MAC80211 && EXPERIMENTAL
+	depends on MAC80211 && EXPERIMENTAL && PCI
 	select FW_LOADER
 	select RTLWIFI
 	---help---
@@ -23,7 +23,7 @@
 
 config RTL8192DE
 	tristate "Realtek RTL8192DE/RTL8188DE PCIe Wireless Network Adapter"
-	depends on MAC80211 && EXPERIMENTAL
+	depends on MAC80211 && EXPERIMENTAL && PCI
 	select FW_LOADER
 	select RTLWIFI
 	---help---
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 57de051..9f88641 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -70,6 +70,7 @@
 
 config BATTERY_DS2780
 	tristate "DS2780 battery driver"
+	depends on HAS_IOMEM
 	select W1
 	select W1_SLAVE_DS2780
 	help
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 5a538fc..53eb4e5 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -8,7 +8,7 @@
 menuconfig RTC_CLASS
 	bool "Real Time Clock"
 	default n
-	depends on !S390
+	depends on !S390 && !UML
 	select RTC_LIB
 	help
 	  Generic RTC class support. If you say yes here, you will
diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index 8816f53..b3d1741 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -1,6 +1,6 @@
 config VT
 	bool "Virtual terminal" if EXPERT
-	depends on !S390
+	depends on !S390 && !UML
 	select INPUT
 	default y
 	---help---
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 86b0735..64c6752 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -726,7 +726,7 @@
 
 config SBC7240_WDT
 	tristate "SBC Nano 7240 Watchdog Timer"
-	depends on X86_32
+	depends on X86_32 && !UML
 	---help---
 	  This is the driver for the hardware watchdog found on the IEI
 	  single board computers EPIC Nano 7240 (and likely others). This
@@ -1174,6 +1174,10 @@
 	  by Xen 4.0 and newer.  The watchdog timeout period is normally one
 	  minute but can be changed with a boot-time parameter.
 
+config UML_WATCHDOG
+	tristate "UML watchdog"
+	depends on UML
+
 #
 # ISA-based Watchdog Cards
 #
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 8f44cef..a8cbe1b 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -421,7 +421,7 @@
 void ext2_init_block_alloc_info(struct inode *inode)
 {
 	struct ext2_inode_info *ei = EXT2_I(inode);
-	struct ext2_block_alloc_info *block_i = ei->i_block_alloc_info;
+	struct ext2_block_alloc_info *block_i;
 	struct super_block *sb = inode->i_sb;
 
 	block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 1dd62ed..bd8ac16 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -327,10 +327,10 @@
 	if (ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count))
 		return ERR_PTR(-ESTALE);
 
-	/* iget isn't really right if the inode is currently unallocated!!
-	 * ext2_read_inode currently does appropriate checks, but
-	 * it might be "neater" to call ext2_get_inode first and check
-	 * if the inode is valid.....
+	/*
+	 * ext2_iget isn't quite right if the inode is currently unallocated!
+	 * However ext2_iget currently does appropriate checks to handle stale
+	 * inodes so everything is OK.
 	 */
 	inode = ext2_iget(sb, ino);
 	if (IS_ERR(inode))
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 6386d76..a203892 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -427,7 +427,7 @@
 void ext3_init_block_alloc_info(struct inode *inode)
 {
 	struct ext3_inode_info *ei = EXT3_I(inode);
-	struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info;
+	struct ext3_block_alloc_info *block_i;
 	struct super_block *sb = inode->i_sb;
 
 	block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
@@ -1440,14 +1440,14 @@
  *
  * Check if filesystem has at least 1 free block available for allocation.
  */
-static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
+static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation)
 {
 	ext3_fsblk_t free_blocks, root_blocks;
 
 	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
 	root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
 	if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
-		sbi->s_resuid != current_fsuid() &&
+		!use_reservation && sbi->s_resuid != current_fsuid() &&
 		(sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
 		return 0;
 	}
@@ -1468,7 +1468,7 @@
  */
 int ext3_should_retry_alloc(struct super_block *sb, int *retries)
 {
-	if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3)
+	if (!ext3_has_free_blocks(EXT3_SB(sb), 0) || (*retries)++ > 3)
 		return 0;
 
 	jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
@@ -1546,7 +1546,7 @@
 	if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
 		my_rsv = &block_i->rsv_window_node;
 
-	if (!ext3_has_free_blocks(sbi)) {
+	if (!ext3_has_free_blocks(sbi, IS_NOQUOTA(inode))) {
 		*errp = -ENOSPC;
 		goto out;
 	}
@@ -1924,9 +1924,10 @@
  * reaches any used block. Then issue a TRIM command on this extent and free
  * the extent in the block bitmap. This is done until whole group is scanned.
  */
-ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
-				ext3_grpblk_t start, ext3_grpblk_t max,
-				ext3_grpblk_t minblocks)
+static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
+					unsigned int group,
+					ext3_grpblk_t start, ext3_grpblk_t max,
+					ext3_grpblk_t minblocks)
 {
 	handle_t *handle;
 	ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index d494c55..1860ed3 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -61,13 +61,6 @@
 	if (ret)
 		goto out;
 
-	/*
-	 * Taking the mutex here just to keep consistent with how fsync was
-	 * called previously, however it looks like we don't need to take
-	 * i_mutex at all.
-	 */
-	mutex_lock(&inode->i_mutex);
-
 	J_ASSERT(ext3_journal_current_handle() == NULL);
 
 	/*
@@ -85,7 +78,6 @@
 	 *  safe in-journal, which is all fsync() needs to ensure.
 	 */
 	if (ext3_should_journal_data(inode)) {
-		mutex_unlock(&inode->i_mutex);
 		ret = ext3_force_commit(inode->i_sb);
 		goto out;
 	}
@@ -108,8 +100,6 @@
 	 */
 	if (needs_barrier)
 		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
-
-	mutex_unlock(&inode->i_mutex);
 out:
 	trace_ext3_sync_file_exit(inode, ret);
 	return ret;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 835d4ea..5c866e0 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -178,42 +178,6 @@
 }
 
 /*
- * There are two policies for allocating an inode.  If the new inode is
- * a directory, then a forward search is made for a block group with both
- * free space and a low directory-to-inode ratio; if that fails, then of
- * the groups with above-average free space, that group with the fewest
- * directories already is chosen.
- *
- * For other inodes, search forward from the parent directory\'s block
- * group to find a free inode.
- */
-static int find_group_dir(struct super_block *sb, struct inode *parent)
-{
-	int ngroups = EXT3_SB(sb)->s_groups_count;
-	unsigned int freei, avefreei;
-	struct ext3_group_desc *desc, *best_desc = NULL;
-	int group, best_group = -1;
-
-	freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter);
-	avefreei = freei / ngroups;
-
-	for (group = 0; group < ngroups; group++) {
-		desc = ext3_get_group_desc (sb, group, NULL);
-		if (!desc || !desc->bg_free_inodes_count)
-			continue;
-		if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
-			continue;
-		if (!best_desc ||
-		    (le16_to_cpu(desc->bg_free_blocks_count) >
-		     le16_to_cpu(best_desc->bg_free_blocks_count))) {
-			best_group = group;
-			best_desc = desc;
-		}
-	}
-	return best_group;
-}
-
-/*
  * Orlov's allocator for directories.
  *
  * We always try to spread first-level directories.
@@ -436,12 +400,9 @@
 
 	sbi = EXT3_SB(sb);
 	es = sbi->s_es;
-	if (S_ISDIR(mode)) {
-		if (test_opt (sb, OLDALLOC))
-			group = find_group_dir(sb, dir);
-		else
-			group = find_group_orlov(sb, dir);
-	} else
+	if (S_ISDIR(mode))
+		group = find_group_orlov(sb, dir);
+	else
 		group = find_group_other(sb, dir);
 
 	err = -ENOSPC;
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index c7f4394..ba1b54e 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -150,30 +150,6 @@
 		mnt_drop_write(filp->f_path.mnt);
 		return err;
 	}
-#ifdef CONFIG_JBD_DEBUG
-	case EXT3_IOC_WAIT_FOR_READONLY:
-		/*
-		 * This is racy - by the time we're woken up and running,
-		 * the superblock could be released.  And the module could
-		 * have been unloaded.  So sue me.
-		 *
-		 * Returns 1 if it slept, else zero.
-		 */
-		{
-			struct super_block *sb = inode->i_sb;
-			DECLARE_WAITQUEUE(wait, current);
-			int ret = 0;
-
-			set_current_state(TASK_INTERRUPTIBLE);
-			add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
-			if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) {
-				schedule();
-				ret = 1;
-			}
-			remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
-			return ret;
-		}
-#endif
 	case EXT3_IOC_GETRSVSZ:
 		if (test_opt(inode->i_sb, RESERVATION)
 			&& S_ISREG(inode->i_mode)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 7beb69a..922d289 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -652,8 +652,6 @@
 		seq_puts(seq, ",nouid32");
 	if (test_opt(sb, DEBUG))
 		seq_puts(seq, ",debug");
-	if (test_opt(sb, OLDALLOC))
-		seq_puts(seq, ",oldalloc");
 #ifdef CONFIG_EXT3_FS_XATTR
 	if (test_opt(sb, XATTR_USER))
 		seq_puts(seq, ",user_xattr");
@@ -1049,10 +1047,12 @@
 			set_opt (sbi->s_mount_opt, DEBUG);
 			break;
 		case Opt_oldalloc:
-			set_opt (sbi->s_mount_opt, OLDALLOC);
+			ext3_msg(sb, KERN_WARNING,
+				"Ignoring deprecated oldalloc option");
 			break;
 		case Opt_orlov:
-			clear_opt (sbi->s_mount_opt, OLDALLOC);
+			ext3_msg(sb, KERN_WARNING,
+				"Ignoring deprecated orlov option");
 			break;
 #ifdef CONFIG_EXT3_FS_XATTR
 		case Opt_user_xattr:
@@ -2669,13 +2669,13 @@
 			/*
 			 * If we have an unprocessed orphan list hanging
 			 * around from a previously readonly bdev mount,
-			 * require a full umount/remount for now.
+			 * require a full umount & mount for now.
 			 */
 			if (es->s_last_orphan) {
 				ext3_msg(sb, KERN_WARNING, "warning: couldn't "
 				       "remount RDWR because of unprocessed "
 				       "orphan inode list.  Please "
-				       "umount/remount instead.");
+				       "umount & mount instead.");
 				err = -EINVAL;
 				goto restore_opts;
 			}
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f8224ad..f6dba45 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -28,7 +28,8 @@
  */
 
 /*
- * Calculate the block group number and offset, given a block number
+ * Calculate the block group number and offset into the block/cluster
+ * allocation bitmap, given a block number
  */
 void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 		ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
@@ -37,7 +38,8 @@
 	ext4_grpblk_t offset;
 
 	blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
-	offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb));
+	offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)) >>
+		EXT4_SB(sb)->s_cluster_bits;
 	if (offsetp)
 		*offsetp = offset;
 	if (blockgrpp)
@@ -55,131 +57,170 @@
 	return 0;
 }
 
-static int ext4_group_used_meta_blocks(struct super_block *sb,
+/* Return the number of clusters used for file system metadata; this
+ * represents the overhead needed by the file system.
+ */
+unsigned ext4_num_overhead_clusters(struct super_block *sb,
+				    ext4_group_t block_group,
+				    struct ext4_group_desc *gdp)
+{
+	unsigned num_clusters;
+	int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c;
+	ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
+	ext4_fsblk_t itbl_blk;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+	/* This is the number of clusters used by the superblock,
+	 * block group descriptors, and reserved block group
+	 * descriptor blocks */
+	num_clusters = ext4_num_base_meta_clusters(sb, block_group);
+
+	/*
+	 * For the allocation bitmaps and inode table, we first need
+	 * to check to see if the block is in the block group.  If it
+	 * is, then check to see if the cluster is already accounted
+	 * for in the clusters used for the base metadata cluster, or
+	 * if we can increment the base metadata cluster to include
+	 * that block.  Otherwise, we will have to track the cluster
+	 * used for the allocation bitmap or inode table explicitly.
+	 * Normally all of these blocks are contiguous, so the special
+	 * case handling shouldn't be necessary except for *very*
+	 * unusual file system layouts.
+	 */
+	if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
+		block_cluster = EXT4_B2C(sbi, (start -
+					       ext4_block_bitmap(sb, gdp)));
+		if (block_cluster < num_clusters)
+			block_cluster = -1;
+		else if (block_cluster == num_clusters) {
+			num_clusters++;
+			block_cluster = -1;
+		}
+	}
+
+	if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
+		inode_cluster = EXT4_B2C(sbi,
+					 start - ext4_inode_bitmap(sb, gdp));
+		if (inode_cluster < num_clusters)
+			inode_cluster = -1;
+		else if (inode_cluster == num_clusters) {
+			num_clusters++;
+			inode_cluster = -1;
+		}
+	}
+
+	itbl_blk = ext4_inode_table(sb, gdp);
+	for (i = 0; i < sbi->s_itb_per_group; i++) {
+		if (ext4_block_in_group(sb, itbl_blk + i, block_group)) {
+			c = EXT4_B2C(sbi, start - itbl_blk + i);
+			if ((c < num_clusters) || (c == inode_cluster) ||
+			    (c == block_cluster) || (c == itbl_cluster))
+				continue;
+			if (c == num_clusters) {
+				num_clusters++;
+				continue;
+			}
+			num_clusters++;
+			itbl_cluster = c;
+		}
+	}
+
+	if (block_cluster != -1)
+		num_clusters++;
+	if (inode_cluster != -1)
+		num_clusters++;
+
+	return num_clusters;
+}
+
+static unsigned int num_clusters_in_group(struct super_block *sb,
+					  ext4_group_t block_group)
+{
+	unsigned int blocks;
+
+	if (block_group == ext4_get_groups_count(sb) - 1) {
+		/*
+		 * Even though mke2fs always initializes the first and
+		 * last group, just in case some other tool was used,
+		 * we need to make sure we calculate the right free
+		 * blocks.
+		 */
+		blocks = ext4_blocks_count(EXT4_SB(sb)->s_es) -
+			ext4_group_first_block_no(sb, block_group);
+	} else
+		blocks = EXT4_BLOCKS_PER_GROUP(sb);
+	return EXT4_NUM_B2C(EXT4_SB(sb), blocks);
+}
+
+/* Initializes an uninitialized block bitmap */
+void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
+			    ext4_group_t block_group,
+			    struct ext4_group_desc *gdp)
+{
+	unsigned int bit, bit_max;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	ext4_fsblk_t start, tmp;
+	int flex_bg = 0;
+
+	J_ASSERT_BH(bh, buffer_locked(bh));
+
+	/* If checksum is bad mark all blocks used to prevent allocation
+	 * essentially implementing a per-group read-only flag. */
+	if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+		ext4_error(sb, "Checksum bad for group %u", block_group);
+		ext4_free_group_clusters_set(sb, gdp, 0);
+		ext4_free_inodes_set(sb, gdp, 0);
+		ext4_itable_unused_set(sb, gdp, 0);
+		memset(bh->b_data, 0xff, sb->s_blocksize);
+		return;
+	}
+	memset(bh->b_data, 0, sb->s_blocksize);
+
+	bit_max = ext4_num_base_meta_clusters(sb, block_group);
+	for (bit = 0; bit < bit_max; bit++)
+		ext4_set_bit(bit, bh->b_data);
+
+	start = ext4_group_first_block_no(sb, block_group);
+
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+		flex_bg = 1;
+
+	/* Set bits for block and inode bitmaps, and inode table */
+	tmp = ext4_block_bitmap(sb, gdp);
+	if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+		ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
+
+	tmp = ext4_inode_bitmap(sb, gdp);
+	if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+		ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
+
+	tmp = ext4_inode_table(sb, gdp);
+	for (; tmp < ext4_inode_table(sb, gdp) +
+		     sbi->s_itb_per_group; tmp++) {
+		if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+			ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
+	}
+
+	/*
+	 * Also if the number of blocks within the group is less than
+	 * the blocksize * 8 ( which is the size of bitmap ), set rest
+	 * of the block bitmap to 1
+	 */
+	ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
+			     sb->s_blocksize * 8, bh->b_data);
+}
+
+/* Return the number of free blocks in a block group.  It is used when
+ * the block bitmap is uninitialized, so we can't just count the bits
+ * in the bitmap. */
+unsigned ext4_free_clusters_after_init(struct super_block *sb,
 				       ext4_group_t block_group,
 				       struct ext4_group_desc *gdp)
 {
-	ext4_fsblk_t tmp;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	/* block bitmap, inode bitmap, and inode table blocks */
-	int used_blocks = sbi->s_itb_per_group + 2;
-
-	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
-		if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp),
-					block_group))
-			used_blocks--;
-
-		if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp),
-					block_group))
-			used_blocks--;
-
-		tmp = ext4_inode_table(sb, gdp);
-		for (; tmp < ext4_inode_table(sb, gdp) +
-				sbi->s_itb_per_group; tmp++) {
-			if (!ext4_block_in_group(sb, tmp, block_group))
-				used_blocks -= 1;
-		}
-	}
-	return used_blocks;
+	return num_clusters_in_group(sb, block_group) - 
+		ext4_num_overhead_clusters(sb, block_group, gdp);
 }
 
-/* Initializes an uninitialized block bitmap if given, and returns the
- * number of blocks free in the group. */
-unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
-		 ext4_group_t block_group, struct ext4_group_desc *gdp)
-{
-	int bit, bit_max;
-	ext4_group_t ngroups = ext4_get_groups_count(sb);
-	unsigned free_blocks, group_blocks;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-
-	if (bh) {
-		J_ASSERT_BH(bh, buffer_locked(bh));
-
-		/* If checksum is bad mark all blocks used to prevent allocation
-		 * essentially implementing a per-group read-only flag. */
-		if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
-			ext4_error(sb, "Checksum bad for group %u",
-					block_group);
-			ext4_free_blks_set(sb, gdp, 0);
-			ext4_free_inodes_set(sb, gdp, 0);
-			ext4_itable_unused_set(sb, gdp, 0);
-			memset(bh->b_data, 0xff, sb->s_blocksize);
-			return 0;
-		}
-		memset(bh->b_data, 0, sb->s_blocksize);
-	}
-
-	/* Check for superblock and gdt backups in this group */
-	bit_max = ext4_bg_has_super(sb, block_group);
-
-	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
-	    block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
-			  sbi->s_desc_per_block) {
-		if (bit_max) {
-			bit_max += ext4_bg_num_gdb(sb, block_group);
-			bit_max +=
-				le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
-		}
-	} else { /* For META_BG_BLOCK_GROUPS */
-		bit_max += ext4_bg_num_gdb(sb, block_group);
-	}
-
-	if (block_group == ngroups - 1) {
-		/*
-		 * Even though mke2fs always initialize first and last group
-		 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
-		 * to make sure we calculate the right free blocks
-		 */
-		group_blocks = ext4_blocks_count(sbi->s_es) -
-			ext4_group_first_block_no(sb, ngroups - 1);
-	} else {
-		group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
-	}
-
-	free_blocks = group_blocks - bit_max;
-
-	if (bh) {
-		ext4_fsblk_t start, tmp;
-		int flex_bg = 0;
-
-		for (bit = 0; bit < bit_max; bit++)
-			ext4_set_bit(bit, bh->b_data);
-
-		start = ext4_group_first_block_no(sb, block_group);
-
-		if (EXT4_HAS_INCOMPAT_FEATURE(sb,
-					      EXT4_FEATURE_INCOMPAT_FLEX_BG))
-			flex_bg = 1;
-
-		/* Set bits for block and inode bitmaps, and inode table */
-		tmp = ext4_block_bitmap(sb, gdp);
-		if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
-			ext4_set_bit(tmp - start, bh->b_data);
-
-		tmp = ext4_inode_bitmap(sb, gdp);
-		if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
-			ext4_set_bit(tmp - start, bh->b_data);
-
-		tmp = ext4_inode_table(sb, gdp);
-		for (; tmp < ext4_inode_table(sb, gdp) +
-				sbi->s_itb_per_group; tmp++) {
-			if (!flex_bg ||
-				ext4_block_in_group(sb, tmp, block_group))
-				ext4_set_bit(tmp - start, bh->b_data);
-		}
-		/*
-		 * Also if the number of blocks within the group is
-		 * less than the blocksize * 8 ( which is the size
-		 * of bitmap ), set rest of the block bitmap to 1
-		 */
-		ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
-				     bh->b_data);
-	}
-	return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
-}
-
-
 /*
  * The free blocks are managed by bitmaps.  A file system contains several
  * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
@@ -362,53 +403,54 @@
 }
 
 /**
- * ext4_has_free_blocks()
+ * ext4_has_free_clusters()
  * @sbi:	in-core super block structure.
- * @nblocks:	number of needed blocks
+ * @nclusters:	number of needed blocks
+ * @flags:	flags from ext4_mb_new_blocks()
  *
- * Check if filesystem has nblocks free & available for allocation.
+ * Check if filesystem has nclusters free & available for allocation.
  * On success return 1, return 0 on failure.
  */
-static int ext4_has_free_blocks(struct ext4_sb_info *sbi,
-				s64 nblocks, unsigned int flags)
+static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
+				  s64 nclusters, unsigned int flags)
 {
-	s64 free_blocks, dirty_blocks, root_blocks;
-	struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
-	struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
+	s64 free_clusters, dirty_clusters, root_clusters;
+	struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
+	struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;
 
-	free_blocks  = percpu_counter_read_positive(fbc);
-	dirty_blocks = percpu_counter_read_positive(dbc);
-	root_blocks = ext4_r_blocks_count(sbi->s_es);
+	free_clusters  = percpu_counter_read_positive(fcc);
+	dirty_clusters = percpu_counter_read_positive(dcc);
+	root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es));
 
-	if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
-						EXT4_FREEBLOCKS_WATERMARK) {
-		free_blocks  = percpu_counter_sum_positive(fbc);
-		dirty_blocks = percpu_counter_sum_positive(dbc);
+	if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
+					EXT4_FREECLUSTERS_WATERMARK) {
+		free_clusters  = EXT4_C2B(sbi, percpu_counter_sum_positive(fcc));
+		dirty_clusters = percpu_counter_sum_positive(dcc);
 	}
-	/* Check whether we have space after
-	 * accounting for current dirty blocks & root reserved blocks.
+	/* Check whether we have space after accounting for current
+	 * dirty clusters & root reserved clusters.
 	 */
-	if (free_blocks >= ((root_blocks + nblocks) + dirty_blocks))
+	if (free_clusters >= ((root_clusters + nclusters) + dirty_clusters))
 		return 1;
 
-	/* Hm, nope.  Are (enough) root reserved blocks available? */
+	/* Hm, nope.  Are (enough) root reserved clusters available? */
 	if (sbi->s_resuid == current_fsuid() ||
 	    ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
 	    capable(CAP_SYS_RESOURCE) ||
 		(flags & EXT4_MB_USE_ROOT_BLOCKS)) {
 
-		if (free_blocks >= (nblocks + dirty_blocks))
+		if (free_clusters >= (nclusters + dirty_clusters))
 			return 1;
 	}
 
 	return 0;
 }
 
-int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
-			   s64 nblocks, unsigned int flags)
+int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
+			     s64 nclusters, unsigned int flags)
 {
-	if (ext4_has_free_blocks(sbi, nblocks, flags)) {
-		percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks);
+	if (ext4_has_free_clusters(sbi, nclusters, flags)) {
+		percpu_counter_add(&sbi->s_dirtyclusters_counter, nclusters);
 		return 0;
 	} else
 		return -ENOSPC;
@@ -428,7 +470,7 @@
  */
 int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 {
-	if (!ext4_has_free_blocks(EXT4_SB(sb), 1, 0) ||
+	if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
 	    (*retries)++ > 3 ||
 	    !EXT4_SB(sb)->s_journal)
 		return 0;
@@ -444,7 +486,7 @@
  * @handle:             handle to this transaction
  * @inode:              file inode
  * @goal:               given target block(filesystem wide)
- * @count:		pointer to total number of blocks needed
+ * @count:		pointer to total number of clusters needed
  * @errp:               error code
  *
  * Return 1st allocated block number on success, *count stores total account
@@ -476,18 +518,19 @@
 		spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 		EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
 		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-		dquot_alloc_block_nofail(inode, ar.len);
+		dquot_alloc_block_nofail(inode,
+				EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
 	}
 	return ret;
 }
 
 /**
- * ext4_count_free_blocks() -- count filesystem free blocks
+ * ext4_count_free_clusters() -- count filesystem free clusters
  * @sb:		superblock
  *
- * Adds up the number of free blocks from each block group.
+ * Adds up the number of free clusters from each block group.
  */
-ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
+ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
 {
 	ext4_fsblk_t desc_count;
 	struct ext4_group_desc *gdp;
@@ -508,7 +551,7 @@
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
-		desc_count += ext4_free_blks_count(sb, gdp);
+		desc_count += ext4_free_group_clusters(sb, gdp);
 		brelse(bitmap_bh);
 		bitmap_bh = ext4_read_block_bitmap(sb, i);
 		if (bitmap_bh == NULL)
@@ -516,12 +559,13 @@
 
 		x = ext4_count_free(bitmap_bh, sb->s_blocksize);
 		printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
-			i, ext4_free_blks_count(sb, gdp), x);
+			i, ext4_free_group_clusters(sb, gdp), x);
 		bitmap_count += x;
 	}
 	brelse(bitmap_bh);
-	printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu"
-		", computed = %llu, %llu\n", ext4_free_blocks_count(es),
+	printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
+	       ", computed = %llu, %llu\n",
+	       EXT4_B2C(sbi, ext4_free_blocks_count(es)),
 	       desc_count, bitmap_count);
 	return bitmap_count;
 #else
@@ -530,7 +574,7 @@
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
-		desc_count += ext4_free_blks_count(sb, gdp);
+		desc_count += ext4_free_group_clusters(sb, gdp);
 	}
 
 	return desc_count;
@@ -620,6 +664,31 @@
 
 }
 
+/*
+ * This function returns the number of file system metadata clusters at
+ * the beginning of a block group, including the reserved gdt blocks.
+ */
+unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+				     ext4_group_t block_group)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	unsigned num;
+
+	/* Check for superblock and gdt backups in this group */
+	num = ext4_bg_has_super(sb, block_group);
+
+	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
+	    block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
+			  sbi->s_desc_per_block) {
+		if (num) {
+			num += ext4_bg_num_gdb(sb, block_group);
+			num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
+		}
+	} else { /* For META_BG_BLOCK_GROUPS */
+		num += ext4_bg_num_gdb(sb, block_group);
+	}
+	return EXT4_NUM_B2C(sbi, num);
+}
 /**
  *	ext4_inode_to_goal_block - return a hint for block allocation
  *	@inode: inode for block allocation
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cec3145..5b0e26a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -144,9 +144,17 @@
 #define EXT4_MAP_UNWRITTEN	(1 << BH_Unwritten)
 #define EXT4_MAP_BOUNDARY	(1 << BH_Boundary)
 #define EXT4_MAP_UNINIT		(1 << BH_Uninit)
+/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
+ * ext4_map_blocks wants to know whether or not the underlying cluster has
+ * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
+ * the requested mapping was from previously mapped (or delayed allocated)
+ * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
+ * should never appear on buffer_head's state flags.
+ */
+#define EXT4_MAP_FROM_CLUSTER	(1 << BH_AllocFromCluster)
 #define EXT4_MAP_FLAGS		(EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
 				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
-				 EXT4_MAP_UNINIT)
+				 EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
 
 struct ext4_map_blocks {
 	ext4_fsblk_t m_pblk;
@@ -239,8 +247,11 @@
 # define EXT4_BLOCK_SIZE(s)		(EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
 #endif
 #define	EXT4_ADDR_PER_BLOCK(s)		(EXT4_BLOCK_SIZE(s) / sizeof(__u32))
+#define EXT4_CLUSTER_SIZE(s)		(EXT4_BLOCK_SIZE(s) << \
+					 EXT4_SB(s)->s_cluster_bits)
 #ifdef __KERNEL__
 # define EXT4_BLOCK_SIZE_BITS(s)	((s)->s_blocksize_bits)
+# define EXT4_CLUSTER_BITS(s)		(EXT4_SB(s)->s_cluster_bits)
 #else
 # define EXT4_BLOCK_SIZE_BITS(s)	((s)->s_log_block_size + 10)
 #endif
@@ -258,6 +269,14 @@
 #endif
 #define EXT4_BLOCK_ALIGN(size, blkbits)		ALIGN((size), (1 << (blkbits)))
 
+/* Translate a block number to a cluster number */
+#define EXT4_B2C(sbi, blk)	((blk) >> (sbi)->s_cluster_bits)
+/* Translate a cluster number to a block number */
+#define EXT4_C2B(sbi, cluster)	((cluster) << (sbi)->s_cluster_bits)
+/* Translate # of blks to # of clusters */
+#define EXT4_NUM_B2C(sbi, blks)	(((blks) + (sbi)->s_cluster_ratio - 1) >> \
+				 (sbi)->s_cluster_bits)
+
 /*
  * Structure of a blocks group descriptor
  */
@@ -289,7 +308,7 @@
 
 struct flex_groups {
 	atomic_t free_inodes;
-	atomic_t free_blocks;
+	atomic_t free_clusters;
 	atomic_t used_dirs;
 };
 
@@ -306,6 +325,7 @@
 #define EXT4_DESC_SIZE(s)		(EXT4_SB(s)->s_desc_size)
 #ifdef __KERNEL__
 # define EXT4_BLOCKS_PER_GROUP(s)	(EXT4_SB(s)->s_blocks_per_group)
+# define EXT4_CLUSTERS_PER_GROUP(s)	(EXT4_SB(s)->s_clusters_per_group)
 # define EXT4_DESC_PER_BLOCK(s)		(EXT4_SB(s)->s_desc_per_block)
 # define EXT4_INODES_PER_GROUP(s)	(EXT4_SB(s)->s_inodes_per_group)
 # define EXT4_DESC_PER_BLOCK_BITS(s)	(EXT4_SB(s)->s_desc_per_block_bits)
@@ -358,8 +378,7 @@
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
-			   EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
-			   EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
+			   EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
 			   EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
 			   EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
 
@@ -520,6 +539,8 @@
 #define EXT4_GET_BLOCKS_PUNCH_OUT_EXT		0x0020
 	/* Don't normalize allocation size (used for fallocate) */
 #define EXT4_GET_BLOCKS_NO_NORMALIZE		0x0040
+	/* Request will not result in inode size update (user for fallocate) */
+#define EXT4_GET_BLOCKS_KEEP_SIZE		0x0080
 
 /*
  * Flags used by ext4_free_blocks
@@ -528,6 +549,13 @@
 #define EXT4_FREE_BLOCKS_FORGET		0x0002
 #define EXT4_FREE_BLOCKS_VALIDATED	0x0004
 #define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE	0x0008
+#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER	0x0010
+#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER	0x0020
+
+/*
+ * Flags used by ext4_discard_partial_page_buffers
+ */
+#define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED	0x0001
 
 /*
  * ioctl commands
@@ -538,9 +566,6 @@
 #define	EXT4_IOC_SETVERSION		_IOW('f', 4, long)
 #define	EXT4_IOC_GETVERSION_OLD		FS_IOC_GETVERSION
 #define	EXT4_IOC_SETVERSION_OLD		FS_IOC_SETVERSION
-#ifdef CONFIG_JBD2_DEBUG
-#define EXT4_IOC_WAIT_FOR_READONLY	_IOR('f', 99, long)
-#endif
 #define EXT4_IOC_GETRSVSZ		_IOR('f', 5, long)
 #define EXT4_IOC_SETRSVSZ		_IOW('f', 6, long)
 #define EXT4_IOC_GROUP_EXTEND		_IOW('f', 7, unsigned long)
@@ -563,9 +588,6 @@
 #define EXT4_IOC32_SETRSVSZ		_IOW('f', 6, int)
 #define EXT4_IOC32_GROUP_EXTEND		_IOW('f', 7, unsigned int)
 #define EXT4_IOC32_GROUP_ADD		_IOW('f', 8, struct compat_ext4_new_group_input)
-#ifdef CONFIG_JBD2_DEBUG
-#define EXT4_IOC32_WAIT_FOR_READONLY	_IOR('f', 99, int)
-#endif
 #define EXT4_IOC32_GETVERSION_OLD	FS_IOC32_GETVERSION
 #define EXT4_IOC32_SETVERSION_OLD	FS_IOC32_SETVERSION
 #endif
@@ -837,6 +859,7 @@
 	ext4_group_t	i_last_alloc_group;
 
 	/* allocation reservation info for delalloc */
+	/* In case of bigalloc, these refer to clusters rather than blocks */
 	unsigned int i_reserved_data_blocks;
 	unsigned int i_reserved_meta_blocks;
 	unsigned int i_allocated_meta_blocks;
@@ -886,7 +909,6 @@
 /*
  * Mount flags
  */
-#define EXT4_MOUNT_OLDALLOC		0x00002  /* Don't use the new Orlov allocator */
 #define EXT4_MOUNT_GRPID		0x00004	/* Create files with directory's group */
 #define EXT4_MOUNT_DEBUG		0x00008	/* Some debugging messages */
 #define EXT4_MOUNT_ERRORS_CONT		0x00010	/* Continue on errors */
@@ -918,6 +940,9 @@
 #define EXT4_MOUNT_DISCARD		0x40000000 /* Issue DISCARD requests */
 #define EXT4_MOUNT_INIT_INODE_TABLE	0x80000000 /* Initialize uninitialized itables */
 
+#define EXT4_MOUNT2_EXPLICIT_DELALLOC	0x00000001 /* User explicitly
+						      specified delalloc */
+
 #define clear_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt &= \
 						~EXT4_MOUNT_##opt
 #define set_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt |= \
@@ -968,9 +993,9 @@
 /*10*/	__le32	s_free_inodes_count;	/* Free inodes count */
 	__le32	s_first_data_block;	/* First Data Block */
 	__le32	s_log_block_size;	/* Block size */
-	__le32	s_obso_log_frag_size;	/* Obsoleted fragment size */
+	__le32	s_log_cluster_size;	/* Allocation cluster size */
 /*20*/	__le32	s_blocks_per_group;	/* # Blocks per group */
-	__le32	s_obso_frags_per_group;	/* Obsoleted fragments per group */
+	__le32	s_clusters_per_group;	/* # Clusters per group */
 	__le32	s_inodes_per_group;	/* # Inodes per group */
 	__le32	s_mtime;		/* Mount time */
 /*30*/	__le32	s_wtime;		/* Write time */
@@ -1066,7 +1091,10 @@
 	__u8	s_last_error_func[32];	/* function where the error happened */
 #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
 	__u8	s_mount_opts[64];
-	__le32	s_reserved[112];        /* Padding to the end of the block */
+	__le32	s_usr_quota_inum;	/* inode for tracking user quota */
+	__le32	s_grp_quota_inum;	/* inode for tracking group quota */
+	__le32	s_overhead_clusters;	/* overhead blocks/clusters in fs */
+	__le32  s_reserved[109];        /* Padding to the end of the block */
 };
 
 #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
@@ -1086,6 +1114,7 @@
 	unsigned long s_desc_size;	/* Size of a group descriptor in bytes */
 	unsigned long s_inodes_per_block;/* Number of inodes per block */
 	unsigned long s_blocks_per_group;/* Number of blocks in a group */
+	unsigned long s_clusters_per_group; /* Number of clusters in a group */
 	unsigned long s_inodes_per_group;/* Number of inodes in a group */
 	unsigned long s_itb_per_group;	/* Number of inode table blocks per group */
 	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
@@ -1094,6 +1123,8 @@
 	ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
 	unsigned long s_overhead_last;  /* Last calculated overhead */
 	unsigned long s_blocks_last;    /* Last seen block count */
+	unsigned int s_cluster_ratio;	/* Number of blocks per cluster */
+	unsigned int s_cluster_bits;	/* log2 of s_cluster_ratio */
 	loff_t s_bitmap_maxbytes;	/* max bytes for bitmap files */
 	struct buffer_head * s_sbh;	/* Buffer containing the super block */
 	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
@@ -1117,10 +1148,10 @@
 	u32 s_hash_seed[4];
 	int s_def_hash_version;
 	int s_hash_unsigned;	/* 3 if hash should be signed, 0 if not */
-	struct percpu_counter s_freeblocks_counter;
+	struct percpu_counter s_freeclusters_counter;
 	struct percpu_counter s_freeinodes_counter;
 	struct percpu_counter s_dirs_counter;
-	struct percpu_counter s_dirtyblocks_counter;
+	struct percpu_counter s_dirtyclusters_counter;
 	struct blockgroup_lock *s_blockgroup_lock;
 	struct proc_dir_entry *s_proc;
 	struct kobject s_kobj;
@@ -1136,10 +1167,6 @@
 	u32 s_max_batch_time;
 	u32 s_min_batch_time;
 	struct block_device *journal_bdev;
-#ifdef CONFIG_JBD2_DEBUG
-	struct timer_list turn_ro_timer;	/* For turning read-only (crash simulation) */
-	wait_queue_head_t ro_wait_queue;	/* For people waiting for the fs to go read-only */
-#endif
 #ifdef CONFIG_QUOTA
 	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
 	int s_jquota_fmt;			/* Format of quota to use */
@@ -1248,6 +1275,15 @@
 		 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
 }
 
+static inline void ext4_set_io_unwritten_flag(struct inode *inode,
+					      struct ext4_io_end *io_end)
+{
+	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+		io_end->flag |= EXT4_IO_END_UNWRITTEN;
+		atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+	}
+}
+
 /*
  * Inode dynamic state flags
  */
@@ -1360,6 +1396,7 @@
 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK	0x0020
 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE	0x0040
 #define EXT4_FEATURE_RO_COMPAT_QUOTA		0x0100
+#define EXT4_FEATURE_RO_COMPAT_BIGALLOC		0x0200
 
 #define EXT4_FEATURE_INCOMPAT_COMPRESSION	0x0001
 #define EXT4_FEATURE_INCOMPAT_FILETYPE		0x0002
@@ -1402,7 +1439,8 @@
 					 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
 					 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
 					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
-					 EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
+					 EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
+					 EXT4_FEATURE_RO_COMPAT_BIGALLOC)
 
 /*
  * Default values for user and/or group using reserved blocks
@@ -1735,9 +1773,9 @@
 					 unsigned int flags,
 					 unsigned long *count,
 					 int *errp);
-extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
-				  s64 nblocks, unsigned int flags);
-extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
+extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
+				    s64 nclusters, unsigned int flags);
+extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
 extern void ext4_check_blocks_bitmap(struct super_block *);
 extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
 						    ext4_group_t block_group,
@@ -1745,12 +1783,18 @@
 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
 struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
 				      ext4_group_t block_group);
-extern unsigned ext4_init_block_bitmap(struct super_block *sb,
-				       struct buffer_head *bh,
-				       ext4_group_t group,
-				       struct ext4_group_desc *desc);
-#define ext4_free_blocks_after_init(sb, group, desc)			\
-		ext4_init_block_bitmap(sb, NULL, group, desc)
+extern void ext4_init_block_bitmap(struct super_block *sb,
+				   struct buffer_head *bh,
+				   ext4_group_t group,
+				   struct ext4_group_desc *desc);
+extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
+					      ext4_group_t block_group,
+					      struct ext4_group_desc *gdp);
+extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+					    ext4_group_t block_group);
+extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
+					   ext4_group_t block_group,
+					   struct ext4_group_desc *gdp);
 ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
 
 /* dir.c */
@@ -1776,7 +1820,8 @@
 
 /* ialloc.c */
 extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
-				    const struct qstr *qstr, __u32 goal);
+				    const struct qstr *qstr, __u32 goal,
+				    uid_t *owner);
 extern void ext4_free_inode(handle_t *, struct inode *);
 extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -1839,6 +1884,12 @@
 		struct address_space *mapping, loff_t from);
 extern int ext4_block_zero_page_range(handle_t *handle,
 		struct address_space *mapping, loff_t from, loff_t length);
+extern int ext4_discard_partial_page_buffers(handle_t *handle,
+		struct address_space *mapping, loff_t from,
+		loff_t length, int flags);
+extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+		struct inode *inode, struct page *page, loff_t from,
+		loff_t length, int flags);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -1927,8 +1978,8 @@
 				      struct ext4_group_desc *bg);
 extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 				     struct ext4_group_desc *bg);
-extern __u32 ext4_free_blks_count(struct super_block *sb,
-				struct ext4_group_desc *bg);
+extern __u32 ext4_free_group_clusters(struct super_block *sb,
+				      struct ext4_group_desc *bg);
 extern __u32 ext4_free_inodes_count(struct super_block *sb,
 				 struct ext4_group_desc *bg);
 extern __u32 ext4_used_dirs_count(struct super_block *sb,
@@ -1941,8 +1992,9 @@
 				  struct ext4_group_desc *bg, ext4_fsblk_t blk);
 extern void ext4_inode_table_set(struct super_block *sb,
 				 struct ext4_group_desc *bg, ext4_fsblk_t blk);
-extern void ext4_free_blks_set(struct super_block *sb,
-			       struct ext4_group_desc *bg, __u32 count);
+extern void ext4_free_group_clusters_set(struct super_block *sb,
+					 struct ext4_group_desc *bg,
+					 __u32 count);
 extern void ext4_free_inodes_set(struct super_block *sb,
 				struct ext4_group_desc *bg, __u32 count);
 extern void ext4_used_dirs_set(struct super_block *sb,
@@ -2051,13 +2103,13 @@
 } while (0)
 
 #ifdef CONFIG_SMP
-/* Each CPU can accumulate percpu_counter_batch blocks in their local
- * counters. So we need to make sure we have free blocks more
+/* Each CPU can accumulate percpu_counter_batch clusters in their local
+ * counters. So we need to make sure we have free clusters more
  * than percpu_counter_batch  * nr_cpu_ids. Also add a window of 4 times.
  */
-#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
+#define EXT4_FREECLUSTERS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
 #else
-#define EXT4_FREEBLOCKS_WATERMARK 0
+#define EXT4_FREECLUSTERS_WATERMARK 0
 #endif
 
 static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
@@ -2243,10 +2295,19 @@
 enum ext4_state_bits {
 	BH_Uninit	/* blocks are allocated but uninitialized on disk */
 	  = BH_JBDPrivateStart,
+	BH_AllocFromCluster,	/* allocated blocks were part of already
+				 * allocated cluster. Note that this flag will
+				 * never, ever appear in a buffer_head's state
+				 * flag. See EXT4_MAP_FROM_CLUSTER to see where
+				 * this is used. */
+	BH_Da_Mapped,	/* Delayed allocated block that now has a mapping. This
+			 * flag is set when ext4_map_blocks is called on a
+			 * delayed allocated block to get its real mapping. */
 };
 
 BUFFER_FNS(Uninit, uninit)
 TAS_BUFFER_FNS(Uninit, uninit)
+BUFFER_FNS(Da_Mapped, da_mapped)
 
 /*
  * Add new method to test wether block and inode bitmaps are properly
@@ -2282,4 +2343,6 @@
 
 #endif	/* __KERNEL__ */
 
+#include "ext4_extents.h"
+
 #endif	/* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 095c36f..a52db3a 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -290,5 +290,7 @@
 							struct ext4_ext_path *);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 extern int ext4_ext_check_inode(struct inode *inode);
+extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
+				      int search_hint_reverse);
 #endif /* _EXT4_EXTENTS */
 
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index f5240aa..aca1790 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -109,9 +109,11 @@
 
 	if (ext4_handle_valid(handle)) {
 		err = jbd2_journal_dirty_metadata(handle, bh);
-		if (err)
-			ext4_journal_abort_handle(where, line, __func__,
-						  bh, handle, err);
+		if (err) {
+			/* Errors can only happen if there is a bug */
+			handle->h_err = err;
+			__ext4_journal_stop(where, line, handle);
+		}
 	} else {
 		if (inode)
 			mark_buffer_dirty_inode(bh, inode);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 57cf568..61fa9e1 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -42,7 +42,6 @@
 #include <asm/uaccess.h>
 #include <linux/fiemap.h>
 #include "ext4_jbd2.h"
-#include "ext4_extents.h"
 
 #include <trace/events/ext4.h>
 
@@ -96,13 +95,17 @@
  *  - ENOMEM
  *  - EIO
  */
-static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
-				struct ext4_ext_path *path)
+#define ext4_ext_dirty(handle, inode, path) \
+		__ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
+static int __ext4_ext_dirty(const char *where, unsigned int line,
+			    handle_t *handle, struct inode *inode,
+			    struct ext4_ext_path *path)
 {
 	int err;
 	if (path->p_bh) {
 		/* path points to block */
-		err = ext4_handle_dirty_metadata(handle, inode, path->p_bh);
+		err = __ext4_handle_dirty_metadata(where, line, handle,
+						   inode, path->p_bh);
 	} else {
 		/* path points to leaf/index in inode body */
 		err = ext4_mark_inode_dirty(handle, inode);
@@ -114,11 +117,9 @@
 			      struct ext4_ext_path *path,
 			      ext4_lblk_t block)
 {
-	int depth;
-
 	if (path) {
+		int depth = path->p_depth;
 		struct ext4_extent *ex;
-		depth = path->p_depth;
 
 		/*
 		 * Try to predict block placement assuming that we are
@@ -180,12 +181,10 @@
 
 	size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
 			/ sizeof(struct ext4_extent);
-	if (!check) {
 #ifdef AGGRESSIVE_TEST
-		if (size > 6)
-			size = 6;
+	if (!check && size > 6)
+		size = 6;
 #endif
-	}
 	return size;
 }
 
@@ -195,12 +194,10 @@
 
 	size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
 			/ sizeof(struct ext4_extent_idx);
-	if (!check) {
 #ifdef AGGRESSIVE_TEST
-		if (size > 5)
-			size = 5;
+	if (!check && size > 5)
+		size = 5;
 #endif
-	}
 	return size;
 }
 
@@ -211,12 +208,10 @@
 	size = sizeof(EXT4_I(inode)->i_data);
 	size -= sizeof(struct ext4_extent_header);
 	size /= sizeof(struct ext4_extent);
-	if (!check) {
 #ifdef AGGRESSIVE_TEST
-		if (size > 3)
-			size = 3;
+	if (!check && size > 3)
+		size = 3;
 #endif
-	}
 	return size;
 }
 
@@ -227,12 +222,10 @@
 	size = sizeof(EXT4_I(inode)->i_data);
 	size -= sizeof(struct ext4_extent_header);
 	size /= sizeof(struct ext4_extent_idx);
-	if (!check) {
 #ifdef AGGRESSIVE_TEST
-		if (size > 4)
-			size = 4;
+	if (!check && size > 4)
+		size = 4;
 #endif
-	}
 	return size;
 }
 
@@ -244,7 +237,7 @@
 int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
-	int idxs, num = 0;
+	int idxs;
 
 	idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
 		/ sizeof(struct ext4_extent_idx));
@@ -259,6 +252,8 @@
 	 */
 	if (ei->i_da_metadata_calc_len &&
 	    ei->i_da_metadata_calc_last_lblock+1 == lblock) {
+		int num = 0;
+
 		if ((ei->i_da_metadata_calc_len % idxs) == 0)
 			num++;
 		if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
@@ -321,8 +316,6 @@
 				struct ext4_extent_header *eh,
 				int depth)
 {
-	struct ext4_extent *ext;
-	struct ext4_extent_idx *ext_idx;
 	unsigned short entries;
 	if (eh->eh_entries == 0)
 		return 1;
@@ -331,7 +324,7 @@
 
 	if (depth == 0) {
 		/* leaf entries */
-		ext = EXT_FIRST_EXTENT(eh);
+		struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
 		while (entries) {
 			if (!ext4_valid_extent(inode, ext))
 				return 0;
@@ -339,7 +332,7 @@
 			entries--;
 		}
 	} else {
-		ext_idx = EXT_FIRST_INDEX(eh);
+		struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
 		while (entries) {
 			if (!ext4_valid_extent_idx(inode, ext_idx))
 				return 0;
@@ -751,31 +744,30 @@
 		return -EIO;
 	}
 
-	len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
 	if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
 		/* insert after */
-		if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) {
-			len = (len - 1) * sizeof(struct ext4_extent_idx);
-			len = len < 0 ? 0 : len;
-			ext_debug("insert new index %d after: %llu. "
-					"move %d from 0x%p to 0x%p\n",
-					logical, ptr, len,
-					(curp->p_idx + 1), (curp->p_idx + 2));
-			memmove(curp->p_idx + 2, curp->p_idx + 1, len);
-		}
+		ext_debug("insert new index %d after: %llu\n", logical, ptr);
 		ix = curp->p_idx + 1;
 	} else {
 		/* insert before */
-		len = len * sizeof(struct ext4_extent_idx);
-		len = len < 0 ? 0 : len;
-		ext_debug("insert new index %d before: %llu. "
-				"move %d from 0x%p to 0x%p\n",
-				logical, ptr, len,
-				curp->p_idx, (curp->p_idx + 1));
-		memmove(curp->p_idx + 1, curp->p_idx, len);
+		ext_debug("insert new index %d before: %llu\n", logical, ptr);
 		ix = curp->p_idx;
 	}
 
+	len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
+	BUG_ON(len < 0);
+	if (len > 0) {
+		ext_debug("insert new index %d: "
+				"move %d indices from 0x%p to 0x%p\n",
+				logical, len, ix, ix + 1);
+		memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
+	}
+
+	if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
+		EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
+		return -EIO;
+	}
+
 	ix->ei_block = cpu_to_le32(logical);
 	ext4_idx_store_pblock(ix, ptr);
 	le16_add_cpu(&curp->p_hdr->eh_entries, 1);
@@ -1042,16 +1034,14 @@
  */
 static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 				 unsigned int flags,
-				 struct ext4_ext_path *path,
 				 struct ext4_extent *newext)
 {
-	struct ext4_ext_path *curp = path;
 	struct ext4_extent_header *neh;
 	struct buffer_head *bh;
 	ext4_fsblk_t newblock;
 	int err = 0;
 
-	newblock = ext4_ext_new_meta_block(handle, inode, path,
+	newblock = ext4_ext_new_meta_block(handle, inode, NULL,
 		newext, &err, flags);
 	if (newblock == 0)
 		return err;
@@ -1071,7 +1061,8 @@
 	}
 
 	/* move top-level index/leaf into new block */
-	memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data));
+	memmove(bh->b_data, EXT4_I(inode)->i_data,
+		sizeof(EXT4_I(inode)->i_data));
 
 	/* set size of new block */
 	neh = ext_block_hdr(bh);
@@ -1089,32 +1080,23 @@
 	if (err)
 		goto out;
 
-	/* create index in new top-level index: num,max,pointer */
-	err = ext4_ext_get_access(handle, inode, curp);
-	if (err)
-		goto out;
-
-	curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
-	curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
-	curp->p_hdr->eh_entries = cpu_to_le16(1);
-	curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
-
-	if (path[0].p_hdr->eh_depth)
-		curp->p_idx->ei_block =
-			EXT_FIRST_INDEX(path[0].p_hdr)->ei_block;
-	else
-		curp->p_idx->ei_block =
-			EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
-	ext4_idx_store_pblock(curp->p_idx, newblock);
-
+	/* Update top-level index: num,max,pointer */
 	neh = ext_inode_hdr(inode);
+	neh->eh_entries = cpu_to_le16(1);
+	ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
+	if (neh->eh_depth == 0) {
+		/* Root extent block becomes index block */
+		neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
+		EXT_FIRST_INDEX(neh)->ei_block =
+			EXT_FIRST_EXTENT(neh)->ee_block;
+	}
 	ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
 		  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
 		  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
 		  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
 
-	neh->eh_depth = cpu_to_le16(path->p_depth + 1);
-	err = ext4_ext_dirty(handle, inode, curp);
+	neh->eh_depth = cpu_to_le16(neh->eh_depth + 1);
+	ext4_mark_inode_dirty(handle, inode);
 out:
 	brelse(bh);
 
@@ -1162,8 +1144,7 @@
 			err = PTR_ERR(path);
 	} else {
 		/* tree is full, time to grow in depth */
-		err = ext4_ext_grow_indepth(handle, inode, flags,
-					    path, newext);
+		err = ext4_ext_grow_indepth(handle, inode, flags, newext);
 		if (err)
 			goto out;
 
@@ -1235,9 +1216,9 @@
 			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
 				EXT4_ERROR_INODE(inode,
 				  "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
-				  ix != NULL ? ix->ei_block : 0,
+				  ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
 				  EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
-				    EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0,
+		le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
 				  depth);
 				return -EIO;
 			}
@@ -1260,13 +1241,14 @@
 /*
  * search the closest allocated block to the right for *logical
  * and returns it at @logical + it's physical address at @phys
- * if *logical is the smallest allocated block, the function
+ * if *logical is the largest allocated block, the function
  * returns 0 at @phys
  * return value contains 0 (success) or error code
  */
 static int ext4_ext_search_right(struct inode *inode,
 				 struct ext4_ext_path *path,
-				 ext4_lblk_t *logical, ext4_fsblk_t *phys)
+				 ext4_lblk_t *logical, ext4_fsblk_t *phys,
+				 struct ext4_extent **ret_ex)
 {
 	struct buffer_head *bh = NULL;
 	struct ext4_extent_header *eh;
@@ -1308,9 +1290,7 @@
 				return -EIO;
 			}
 		}
-		*logical = le32_to_cpu(ex->ee_block);
-		*phys = ext4_ext_pblock(ex);
-		return 0;
+		goto found_extent;
 	}
 
 	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
@@ -1323,9 +1303,7 @@
 	if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
 		/* next allocated block in this leaf */
 		ex++;
-		*logical = le32_to_cpu(ex->ee_block);
-		*phys = ext4_ext_pblock(ex);
-		return 0;
+		goto found_extent;
 	}
 
 	/* go up and search for index to the right */
@@ -1368,9 +1346,12 @@
 		return -EIO;
 	}
 	ex = EXT_FIRST_EXTENT(eh);
+found_extent:
 	*logical = le32_to_cpu(ex->ee_block);
 	*phys = ext4_ext_pblock(ex);
-	put_bh(bh);
+	*ret_ex = ex;
+	if (bh)
+		put_bh(bh);
 	return 0;
 }
 
@@ -1395,7 +1376,8 @@
 	while (depth >= 0) {
 		if (depth == path->p_depth) {
 			/* leaf */
-			if (path[depth].p_ext !=
+			if (path[depth].p_ext &&
+				path[depth].p_ext !=
 					EXT_LAST_EXTENT(path[depth].p_hdr))
 			  return le32_to_cpu(path[depth].p_ext[1].ee_block);
 		} else {
@@ -1623,7 +1605,8 @@
  * such that there will be no overlap, and then returns 1.
  * If there is no overlap found, it returns 0.
  */
-static unsigned int ext4_ext_check_overlap(struct inode *inode,
+static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
+					   struct inode *inode,
 					   struct ext4_extent *newext,
 					   struct ext4_ext_path *path)
 {
@@ -1637,6 +1620,7 @@
 	if (!path[depth].p_ext)
 		goto out;
 	b2 = le32_to_cpu(path[depth].p_ext->ee_block);
+	b2 &= ~(sbi->s_cluster_ratio - 1);
 
 	/*
 	 * get the next allocated block if the extent in the path
@@ -1646,6 +1630,7 @@
 		b2 = ext4_ext_next_allocated_block(path);
 		if (b2 == EXT_MAX_BLOCKS)
 			goto out;
+		b2 &= ~(sbi->s_cluster_ratio - 1);
 	}
 
 	/* check for wrap through zero on extent logical start block*/
@@ -1697,7 +1682,7 @@
 	/* try to insert block into found extent and return */
 	if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
 		&& ext4_can_extents_be_merged(inode, ex, newext)) {
-		ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
+		ext_debug("append [%d]%d block to %u:[%d]%d (from %llu)\n",
 			  ext4_ext_is_uninitialized(newext),
 			  ext4_ext_get_actual_len(newext),
 			  le32_to_cpu(ex->ee_block),
@@ -1735,7 +1720,7 @@
 	if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
 		next = ext4_ext_next_leaf_block(path);
 	if (next != EXT_MAX_BLOCKS) {
-		ext_debug("next leaf block - %d\n", next);
+		ext_debug("next leaf block - %u\n", next);
 		BUG_ON(npath != NULL);
 		npath = ext4_ext_find_extent(inode, next, NULL);
 		if (IS_ERR(npath))
@@ -1773,46 +1758,51 @@
 
 	if (!nearex) {
 		/* there is no extent in this leaf, create first one */
-		ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
+		ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n",
 				le32_to_cpu(newext->ee_block),
 				ext4_ext_pblock(newext),
 				ext4_ext_is_uninitialized(newext),
 				ext4_ext_get_actual_len(newext));
-		path[depth].p_ext = EXT_FIRST_EXTENT(eh);
-	} else if (le32_to_cpu(newext->ee_block)
+		nearex = EXT_FIRST_EXTENT(eh);
+	} else {
+		if (le32_to_cpu(newext->ee_block)
 			   > le32_to_cpu(nearex->ee_block)) {
-/*		BUG_ON(newext->ee_block == nearex->ee_block); */
-		if (nearex != EXT_LAST_EXTENT(eh)) {
-			len = EXT_MAX_EXTENT(eh) - nearex;
-			len = (len - 1) * sizeof(struct ext4_extent);
-			len = len < 0 ? 0 : len;
-			ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
-					"move %d from 0x%p to 0x%p\n",
+			/* Insert after */
+			ext_debug("insert %u:%llu:[%d]%d before: "
+					"nearest %p\n",
 					le32_to_cpu(newext->ee_block),
 					ext4_ext_pblock(newext),
 					ext4_ext_is_uninitialized(newext),
 					ext4_ext_get_actual_len(newext),
-					nearex, len, nearex + 1, nearex + 2);
-			memmove(nearex + 2, nearex + 1, len);
+					nearex);
+			nearex++;
+		} else {
+			/* Insert before */
+			BUG_ON(newext->ee_block == nearex->ee_block);
+			ext_debug("insert %u:%llu:[%d]%d after: "
+					"nearest %p\n",
+					le32_to_cpu(newext->ee_block),
+					ext4_ext_pblock(newext),
+					ext4_ext_is_uninitialized(newext),
+					ext4_ext_get_actual_len(newext),
+					nearex);
 		}
-		path[depth].p_ext = nearex + 1;
-	} else {
-		BUG_ON(newext->ee_block == nearex->ee_block);
-		len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
-		len = len < 0 ? 0 : len;
-		ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
-				"move %d from 0x%p to 0x%p\n",
-				le32_to_cpu(newext->ee_block),
-				ext4_ext_pblock(newext),
-				ext4_ext_is_uninitialized(newext),
-				ext4_ext_get_actual_len(newext),
-				nearex, len, nearex, nearex + 1);
-		memmove(nearex + 1, nearex, len);
-		path[depth].p_ext = nearex;
+		len = EXT_LAST_EXTENT(eh) - nearex + 1;
+		if (len > 0) {
+			ext_debug("insert %u:%llu:[%d]%d: "
+					"move %d extents from 0x%p to 0x%p\n",
+					le32_to_cpu(newext->ee_block),
+					ext4_ext_pblock(newext),
+					ext4_ext_is_uninitialized(newext),
+					ext4_ext_get_actual_len(newext),
+					len, nearex, nearex + 1);
+			memmove(nearex + 1, nearex,
+				len * sizeof(struct ext4_extent));
+		}
 	}
 
 	le16_add_cpu(&eh->eh_entries, 1);
-	nearex = path[depth].p_ext;
+	path[depth].p_ext = nearex;
 	nearex->ee_block = newext->ee_block;
 	ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
 	nearex->ee_len = newext->ee_len;
@@ -1962,6 +1952,7 @@
 	struct ext4_ext_cache *cex;
 	BUG_ON(len == 0);
 	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+	trace_ext4_ext_put_in_cache(inode, block, len, start);
 	cex = &EXT4_I(inode)->i_cached_extent;
 	cex->ec_block = block;
 	cex->ec_len = len;
@@ -2063,6 +2054,7 @@
 		sbi->extent_cache_misses++;
 	else
 		sbi->extent_cache_hits++;
+	trace_ext4_ext_in_cache(inode, block, ret);
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 	return ret;
 }
@@ -2130,6 +2122,8 @@
 	if (err)
 		return err;
 	ext_debug("index is empty, remove it, free block %llu\n", leaf);
+	trace_ext4_ext_rm_idx(inode, leaf);
+
 	ext4_free_blocks(handle, inode, NULL, leaf, 1,
 			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
 	return err;
@@ -2158,7 +2152,7 @@
 			 *  need to account for leaf block credit
 			 *
 			 *  bitmaps and block group descriptor blocks
-			 *  and other metadat blocks still need to be
+			 *  and other metadata blocks still need to be
 			 *  accounted.
 			 */
 			/* 1 bitmap, 1 block group descriptor */
@@ -2195,14 +2189,40 @@
 }
 
 static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
-				struct ext4_extent *ex,
-				ext4_lblk_t from, ext4_lblk_t to)
+			      struct ext4_extent *ex,
+			      ext4_fsblk_t *partial_cluster,
+			      ext4_lblk_t from, ext4_lblk_t to)
 {
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	unsigned short ee_len =  ext4_ext_get_actual_len(ex);
+	ext4_fsblk_t pblk;
 	int flags = EXT4_FREE_BLOCKS_FORGET;
 
 	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
 		flags |= EXT4_FREE_BLOCKS_METADATA;
+	/*
+	 * For bigalloc file systems, we never free a partial cluster
+	 * at the beginning of the extent.  Instead, we make a note
+	 * that we tried freeing the cluster, and check to see if we
+	 * need to free it on a subsequent call to ext4_remove_blocks,
+	 * or at the end of the ext4_truncate() operation.
+	 */
+	flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
+
+	trace_ext4_remove_blocks(inode, ex, from, to, *partial_cluster);
+	/*
+	 * If we have a partial cluster, and it's different from the
+	 * cluster of the last block, we need to explicitly free the
+	 * partial cluster here.
+	 */
+	pblk = ext4_ext_pblock(ex) + ee_len - 1;
+	if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
+		ext4_free_blocks(handle, inode, NULL,
+				 EXT4_C2B(sbi, *partial_cluster),
+				 sbi->s_cluster_ratio, flags);
+		*partial_cluster = 0;
+	}
+
 #ifdef EXTENTS_STATS
 	{
 		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2222,12 +2242,24 @@
 	    && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
 		/* tail removal */
 		ext4_lblk_t num;
-		ext4_fsblk_t start;
 
 		num = le32_to_cpu(ex->ee_block) + ee_len - from;
-		start = ext4_ext_pblock(ex) + ee_len - num;
-		ext_debug("free last %u blocks starting %llu\n", num, start);
-		ext4_free_blocks(handle, inode, NULL, start, num, flags);
+		pblk = ext4_ext_pblock(ex) + ee_len - num;
+		ext_debug("free last %u blocks starting %llu\n", num, pblk);
+		ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
+		/*
+		 * If the block range to be freed didn't start at the
+		 * beginning of a cluster, and we removed the entire
+		 * extent, save the partial cluster here, since we
+		 * might need to delete if we determine that the
+		 * truncate operation has removed all of the blocks in
+		 * the cluster.
+		 */
+		if (pblk & (sbi->s_cluster_ratio - 1) &&
+		    (ee_len == num))
+			*partial_cluster = EXT4_B2C(sbi, pblk);
+		else
+			*partial_cluster = 0;
 	} else if (from == le32_to_cpu(ex->ee_block)
 		   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
 		/* head removal */
@@ -2238,7 +2270,7 @@
 		start = ext4_ext_pblock(ex);
 
 		ext_debug("free first %u blocks starting %llu\n", num, start);
-		ext4_free_blocks(handle, inode, 0, start, num, flags);
+		ext4_free_blocks(handle, inode, NULL, start, num, flags);
 
 	} else {
 		printk(KERN_INFO "strange request: removal(2) "
@@ -2262,19 +2294,19 @@
  */
 static int
 ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
-		struct ext4_ext_path *path, ext4_lblk_t start,
-		ext4_lblk_t end)
+		 struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster,
+		 ext4_lblk_t start, ext4_lblk_t end)
 {
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	int err = 0, correct_index = 0;
 	int depth = ext_depth(inode), credits;
 	struct ext4_extent_header *eh;
-	ext4_lblk_t a, b, block;
+	ext4_lblk_t a, b;
 	unsigned num;
 	ext4_lblk_t ex_ee_block;
 	unsigned short ex_ee_len;
 	unsigned uninitialized = 0;
 	struct ext4_extent *ex;
-	struct ext4_map_blocks map;
 
 	/* the header must be checked already in ext4_ext_remove_space() */
 	ext_debug("truncate since %u in leaf\n", start);
@@ -2291,6 +2323,8 @@
 	ex_ee_block = le32_to_cpu(ex->ee_block);
 	ex_ee_len = ext4_ext_get_actual_len(ex);
 
+	trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
+
 	while (ex >= EXT_FIRST_EXTENT(eh) &&
 			ex_ee_block + ex_ee_len > start) {
 
@@ -2315,86 +2349,18 @@
 			ex_ee_block = le32_to_cpu(ex->ee_block);
 			ex_ee_len = ext4_ext_get_actual_len(ex);
 			continue;
-		} else if (a != ex_ee_block &&
-			b != ex_ee_block + ex_ee_len - 1) {
-			/*
-			 * If this is a truncate, then this condition should
-			 * never happen because at least one of the end points
-			 * needs to be on the edge of the extent.
-			 */
-			if (end == EXT_MAX_BLOCKS - 1) {
-				ext_debug("  bad truncate %u:%u\n",
-						start, end);
-				block = 0;
-				num = 0;
-				err = -EIO;
-				goto out;
-			}
-			/*
-			 * else this is a hole punch, so the extent needs to
-			 * be split since neither edge of the hole is on the
-			 * extent edge
-			 */
-			else{
-				map.m_pblk = ext4_ext_pblock(ex);
-				map.m_lblk = ex_ee_block;
-				map.m_len = b - ex_ee_block;
-
-				err = ext4_split_extent(handle,
-					inode, path, &map, 0,
-					EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
-					EXT4_GET_BLOCKS_PRE_IO);
-
-				if (err < 0)
-					goto out;
-
-				ex_ee_len = ext4_ext_get_actual_len(ex);
-
-				b = ex_ee_block+ex_ee_len - 1 < end ?
-					ex_ee_block+ex_ee_len - 1 : end;
-
-				/* Then remove tail of this extent */
-				block = ex_ee_block;
-				num = a - block;
-			}
+		} else if (b != ex_ee_block + ex_ee_len - 1) {
+			EXT4_ERROR_INODE(inode,"  bad truncate %u:%u\n",
+					 start, end);
+			err = -EIO;
+			goto out;
 		} else if (a != ex_ee_block) {
 			/* remove tail of the extent */
-			block = ex_ee_block;
-			num = a - block;
-		} else if (b != ex_ee_block + ex_ee_len - 1) {
-			/* remove head of the extent */
-			block = b;
-			num =  ex_ee_block + ex_ee_len - b;
-
-			/*
-			 * If this is a truncate, this condition
-			 * should never happen
-			 */
-			if (end == EXT_MAX_BLOCKS - 1) {
-				ext_debug("  bad truncate %u:%u\n",
-					start, end);
-				err = -EIO;
-				goto out;
-			}
+			num = a - ex_ee_block;
 		} else {
 			/* remove whole extent: excellent! */
-			block = ex_ee_block;
 			num = 0;
-			if (a != ex_ee_block) {
-				ext_debug("  bad truncate %u:%u\n",
-					start, end);
-				err = -EIO;
-				goto out;
-			}
-
-			if (b != ex_ee_block + ex_ee_len - 1) {
-				ext_debug("  bad truncate %u:%u\n",
-					start, end);
-				err = -EIO;
-				goto out;
-			}
 		}
-
 		/*
 		 * 3 for leaf, sb, and inode plus 2 (bmap and group
 		 * descriptor) for each block group; assume two block
@@ -2416,23 +2382,15 @@
 		if (err)
 			goto out;
 
-		err = ext4_remove_blocks(handle, inode, ex, a, b);
+		err = ext4_remove_blocks(handle, inode, ex, partial_cluster,
+					 a, b);
 		if (err)
 			goto out;
 
-		if (num == 0) {
+		if (num == 0)
 			/* this extent is removed; mark slot entirely unused */
 			ext4_ext_store_pblock(ex, 0);
-		} else if (block != ex_ee_block) {
-			/*
-			 * If this was a head removal, then we need to update
-			 * the physical block since it is now at a different
-			 * location
-			 */
-			ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a));
-		}
 
-		ex->ee_block = cpu_to_le32(block);
 		ex->ee_len = cpu_to_le16(num);
 		/*
 		 * Do not mark uninitialized if all the blocks in the
@@ -2440,11 +2398,6 @@
 		 */
 		if (uninitialized && num)
 			ext4_ext_mark_uninitialized(ex);
-
-		err = ext4_ext_dirty(handle, inode, path + depth);
-		if (err)
-			goto out;
-
 		/*
 		 * If the extent was completely released,
 		 * we need to remove it from the leaf
@@ -2464,9 +2417,14 @@
 					sizeof(struct ext4_extent));
 			}
 			le16_add_cpu(&eh->eh_entries, -1);
-		}
+		} else
+			*partial_cluster = 0;
 
-		ext_debug("new extent: %u:%u:%llu\n", block, num,
+		err = ext4_ext_dirty(handle, inode, path + depth);
+		if (err)
+			goto out;
+
+		ext_debug("new extent: %u:%u:%llu\n", ex_ee_block, num,
 				ext4_ext_pblock(ex));
 		ex--;
 		ex_ee_block = le32_to_cpu(ex->ee_block);
@@ -2476,6 +2434,25 @@
 	if (correct_index && eh->eh_entries)
 		err = ext4_ext_correct_indexes(handle, inode, path);
 
+	/*
+	 * If there is still a entry in the leaf node, check to see if
+	 * it references the partial cluster.  This is the only place
+	 * where it could; if it doesn't, we can free the cluster.
+	 */
+	if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) &&
+	    (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
+	     *partial_cluster)) {
+		int flags = EXT4_FREE_BLOCKS_FORGET;
+
+		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+			flags |= EXT4_FREE_BLOCKS_METADATA;
+
+		ext4_free_blocks(handle, inode, NULL,
+				 EXT4_C2B(sbi, *partial_cluster),
+				 sbi->s_cluster_ratio, flags);
+		*partial_cluster = 0;
+	}
+
 	/* if this leaf is free, then we should
 	 * remove it from index block above */
 	if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
@@ -2511,6 +2488,7 @@
 	struct super_block *sb = inode->i_sb;
 	int depth = ext_depth(inode);
 	struct ext4_ext_path *path;
+	ext4_fsblk_t partial_cluster = 0;
 	handle_t *handle;
 	int i, err;
 
@@ -2524,6 +2502,8 @@
 again:
 	ext4_ext_invalidate_cache(inode);
 
+	trace_ext4_ext_remove_space(inode, start, depth);
+
 	/*
 	 * We start scanning from right side, freeing all the blocks
 	 * after i_size and walking into the tree depth-wise.
@@ -2546,7 +2526,8 @@
 		if (i == depth) {
 			/* this is leaf block */
 			err = ext4_ext_rm_leaf(handle, inode, path,
-					start, EXT_MAX_BLOCKS - 1);
+					       &partial_cluster, start,
+					       EXT_MAX_BLOCKS - 1);
 			/* root level has p_bh == NULL, brelse() eats this */
 			brelse(path[i].p_bh);
 			path[i].p_bh = NULL;
@@ -2618,6 +2599,24 @@
 		}
 	}
 
+	trace_ext4_ext_remove_space_done(inode, start, depth, partial_cluster,
+			path->p_hdr->eh_entries);
+
+	/* If we still have something in the partial cluster and we have removed
+	 * even the first extent, then we should free the blocks in the partial
+	 * cluster as well. */
+	if (partial_cluster && path->p_hdr->eh_entries == 0) {
+		int flags = EXT4_FREE_BLOCKS_FORGET;
+
+		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+			flags |= EXT4_FREE_BLOCKS_METADATA;
+
+		ext4_free_blocks(handle, inode, NULL,
+				 EXT4_C2B(EXT4_SB(sb), partial_cluster),
+				 EXT4_SB(sb)->s_cluster_ratio, flags);
+		partial_cluster = 0;
+	}
+
 	/* TODO: flexible tree reduction should be here */
 	if (path->p_hdr->eh_entries == 0) {
 		/*
@@ -2909,17 +2908,29 @@
  *   a> There is no split required: Entire extent should be initialized
  *   b> Splits in two extents: Write is happening at either end of the extent
  *   c> Splits in three extents: Somone is writing in middle of the extent
+ *
+ * Pre-conditions:
+ *  - The extent pointed to by 'path' is uninitialized.
+ *  - The extent pointed to by 'path' contains a superset
+ *    of the logical span [map->m_lblk, map->m_lblk + map->m_len).
+ *
+ * Post-conditions on success:
+ *  - the returned value is the number of blocks beyond map->l_lblk
+ *    that are allocated and initialized.
+ *    It is guaranteed to be >= map->m_len.
  */
 static int ext4_ext_convert_to_initialized(handle_t *handle,
 					   struct inode *inode,
 					   struct ext4_map_blocks *map,
 					   struct ext4_ext_path *path)
 {
+	struct ext4_extent_header *eh;
 	struct ext4_map_blocks split_map;
 	struct ext4_extent zero_ex;
 	struct ext4_extent *ex;
 	ext4_lblk_t ee_block, eof_block;
-	unsigned int allocated, ee_len, depth;
+	unsigned int ee_len, depth;
+	int allocated;
 	int err = 0;
 	int split_flag = 0;
 
@@ -2933,11 +2944,93 @@
 		eof_block = map->m_lblk + map->m_len;
 
 	depth = ext_depth(inode);
+	eh = path[depth].p_hdr;
 	ex = path[depth].p_ext;
 	ee_block = le32_to_cpu(ex->ee_block);
 	ee_len = ext4_ext_get_actual_len(ex);
 	allocated = ee_len - (map->m_lblk - ee_block);
 
+	trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
+
+	/* Pre-conditions */
+	BUG_ON(!ext4_ext_is_uninitialized(ex));
+	BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
+	BUG_ON(map->m_lblk + map->m_len > ee_block + ee_len);
+
+	/*
+	 * Attempt to transfer newly initialized blocks from the currently
+	 * uninitialized extent to its left neighbor. This is much cheaper
+	 * than an insertion followed by a merge as those involve costly
+	 * memmove() calls. This is the common case in steady state for
+	 * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append
+	 * writes.
+	 *
+	 * Limitations of the current logic:
+	 *  - L1: we only deal with writes at the start of the extent.
+	 *    The approach could be extended to writes at the end
+	 *    of the extent but this scenario was deemed less common.
+	 *  - L2: we do not deal with writes covering the whole extent.
+	 *    This would require removing the extent if the transfer
+	 *    is possible.
+	 *  - L3: we only attempt to merge with an extent stored in the
+	 *    same extent tree node.
+	 */
+	if ((map->m_lblk == ee_block) &&	/*L1*/
+		(map->m_len < ee_len) &&	/*L2*/
+		(ex > EXT_FIRST_EXTENT(eh))) {	/*L3*/
+		struct ext4_extent *prev_ex;
+		ext4_lblk_t prev_lblk;
+		ext4_fsblk_t prev_pblk, ee_pblk;
+		unsigned int prev_len, write_len;
+
+		prev_ex = ex - 1;
+		prev_lblk = le32_to_cpu(prev_ex->ee_block);
+		prev_len = ext4_ext_get_actual_len(prev_ex);
+		prev_pblk = ext4_ext_pblock(prev_ex);
+		ee_pblk = ext4_ext_pblock(ex);
+		write_len = map->m_len;
+
+		/*
+		 * A transfer of blocks from 'ex' to 'prev_ex' is allowed
+		 * upon those conditions:
+		 * - C1: prev_ex is initialized,
+		 * - C2: prev_ex is logically abutting ex,
+		 * - C3: prev_ex is physically abutting ex,
+		 * - C4: prev_ex can receive the additional blocks without
+		 *   overflowing the (initialized) length limit.
+		 */
+		if ((!ext4_ext_is_uninitialized(prev_ex)) &&		/*C1*/
+			((prev_lblk + prev_len) == ee_block) &&		/*C2*/
+			((prev_pblk + prev_len) == ee_pblk) &&		/*C3*/
+			(prev_len < (EXT_INIT_MAX_LEN - write_len))) {	/*C4*/
+			err = ext4_ext_get_access(handle, inode, path + depth);
+			if (err)
+				goto out;
+
+			trace_ext4_ext_convert_to_initialized_fastpath(inode,
+				map, ex, prev_ex);
+
+			/* Shift the start of ex by 'write_len' blocks */
+			ex->ee_block = cpu_to_le32(ee_block + write_len);
+			ext4_ext_store_pblock(ex, ee_pblk + write_len);
+			ex->ee_len = cpu_to_le16(ee_len - write_len);
+			ext4_ext_mark_uninitialized(ex); /* Restore the flag */
+
+			/* Extend prev_ex by 'write_len' blocks */
+			prev_ex->ee_len = cpu_to_le16(prev_len + write_len);
+
+			/* Mark the block containing both extents as dirty */
+			ext4_ext_dirty(handle, inode, path + depth);
+
+			/* Update path to point to the right extent */
+			path[depth].p_ext = prev_ex;
+
+			/* Result: number of initialized blocks past m_lblk */
+			allocated = write_len;
+			goto out;
+		}
+	}
+
 	WARN_ON(map->m_lblk < ee_block);
 	/*
 	 * It is safe to convert extent to initialized via explicit
@@ -3165,6 +3258,192 @@
 	return ext4_mark_inode_dirty(handle, inode);
 }
 
+/**
+ * ext4_find_delalloc_range: find delayed allocated block in the given range.
+ *
+ * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns
+ * whether there are any buffers marked for delayed allocation. It returns '1'
+ * on the first delalloc'ed buffer head found. If no buffer head in the given
+ * range is marked for delalloc, it returns 0.
+ * lblk_start should always be <= lblk_end.
+ * search_hint_reverse is to indicate that searching in reverse from lblk_end to
+ * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
+ * block sooner). This is useful when blocks are truncated sequentially from
+ * lblk_start towards lblk_end.
+ */
+static int ext4_find_delalloc_range(struct inode *inode,
+				    ext4_lblk_t lblk_start,
+				    ext4_lblk_t lblk_end,
+				    int search_hint_reverse)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct buffer_head *head, *bh = NULL;
+	struct page *page;
+	ext4_lblk_t i, pg_lblk;
+	pgoff_t index;
+
+	/* reverse search wont work if fs block size is less than page size */
+	if (inode->i_blkbits < PAGE_CACHE_SHIFT)
+		search_hint_reverse = 0;
+
+	if (search_hint_reverse)
+		i = lblk_end;
+	else
+		i = lblk_start;
+
+	index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+	while ((i >= lblk_start) && (i <= lblk_end)) {
+		page = find_get_page(mapping, index);
+		if (!page)
+			goto nextpage;
+
+		if (!page_has_buffers(page))
+			goto nextpage;
+
+		head = page_buffers(page);
+		if (!head)
+			goto nextpage;
+
+		bh = head;
+		pg_lblk = index << (PAGE_CACHE_SHIFT -
+						inode->i_blkbits);
+		do {
+			if (unlikely(pg_lblk < lblk_start)) {
+				/*
+				 * This is possible when fs block size is less
+				 * than page size and our cluster starts/ends in
+				 * middle of the page. So we need to skip the
+				 * initial few blocks till we reach the 'lblk'
+				 */
+				pg_lblk++;
+				continue;
+			}
+
+			/* Check if the buffer is delayed allocated and that it
+			 * is not yet mapped. (when da-buffers are mapped during
+			 * their writeout, their da_mapped bit is set.)
+			 */
+			if (buffer_delay(bh) && !buffer_da_mapped(bh)) {
+				page_cache_release(page);
+				trace_ext4_find_delalloc_range(inode,
+						lblk_start, lblk_end,
+						search_hint_reverse,
+						1, i);
+				return 1;
+			}
+			if (search_hint_reverse)
+				i--;
+			else
+				i++;
+		} while ((i >= lblk_start) && (i <= lblk_end) &&
+				((bh = bh->b_this_page) != head));
+nextpage:
+		if (page)
+			page_cache_release(page);
+		/*
+		 * Move to next page. 'i' will be the first lblk in the next
+		 * page.
+		 */
+		if (search_hint_reverse)
+			index--;
+		else
+			index++;
+		i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	}
+
+	trace_ext4_find_delalloc_range(inode, lblk_start, lblk_end,
+					search_hint_reverse, 0, 0);
+	return 0;
+}
+
+int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
+			       int search_hint_reverse)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	ext4_lblk_t lblk_start, lblk_end;
+	lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
+	lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
+
+	return ext4_find_delalloc_range(inode, lblk_start, lblk_end,
+					search_hint_reverse);
+}
+
+/**
+ * Determines how many complete clusters (out of those specified by the 'map')
+ * are under delalloc and were reserved quota for.
+ * This function is called when we are writing out the blocks that were
+ * originally written with their allocation delayed, but then the space was
+ * allocated using fallocate() before the delayed allocation could be resolved.
+ * The cases to look for are:
+ * ('=' indicated delayed allocated blocks
+ *  '-' indicates non-delayed allocated blocks)
+ * (a) partial clusters towards beginning and/or end outside of allocated range
+ *     are not delalloc'ed.
+ *	Ex:
+ *	|----c---=|====c====|====c====|===-c----|
+ *	         |++++++ allocated ++++++|
+ *	==> 4 complete clusters in above example
+ *
+ * (b) partial cluster (outside of allocated range) towards either end is
+ *     marked for delayed allocation. In this case, we will exclude that
+ *     cluster.
+ *	Ex:
+ *	|----====c========|========c========|
+ *	     |++++++ allocated ++++++|
+ *	==> 1 complete clusters in above example
+ *
+ *	Ex:
+ *	|================c================|
+ *            |++++++ allocated ++++++|
+ *	==> 0 complete clusters in above example
+ *
+ * The ext4_da_update_reserve_space will be called only if we
+ * determine here that there were some "entire" clusters that span
+ * this 'allocated' range.
+ * In the non-bigalloc case, this function will just end up returning num_blks
+ * without ever calling ext4_find_delalloc_range.
+ */
+static unsigned int
+get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
+			   unsigned int num_blks)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	ext4_lblk_t alloc_cluster_start, alloc_cluster_end;
+	ext4_lblk_t lblk_from, lblk_to, c_offset;
+	unsigned int allocated_clusters = 0;
+
+	alloc_cluster_start = EXT4_B2C(sbi, lblk_start);
+	alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1);
+
+	/* max possible clusters for this allocation */
+	allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1;
+
+	trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
+
+	/* Check towards left side */
+	c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
+	if (c_offset) {
+		lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
+		lblk_to = lblk_from + c_offset - 1;
+
+		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+			allocated_clusters--;
+	}
+
+	/* Now check towards right. */
+	c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
+	if (allocated_clusters && c_offset) {
+		lblk_from = lblk_start + num_blks;
+		lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
+
+		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+			allocated_clusters--;
+	}
+
+	return allocated_clusters;
+}
+
 static int
 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 			struct ext4_map_blocks *map,
@@ -3181,6 +3460,9 @@
 		  flags, allocated);
 	ext4_ext_show_leaf(inode, path);
 
+	trace_ext4_ext_handle_uninitialized_extents(inode, map, allocated,
+						    newblock);
+
 	/* get_block() before submit the IO, split the extent */
 	if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
 		ret = ext4_split_unwritten_extents(handle, inode, map,
@@ -3190,10 +3472,9 @@
 		 * that this IO needs to conversion to written when IO is
 		 * completed
 		 */
-		if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
-			io->flag = EXT4_IO_END_UNWRITTEN;
-			atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
-		} else
+		if (io)
+			ext4_set_io_unwritten_flag(inode, io);
+		else
 			ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
 		if (ext4_should_dioread_nolock(inode))
 			map->m_flags |= EXT4_MAP_UNINIT;
@@ -3234,14 +3515,8 @@
 
 	/* buffered write, writepage time, convert*/
 	ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
-	if (ret >= 0) {
+	if (ret >= 0)
 		ext4_update_inode_fsync_trans(handle, inode, 1);
-		err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
-					 map->m_len);
-		if (err < 0)
-			goto out2;
-	}
-
 out:
 	if (ret <= 0) {
 		err = ret;
@@ -3270,11 +3545,24 @@
 	 * But fallocate would have already updated quota and block
 	 * count for this offset. So cancel these reservation
 	 */
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-		ext4_da_update_reserve_space(inode, allocated, 0);
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+		unsigned int reserved_clusters;
+		reserved_clusters = get_reserved_cluster_alloc(inode,
+				map->m_lblk, map->m_len);
+		if (reserved_clusters)
+			ext4_da_update_reserve_space(inode,
+						     reserved_clusters,
+						     0);
+	}
 
 map_out:
 	map->m_flags |= EXT4_MAP_MAPPED;
+	if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) {
+		err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
+					 map->m_len);
+		if (err < 0)
+			goto out2;
+	}
 out1:
 	if (allocated > map->m_len)
 		allocated = map->m_len;
@@ -3290,6 +3578,111 @@
 }
 
 /*
+ * get_implied_cluster_alloc - check to see if the requested
+ * allocation (in the map structure) overlaps with a cluster already
+ * allocated in an extent.
+ *	@sb	The filesystem superblock structure
+ *	@map	The requested lblk->pblk mapping
+ *	@ex	The extent structure which might contain an implied
+ *			cluster allocation
+ *
+ * This function is called by ext4_ext_map_blocks() after we failed to
+ * find blocks that were already in the inode's extent tree.  Hence,
+ * we know that the beginning of the requested region cannot overlap
+ * the extent from the inode's extent tree.  There are three cases we
+ * want to catch.  The first is this case:
+ *
+ *		 |--- cluster # N--|
+ *    |--- extent ---|	|---- requested region ---|
+ *			|==========|
+ *
+ * The second case that we need to test for is this one:
+ *
+ *   |--------- cluster # N ----------------|
+ *	   |--- requested region --|   |------- extent ----|
+ *	   |=======================|
+ *
+ * The third case is when the requested region lies between two extents
+ * within the same cluster:
+ *          |------------- cluster # N-------------|
+ * |----- ex -----|                  |---- ex_right ----|
+ *                  |------ requested region ------|
+ *                  |================|
+ *
+ * In each of the above cases, we need to set the map->m_pblk and
+ * map->m_len so it corresponds to the return the extent labelled as
+ * "|====|" from cluster #N, since it is already in use for data in
+ * cluster EXT4_B2C(sbi, map->m_lblk).	We will then return 1 to
+ * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
+ * as a new "allocated" block region.  Otherwise, we will return 0 and
+ * ext4_ext_map_blocks() will then allocate one or more new clusters
+ * by calling ext4_mb_new_blocks().
+ */
+static int get_implied_cluster_alloc(struct super_block *sb,
+				     struct ext4_map_blocks *map,
+				     struct ext4_extent *ex,
+				     struct ext4_ext_path *path)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+	ext4_lblk_t ex_cluster_start, ex_cluster_end;
+	ext4_lblk_t rr_cluster_start, rr_cluster_end;
+	ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
+	ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
+	unsigned short ee_len = ext4_ext_get_actual_len(ex);
+
+	/* The extent passed in that we are trying to match */
+	ex_cluster_start = EXT4_B2C(sbi, ee_block);
+	ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
+
+	/* The requested region passed into ext4_map_blocks() */
+	rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
+	rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1);
+
+	if ((rr_cluster_start == ex_cluster_end) ||
+	    (rr_cluster_start == ex_cluster_start)) {
+		if (rr_cluster_start == ex_cluster_end)
+			ee_start += ee_len - 1;
+		map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
+			c_offset;
+		map->m_len = min(map->m_len,
+				 (unsigned) sbi->s_cluster_ratio - c_offset);
+		/*
+		 * Check for and handle this case:
+		 *
+		 *   |--------- cluster # N-------------|
+		 *		       |------- extent ----|
+		 *	   |--- requested region ---|
+		 *	   |===========|
+		 */
+
+		if (map->m_lblk < ee_block)
+			map->m_len = min(map->m_len, ee_block - map->m_lblk);
+
+		/*
+		 * Check for the case where there is already another allocated
+		 * block to the right of 'ex' but before the end of the cluster.
+		 *
+		 *          |------------- cluster # N-------------|
+		 * |----- ex -----|                  |---- ex_right ----|
+		 *                  |------ requested region ------|
+		 *                  |================|
+		 */
+		if (map->m_lblk > ee_block) {
+			ext4_lblk_t next = ext4_ext_next_allocated_block(path);
+			map->m_len = min(map->m_len, next - map->m_lblk);
+		}
+
+		trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
+		return 1;
+	}
+
+	trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
+	return 0;
+}
+
+
+/*
  * Block allocation/map/preallocation routine for extents based files
  *
  *
@@ -3311,15 +3704,17 @@
 			struct ext4_map_blocks *map, int flags)
 {
 	struct ext4_ext_path *path = NULL;
-	struct ext4_extent newex, *ex;
+	struct ext4_extent newex, *ex, *ex2;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	ext4_fsblk_t newblock = 0;
-	int err = 0, depth, ret;
-	unsigned int allocated = 0;
+	int free_on_err = 0, err = 0, depth, ret;
+	unsigned int allocated = 0, offset = 0;
+	unsigned int allocated_clusters = 0;
 	unsigned int punched_out = 0;
 	unsigned int result = 0;
 	struct ext4_allocation_request ar;
 	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
-	struct ext4_map_blocks punch_map;
+	ext4_lblk_t cluster_offset;
 
 	ext_debug("blocks %u/%u requested for inode %lu\n",
 		  map->m_lblk, map->m_len, inode->i_ino);
@@ -3329,6 +3724,10 @@
 	if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
 		ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
 		if (!newex.ee_start_lo && !newex.ee_start_hi) {
+			if ((sbi->s_cluster_ratio > 1) &&
+			    ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+				map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+
 			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
 				/*
 				 * block isn't allocated yet and
@@ -3339,6 +3738,8 @@
 			/* we should allocate requested block */
 		} else {
 			/* block is already allocated */
+			if (sbi->s_cluster_ratio > 1)
+				map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 			newblock = map->m_lblk
 				   - le32_to_cpu(newex.ee_block)
 				   + ext4_ext_pblock(&newex);
@@ -3384,8 +3785,14 @@
 		 * we split out initialized portions during a write.
 		 */
 		ee_len = ext4_ext_get_actual_len(ex);
+
+		trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);
+
 		/* if found extent covers block, simply return it */
 		if (in_range(map->m_lblk, ee_block, ee_len)) {
+			struct ext4_map_blocks punch_map;
+			ext4_fsblk_t partial_cluster = 0;
+
 			newblock = map->m_lblk - ee_block + ee_start;
 			/* number of remaining blocks in the extent */
 			allocated = ee_len - (map->m_lblk - ee_block);
@@ -3469,7 +3876,8 @@
 			ext4_ext_invalidate_cache(inode);
 
 			err = ext4_ext_rm_leaf(handle, inode, path,
-				map->m_lblk, map->m_lblk + punched_out);
+					       &partial_cluster, map->m_lblk,
+					       map->m_lblk + punched_out);
 
 			if (!err && path->p_hdr->eh_entries == 0) {
 				/*
@@ -3492,6 +3900,10 @@
 		}
 	}
 
+	if ((sbi->s_cluster_ratio > 1) &&
+	    ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+
 	/*
 	 * requested block isn't allocated yet;
 	 * we couldn't try to create block if create flag is zero
@@ -3504,9 +3916,25 @@
 		ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
 		goto out2;
 	}
+
 	/*
 	 * Okay, we need to do block allocation.
 	 */
+	map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
+	newex.ee_block = cpu_to_le32(map->m_lblk);
+	cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+
+	/*
+	 * If we are doing bigalloc, check to see if the extent returned
+	 * by ext4_ext_find_extent() implies a cluster we can use.
+	 */
+	if (cluster_offset && ex &&
+	    get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
+		ar.len = allocated = map->m_len;
+		newblock = map->m_pblk;
+		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+		goto got_allocated_blocks;
+	}
 
 	/* find neighbour allocated blocks */
 	ar.lleft = map->m_lblk;
@@ -3514,10 +3942,21 @@
 	if (err)
 		goto out2;
 	ar.lright = map->m_lblk;
-	err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
+	ex2 = NULL;
+	err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
 	if (err)
 		goto out2;
 
+	/* Check if the extent after searching to the right implies a
+	 * cluster we can use. */
+	if ((sbi->s_cluster_ratio > 1) && ex2 &&
+	    get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
+		ar.len = allocated = map->m_len;
+		newblock = map->m_pblk;
+		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+		goto got_allocated_blocks;
+	}
+
 	/*
 	 * See if request is beyond maximum number of blocks we can have in
 	 * a single extent. For an initialized extent this limit is
@@ -3532,9 +3971,8 @@
 		map->m_len = EXT_UNINIT_MAX_LEN;
 
 	/* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
-	newex.ee_block = cpu_to_le32(map->m_lblk);
 	newex.ee_len = cpu_to_le16(map->m_len);
-	err = ext4_ext_check_overlap(inode, &newex, path);
+	err = ext4_ext_check_overlap(sbi, inode, &newex, path);
 	if (err)
 		allocated = ext4_ext_get_actual_len(&newex);
 	else
@@ -3544,7 +3982,18 @@
 	ar.inode = inode;
 	ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
 	ar.logical = map->m_lblk;
-	ar.len = allocated;
+	/*
+	 * We calculate the offset from the beginning of the cluster
+	 * for the logical block number, since when we allocate a
+	 * physical cluster, the physical block should start at the
+	 * same offset from the beginning of the cluster.  This is
+	 * needed so that future calls to get_implied_cluster_alloc()
+	 * work correctly.
+	 */
+	offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
+	ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
+	ar.goal -= offset;
+	ar.logical -= offset;
 	if (S_ISREG(inode->i_mode))
 		ar.flags = EXT4_MB_HINT_DATA;
 	else
@@ -3557,9 +4006,15 @@
 		goto out2;
 	ext_debug("allocate new block: goal %llu, found %llu/%u\n",
 		  ar.goal, newblock, allocated);
+	free_on_err = 1;
+	allocated_clusters = ar.len;
+	ar.len = EXT4_C2B(sbi, ar.len) - offset;
+	if (ar.len > allocated)
+		ar.len = allocated;
 
+got_allocated_blocks:
 	/* try to insert new extent into found leaf and return */
-	ext4_ext_store_pblock(&newex, newblock);
+	ext4_ext_store_pblock(&newex, newblock + offset);
 	newex.ee_len = cpu_to_le16(ar.len);
 	/* Mark uninitialized */
 	if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
@@ -3572,10 +4027,9 @@
 		 * that we need to perform conversion when IO is done.
 		 */
 		if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
-			if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
-				io->flag = EXT4_IO_END_UNWRITTEN;
-				atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
-			} else
+			if (io)
+				ext4_set_io_unwritten_flag(inode, io);
+			else
 				ext4_set_inode_state(inode,
 						     EXT4_STATE_DIO_UNWRITTEN);
 		}
@@ -3583,11 +4037,14 @@
 			map->m_flags |= EXT4_MAP_UNINIT;
 	}
 
-	err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len);
+	err = 0;
+	if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0)
+		err = check_eofblocks_fl(handle, inode, map->m_lblk,
+					 path, ar.len);
 	if (!err)
 		err = ext4_ext_insert_extent(handle, inode, path,
 					     &newex, flags);
-	if (err) {
+	if (err && free_on_err) {
 		int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
 			EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
 		/* free data blocks we just allocated */
@@ -3610,8 +4067,82 @@
 	 * Update reserved blocks/metadata blocks after successful
 	 * block allocation which had been deferred till now.
 	 */
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-		ext4_da_update_reserve_space(inode, allocated, 1);
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+		unsigned int reserved_clusters;
+		/*
+		 * Check how many clusters we had reserved this allocated range
+		 */
+		reserved_clusters = get_reserved_cluster_alloc(inode,
+						map->m_lblk, allocated);
+		if (map->m_flags & EXT4_MAP_FROM_CLUSTER) {
+			if (reserved_clusters) {
+				/*
+				 * We have clusters reserved for this range.
+				 * But since we are not doing actual allocation
+				 * and are simply using blocks from previously
+				 * allocated cluster, we should release the
+				 * reservation and not claim quota.
+				 */
+				ext4_da_update_reserve_space(inode,
+						reserved_clusters, 0);
+			}
+		} else {
+			BUG_ON(allocated_clusters < reserved_clusters);
+			/* We will claim quota for all newly allocated blocks.*/
+			ext4_da_update_reserve_space(inode, allocated_clusters,
+							1);
+			if (reserved_clusters < allocated_clusters) {
+				struct ext4_inode_info *ei = EXT4_I(inode);
+				int reservation = allocated_clusters -
+						  reserved_clusters;
+				/*
+				 * It seems we claimed few clusters outside of
+				 * the range of this allocation. We should give
+				 * it back to the reservation pool. This can
+				 * happen in the following case:
+				 *
+				 * * Suppose s_cluster_ratio is 4 (i.e., each
+				 *   cluster has 4 blocks. Thus, the clusters
+				 *   are [0-3],[4-7],[8-11]...
+				 * * First comes delayed allocation write for
+				 *   logical blocks 10 & 11. Since there were no
+				 *   previous delayed allocated blocks in the
+				 *   range [8-11], we would reserve 1 cluster
+				 *   for this write.
+				 * * Next comes write for logical blocks 3 to 8.
+				 *   In this case, we will reserve 2 clusters
+				 *   (for [0-3] and [4-7]; and not for [8-11] as
+				 *   that range has a delayed allocated blocks.
+				 *   Thus total reserved clusters now becomes 3.
+				 * * Now, during the delayed allocation writeout
+				 *   time, we will first write blocks [3-8] and
+				 *   allocate 3 clusters for writing these
+				 *   blocks. Also, we would claim all these
+				 *   three clusters above.
+				 * * Now when we come here to writeout the
+				 *   blocks [10-11], we would expect to claim
+				 *   the reservation of 1 cluster we had made
+				 *   (and we would claim it since there are no
+				 *   more delayed allocated blocks in the range
+				 *   [8-11]. But our reserved cluster count had
+				 *   already gone to 0.
+				 *
+				 *   Thus, at the step 4 above when we determine
+				 *   that there are still some unwritten delayed
+				 *   allocated blocks outside of our current
+				 *   block range, we should increment the
+				 *   reserved clusters count so that when the
+				 *   remaining blocks finally gets written, we
+				 *   could claim them.
+				 */
+				dquot_reserve_block(inode,
+						EXT4_C2B(sbi, reservation));
+				spin_lock(&ei->i_block_reservation_lock);
+				ei->i_reserved_data_blocks += reservation;
+				spin_unlock(&ei->i_block_reservation_lock);
+			}
+		}
+	}
 
 	/*
 	 * Cache the extent and update transaction to commit on fdatasync only
@@ -3634,12 +4165,12 @@
 		ext4_ext_drop_refs(path);
 		kfree(path);
 	}
-	trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
-		newblock, map->m_len, err ? err : allocated);
-
 	result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ?
 			punched_out : allocated;
 
+	trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
+		newblock, map->m_len, err ? err : result);
+
 	return err ? err : result;
 }
 
@@ -3649,6 +4180,7 @@
 	struct super_block *sb = inode->i_sb;
 	ext4_lblk_t last_block;
 	handle_t *handle;
+	loff_t page_len;
 	int err = 0;
 
 	/*
@@ -3665,8 +4197,16 @@
 	if (IS_ERR(handle))
 		return;
 
-	if (inode->i_size & (sb->s_blocksize - 1))
-		ext4_block_truncate_page(handle, mapping, inode->i_size);
+	if (inode->i_size % PAGE_CACHE_SIZE != 0) {
+		page_len = PAGE_CACHE_SIZE -
+			(inode->i_size & (PAGE_CACHE_SIZE - 1));
+
+		err = ext4_discard_partial_page_buffers(handle,
+			mapping, inode->i_size, page_len, 0);
+
+		if (err)
+			goto out_stop;
+	}
 
 	if (ext4_orphan_add(handle, inode))
 		goto out_stop;
@@ -3760,6 +4300,7 @@
 	int ret = 0;
 	int ret2 = 0;
 	int retries = 0;
+	int flags;
 	struct ext4_map_blocks map;
 	unsigned int credits, blkbits = inode->i_blkbits;
 
@@ -3796,6 +4337,16 @@
 		trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
 		return ret;
 	}
+	flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT;
+	if (mode & FALLOC_FL_KEEP_SIZE)
+		flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
+	/*
+	 * Don't normalize the request if it can fit in one extent so
+	 * that it doesn't get unnecessarily split into multiple
+	 * extents.
+	 */
+	if (len <= EXT_UNINIT_MAX_LEN << blkbits)
+		flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
 retry:
 	while (ret >= 0 && ret < max_blocks) {
 		map.m_lblk = map.m_lblk + ret;
@@ -3805,9 +4356,7 @@
 			ret = PTR_ERR(handle);
 			break;
 		}
-		ret = ext4_map_blocks(handle, inode, &map,
-				      EXT4_GET_BLOCKS_CREATE_UNINIT_EXT |
-				      EXT4_GET_BLOCKS_NO_NORMALIZE);
+		ret = ext4_map_blocks(handle, inode, &map, flags);
 		if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
 			WARN_ON(ret <= 0);
@@ -4102,7 +4651,6 @@
 		return EXT_BREAK;
 	return EXT_CONTINUE;
 }
-
 /* fiemap flags we can handle specified here */
 #define EXT4_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
 
@@ -4162,17 +4710,28 @@
 	struct address_space *mapping = inode->i_mapping;
 	struct ext4_map_blocks map;
 	handle_t *handle;
-	loff_t first_block_offset, last_block_offset, block_len;
-	loff_t first_page, last_page, first_page_offset, last_page_offset;
+	loff_t first_page, last_page, page_len;
+	loff_t first_page_offset, last_page_offset;
 	int ret, credits, blocks_released, err = 0;
 
+	/* No need to punch hole beyond i_size */
+	if (offset >= inode->i_size)
+		return 0;
+
+	/*
+	 * If the hole extends beyond i_size, set the hole
+	 * to end after the page that contains i_size
+	 */
+	if (offset + length > inode->i_size) {
+		length = inode->i_size +
+		   PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
+		   offset;
+	}
+
 	first_block = (offset + sb->s_blocksize - 1) >>
 		EXT4_BLOCK_SIZE_BITS(sb);
 	last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
 
-	first_block_offset = first_block << EXT4_BLOCK_SIZE_BITS(sb);
-	last_block_offset = last_block << EXT4_BLOCK_SIZE_BITS(sb);
-
 	first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	last_page = (offset + length) >> PAGE_CACHE_SHIFT;
 
@@ -4185,11 +4744,10 @@
 	 */
 	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
 		err = filemap_write_and_wait_range(mapping,
-			first_page_offset == 0 ? 0 : first_page_offset-1,
-			last_page_offset);
+			offset, offset + length - 1);
 
-			if (err)
-				return err;
+		if (err)
+			return err;
 	}
 
 	/* Now release the pages */
@@ -4211,24 +4769,64 @@
 		goto out;
 
 	/*
-	 * Now we need to zero out the un block aligned data.
-	 * If the file is smaller than a block, just
-	 * zero out the middle
+	 * Now we need to zero out the non-page-aligned data in the
+	 * pages at the start and tail of the hole, and unmap the buffer
+	 * heads for the block aligned regions of the page that were
+	 * completely zeroed.
 	 */
-	if (first_block > last_block)
-		ext4_block_zero_page_range(handle, mapping, offset, length);
-	else {
-		/* zero out the head of the hole before the first block */
-		block_len  = first_block_offset - offset;
-		if (block_len > 0)
-			ext4_block_zero_page_range(handle, mapping,
-						   offset, block_len);
+	if (first_page > last_page) {
+		/*
+		 * If the file space being truncated is contained within a page
+		 * just zero out and unmap the middle of that page
+		 */
+		err = ext4_discard_partial_page_buffers(handle,
+			mapping, offset, length, 0);
 
-		/* zero out the tail of the hole after the last block */
-		block_len = offset + length - last_block_offset;
-		if (block_len > 0) {
-			ext4_block_zero_page_range(handle, mapping,
-					last_block_offset, block_len);
+		if (err)
+			goto out;
+	} else {
+		/*
+		 * zero out and unmap the partial page that contains
+		 * the start of the hole
+		 */
+		page_len  = first_page_offset - offset;
+		if (page_len > 0) {
+			err = ext4_discard_partial_page_buffers(handle, mapping,
+						   offset, page_len, 0);
+			if (err)
+				goto out;
+		}
+
+		/*
+		 * zero out and unmap the partial page that contains
+		 * the end of the hole
+		 */
+		page_len = offset + length - last_page_offset;
+		if (page_len > 0) {
+			err = ext4_discard_partial_page_buffers(handle, mapping,
+					last_page_offset, page_len, 0);
+			if (err)
+				goto out;
+		}
+	}
+
+
+	/*
+	 * If i_size is contained in the last page, we need to
+	 * unmap and zero the partial page after i_size
+	 */
+	if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
+	   inode->i_size % PAGE_CACHE_SIZE != 0) {
+
+		page_len = PAGE_CACHE_SIZE -
+			(inode->i_size & (PAGE_CACHE_SIZE - 1));
+
+		if (page_len > 0) {
+			err = ext4_discard_partial_page_buffers(handle,
+			  mapping, inode->i_size, page_len, 0);
+
+			if (err)
+				goto out;
 		}
 	}
 
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index b9548f4..cb70f18 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -181,8 +181,8 @@
 		path.dentry = mnt->mnt_root;
 		cp = d_path(&path, buf, sizeof(buf));
 		if (!IS_ERR(cp)) {
-			memcpy(sbi->s_es->s_last_mounted, cp,
-			       sizeof(sbi->s_es->s_last_mounted));
+			strlcpy(sbi->s_es->s_last_mounted, cp,
+				sizeof(sbi->s_es->s_last_mounted));
 			ext4_mark_super_dirty(sb);
 		}
 	}
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 036f78f..00a2cb7 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -75,7 +75,7 @@
  * to written.
  * The function return the number of pending IOs on success.
  */
-extern int ext4_flush_completed_IO(struct inode *inode)
+int ext4_flush_completed_IO(struct inode *inode)
 {
 	ext4_io_end_t *io;
 	struct ext4_inode_info *ei = EXT4_I(inode);
@@ -83,14 +83,12 @@
 	int ret = 0;
 	int ret2 = 0;
 
-	if (list_empty(&ei->i_completed_io_list))
-		return ret;
-
 	dump_completed_IO(inode);
 	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
 	while (!list_empty(&ei->i_completed_io_list)){
 		io = list_entry(ei->i_completed_io_list.next,
 				ext4_io_end_t, list);
+		list_del_init(&io->list);
 		/*
 		 * Calling ext4_end_io_nolock() to convert completed
 		 * IO to written.
@@ -107,11 +105,9 @@
 		 */
 		spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
 		ret = ext4_end_io_nolock(io);
-		spin_lock_irqsave(&ei->i_completed_io_lock, flags);
 		if (ret < 0)
 			ret2 = ret;
-		else
-			list_del_init(&io->list);
+		spin_lock_irqsave(&ei->i_completed_io_lock, flags);
 	}
 	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
 	return (ret2 < 0) ? ret2 : 0;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index ecc55bd..00beb4f 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -78,7 +78,7 @@
 	 * allocation, essentially implementing a per-group read-only flag. */
 	if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
 		ext4_error(sb, "Checksum bad for group %u", block_group);
-		ext4_free_blks_set(sb, gdp, 0);
+		ext4_free_group_clusters_set(sb, gdp, 0);
 		ext4_free_inodes_set(sb, gdp, 0);
 		ext4_itable_unused_set(sb, gdp, 0);
 		memset(bh->b_data, 0xff, sb->s_blocksize);
@@ -293,121 +293,9 @@
 	ext4_std_error(sb, fatal);
 }
 
-/*
- * There are two policies for allocating an inode.  If the new inode is
- * a directory, then a forward search is made for a block group with both
- * free space and a low directory-to-inode ratio; if that fails, then of
- * the groups with above-average free space, that group with the fewest
- * directories already is chosen.
- *
- * For other inodes, search forward from the parent directory\'s block
- * group to find a free inode.
- */
-static int find_group_dir(struct super_block *sb, struct inode *parent,
-				ext4_group_t *best_group)
-{
-	ext4_group_t ngroups = ext4_get_groups_count(sb);
-	unsigned int freei, avefreei;
-	struct ext4_group_desc *desc, *best_desc = NULL;
-	ext4_group_t group;
-	int ret = -1;
-
-	freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter);
-	avefreei = freei / ngroups;
-
-	for (group = 0; group < ngroups; group++) {
-		desc = ext4_get_group_desc(sb, group, NULL);
-		if (!desc || !ext4_free_inodes_count(sb, desc))
-			continue;
-		if (ext4_free_inodes_count(sb, desc) < avefreei)
-			continue;
-		if (!best_desc ||
-		    (ext4_free_blks_count(sb, desc) >
-		     ext4_free_blks_count(sb, best_desc))) {
-			*best_group = group;
-			best_desc = desc;
-			ret = 0;
-		}
-	}
-	return ret;
-}
-
-#define free_block_ratio 10
-
-static int find_group_flex(struct super_block *sb, struct inode *parent,
-			   ext4_group_t *best_group)
-{
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct ext4_group_desc *desc;
-	struct flex_groups *flex_group = sbi->s_flex_groups;
-	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
-	ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
-	ext4_group_t ngroups = ext4_get_groups_count(sb);
-	int flex_size = ext4_flex_bg_size(sbi);
-	ext4_group_t best_flex = parent_fbg_group;
-	int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
-	int flexbg_free_blocks;
-	int flex_freeb_ratio;
-	ext4_group_t n_fbg_groups;
-	ext4_group_t i;
-
-	n_fbg_groups = (ngroups + flex_size - 1) >>
-		sbi->s_log_groups_per_flex;
-
-find_close_to_parent:
-	flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks);
-	flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
-	if (atomic_read(&flex_group[best_flex].free_inodes) &&
-	    flex_freeb_ratio > free_block_ratio)
-		goto found_flexbg;
-
-	if (best_flex && best_flex == parent_fbg_group) {
-		best_flex--;
-		goto find_close_to_parent;
-	}
-
-	for (i = 0; i < n_fbg_groups; i++) {
-		if (i == parent_fbg_group || i == parent_fbg_group - 1)
-			continue;
-
-		flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks);
-		flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
-
-		if (flex_freeb_ratio > free_block_ratio &&
-		    (atomic_read(&flex_group[i].free_inodes))) {
-			best_flex = i;
-			goto found_flexbg;
-		}
-
-		if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) ||
-		    ((atomic_read(&flex_group[i].free_blocks) >
-		      atomic_read(&flex_group[best_flex].free_blocks)) &&
-		     atomic_read(&flex_group[i].free_inodes)))
-			best_flex = i;
-	}
-
-	if (!atomic_read(&flex_group[best_flex].free_inodes) ||
-	    !atomic_read(&flex_group[best_flex].free_blocks))
-		return -1;
-
-found_flexbg:
-	for (i = best_flex * flex_size; i < ngroups &&
-		     i < (best_flex + 1) * flex_size; i++) {
-		desc = ext4_get_group_desc(sb, i, NULL);
-		if (ext4_free_inodes_count(sb, desc)) {
-			*best_group = i;
-			goto out;
-		}
-	}
-
-	return -1;
-out:
-	return 0;
-}
-
 struct orlov_stats {
 	__u32 free_inodes;
-	__u32 free_blocks;
+	__u32 free_clusters;
 	__u32 used_dirs;
 };
 
@@ -424,7 +312,7 @@
 
 	if (flex_size > 1) {
 		stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
-		stats->free_blocks = atomic_read(&flex_group[g].free_blocks);
+		stats->free_clusters = atomic_read(&flex_group[g].free_clusters);
 		stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
 		return;
 	}
@@ -432,11 +320,11 @@
 	desc = ext4_get_group_desc(sb, g, NULL);
 	if (desc) {
 		stats->free_inodes = ext4_free_inodes_count(sb, desc);
-		stats->free_blocks = ext4_free_blks_count(sb, desc);
+		stats->free_clusters = ext4_free_group_clusters(sb, desc);
 		stats->used_dirs = ext4_used_dirs_count(sb, desc);
 	} else {
 		stats->free_inodes = 0;
-		stats->free_blocks = 0;
+		stats->free_clusters = 0;
 		stats->used_dirs = 0;
 	}
 }
@@ -471,10 +359,10 @@
 	ext4_group_t real_ngroups = ext4_get_groups_count(sb);
 	int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
 	unsigned int freei, avefreei;
-	ext4_fsblk_t freeb, avefreeb;
+	ext4_fsblk_t freeb, avefreec;
 	unsigned int ndirs;
 	int max_dirs, min_inodes;
-	ext4_grpblk_t min_blocks;
+	ext4_grpblk_t min_clusters;
 	ext4_group_t i, grp, g, ngroups;
 	struct ext4_group_desc *desc;
 	struct orlov_stats stats;
@@ -490,9 +378,10 @@
 
 	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
 	avefreei = freei / ngroups;
-	freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
-	avefreeb = freeb;
-	do_div(avefreeb, ngroups);
+	freeb = EXT4_C2B(sbi,
+		percpu_counter_read_positive(&sbi->s_freeclusters_counter));
+	avefreec = freeb;
+	do_div(avefreec, ngroups);
 	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
 
 	if (S_ISDIR(mode) &&
@@ -518,7 +407,7 @@
 				continue;
 			if (stats.free_inodes < avefreei)
 				continue;
-			if (stats.free_blocks < avefreeb)
+			if (stats.free_clusters < avefreec)
 				continue;
 			grp = g;
 			ret = 0;
@@ -556,7 +445,7 @@
 	min_inodes = avefreei - inodes_per_group*flex_size / 4;
 	if (min_inodes < 1)
 		min_inodes = 1;
-	min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4;
+	min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4;
 
 	/*
 	 * Start looking in the flex group where we last allocated an
@@ -575,7 +464,7 @@
 			continue;
 		if (stats.free_inodes < min_inodes)
 			continue;
-		if (stats.free_blocks < min_blocks)
+		if (stats.free_clusters < min_clusters)
 			continue;
 		goto found_flex_bg;
 	}
@@ -659,7 +548,7 @@
 	*group = parent_group;
 	desc = ext4_get_group_desc(sb, *group, NULL);
 	if (desc && ext4_free_inodes_count(sb, desc) &&
-			ext4_free_blks_count(sb, desc))
+	    ext4_free_group_clusters(sb, desc))
 		return 0;
 
 	/*
@@ -683,7 +572,7 @@
 			*group -= ngroups;
 		desc = ext4_get_group_desc(sb, *group, NULL);
 		if (desc && ext4_free_inodes_count(sb, desc) &&
-				ext4_free_blks_count(sb, desc))
+		    ext4_free_group_clusters(sb, desc))
 			return 0;
 	}
 
@@ -802,7 +691,7 @@
  * group to find a free inode.
  */
 struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
-			     const struct qstr *qstr, __u32 goal)
+			     const struct qstr *qstr, __u32 goal, uid_t *owner)
 {
 	struct super_block *sb;
 	struct buffer_head *inode_bitmap_bh = NULL;
@@ -816,8 +705,6 @@
 	int ret2, err = 0;
 	struct inode *ret;
 	ext4_group_t i;
-	int free = 0;
-	static int once = 1;
 	ext4_group_t flex_group;
 
 	/* Cannot create files in a deleted directory */
@@ -843,26 +730,9 @@
 		goto got_group;
 	}
 
-	if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
-		ret2 = find_group_flex(sb, dir, &group);
-		if (ret2 == -1) {
-			ret2 = find_group_other(sb, dir, &group, mode);
-			if (ret2 == 0 && once) {
-				once = 0;
-				printk(KERN_NOTICE "ext4: find_group_flex "
-				       "failed, fallback succeeded dir %lu\n",
-				       dir->i_ino);
-			}
-		}
-		goto got_group;
-	}
-
-	if (S_ISDIR(mode)) {
-		if (test_opt(sb, OLDALLOC))
-			ret2 = find_group_dir(sb, dir, &group);
-		else
-			ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
-	} else
+	if (S_ISDIR(mode))
+		ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
+	else
 		ret2 = find_group_other(sb, dir, &group, mode);
 
 got_group:
@@ -950,26 +820,21 @@
 			goto fail;
 		}
 
-		free = 0;
-		ext4_lock_group(sb, group);
+		BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
+		err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh);
+		brelse(block_bitmap_bh);
+
 		/* recheck and clear flag under lock if we still need to */
+		ext4_lock_group(sb, group);
 		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-			free = ext4_free_blocks_after_init(sb, group, gdp);
 			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
-			ext4_free_blks_set(sb, gdp, free);
+			ext4_free_group_clusters_set(sb, gdp,
+				ext4_free_clusters_after_init(sb, group, gdp));
 			gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
 								gdp);
 		}
 		ext4_unlock_group(sb, group);
 
-		/* Don't need to dirty bitmap block if we didn't change it */
-		if (free) {
-			BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
-			err = ext4_handle_dirty_metadata(handle,
-							NULL, block_bitmap_bh);
-		}
-
-		brelse(block_bitmap_bh);
 		if (err)
 			goto fail;
 	}
@@ -987,8 +852,11 @@
 		flex_group = ext4_flex_group(sbi, group);
 		atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
 	}
-
-	if (test_opt(sb, GRPID)) {
+	if (owner) {
+		inode->i_mode = mode;
+		inode->i_uid = owner[0];
+		inode->i_gid = owner[1];
+	} else if (test_opt(sb, GRPID)) {
 		inode->i_mode = mode;
 		inode->i_uid = current_fsuid();
 		inode->i_gid = dir->i_gid;
@@ -1005,11 +873,7 @@
 	ei->i_dir_start_lookup = 0;
 	ei->i_disksize = 0;
 
-	/*
-	 * Don't inherit extent flag from directory, amongst others. We set
-	 * extent flag on newly created directory and file only if -o extent
-	 * mount option is specified
-	 */
+	/* Don't inherit extent flag from directory, amongst others. */
 	ei->i_flags =
 		ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
 	ei->i_file_acl = 0;
@@ -1235,7 +1099,7 @@
  * inode allocation from the current group, so we take alloc_sem lock, to
  * block ext4_claim_inode until we are finished.
  */
-extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
+int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
 				 int barrier)
 {
 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 0962642..3cfc73f 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -699,6 +699,13 @@
 	/*
 	 * Okay, we need to do block allocation.
 	*/
+	if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+				       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
+		EXT4_ERROR_INODE(inode, "Can't allocate blocks for "
+				 "non-extent mapped inodes with bigalloc");
+		return -ENOSPC;
+	}
+
 	goal = ext4_find_goal(inode, map->m_lblk, partial);
 
 	/* the number of blocks need to allocate for [d,t]indirect blocks */
@@ -1343,7 +1350,9 @@
 	__le32 nr = 0;
 	int n = 0;
 	ext4_lblk_t last_block, max_block;
+	loff_t page_len;
 	unsigned blocksize = inode->i_sb->s_blocksize;
+	int err;
 
 	handle = start_transaction(inode);
 	if (IS_ERR(handle))
@@ -1354,9 +1363,16 @@
 	max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
 					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
 
-	if (inode->i_size & (blocksize - 1))
-		if (ext4_block_truncate_page(handle, mapping, inode->i_size))
+	if (inode->i_size % PAGE_CACHE_SIZE != 0) {
+		page_len = PAGE_CACHE_SIZE -
+			(inode->i_size & (PAGE_CACHE_SIZE - 1));
+
+		err = ext4_discard_partial_page_buffers(handle,
+			mapping, inode->i_size, page_len, 0);
+
+		if (err)
 			goto out_stop;
+	}
 
 	if (last_block != max_block) {
 		n = ext4_block_to_path(inode, last_block, offsets, NULL);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be6668b..cc5a6da 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -42,7 +42,6 @@
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
-#include "ext4_extents.h"
 #include "truncate.h"
 
 #include <trace/events/ext4.h>
@@ -268,7 +267,7 @@
 	struct ext4_inode_info *ei = EXT4_I(inode);
 
 	spin_lock(&ei->i_block_reservation_lock);
-	trace_ext4_da_update_reserve_space(inode, used);
+	trace_ext4_da_update_reserve_space(inode, used, quota_claim);
 	if (unlikely(used > ei->i_reserved_data_blocks)) {
 		ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
 			 "with only %d reserved data blocks\n",
@@ -281,7 +280,7 @@
 	/* Update per-inode reservations */
 	ei->i_reserved_data_blocks -= used;
 	ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
-	percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+	percpu_counter_sub(&sbi->s_dirtyclusters_counter,
 			   used + ei->i_allocated_meta_blocks);
 	ei->i_allocated_meta_blocks = 0;
 
@@ -291,7 +290,7 @@
 		 * only when we have written all of the delayed
 		 * allocation blocks.
 		 */
-		percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+		percpu_counter_sub(&sbi->s_dirtyclusters_counter,
 				   ei->i_reserved_meta_blocks);
 		ei->i_reserved_meta_blocks = 0;
 		ei->i_da_metadata_calc_len = 0;
@@ -300,14 +299,14 @@
 
 	/* Update quota subsystem for data blocks */
 	if (quota_claim)
-		dquot_claim_block(inode, used);
+		dquot_claim_block(inode, EXT4_C2B(sbi, used));
 	else {
 		/*
 		 * We did fallocate with an offset that is already delayed
 		 * allocated. So on delayed allocated writeback we should
 		 * not re-claim the quota for fallocated blocks.
 		 */
-		dquot_release_reservation_block(inode, used);
+		dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
 	}
 
 	/*
@@ -399,6 +398,49 @@
 }
 
 /*
+ * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map.
+ */
+static void set_buffers_da_mapped(struct inode *inode,
+				   struct ext4_map_blocks *map)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct pagevec pvec;
+	int i, nr_pages;
+	pgoff_t index, end;
+
+	index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	end = (map->m_lblk + map->m_len - 1) >>
+		(PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+	pagevec_init(&pvec, 0);
+	while (index <= end) {
+		nr_pages = pagevec_lookup(&pvec, mapping, index,
+					  min(end - index + 1,
+					      (pgoff_t)PAGEVEC_SIZE));
+		if (nr_pages == 0)
+			break;
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+			struct buffer_head *bh, *head;
+
+			if (unlikely(page->mapping != mapping) ||
+			    !PageDirty(page))
+				break;
+
+			if (page_has_buffers(page)) {
+				bh = head = page_buffers(page);
+				do {
+					set_buffer_da_mapped(bh);
+					bh = bh->b_this_page;
+				} while (bh != head);
+			}
+			index++;
+		}
+		pagevec_release(&pvec);
+	}
+}
+
+/*
  * The ext4_map_blocks() function tries to look up the requested blocks,
  * and returns if the blocks are already mapped.
  *
@@ -416,7 +458,7 @@
  * the buffer head is mapped.
  *
  * It returns 0 if plain look up failed (blocks have not been allocated), in
- * that casem, buffer head is unmapped
+ * that case, buffer head is unmapped
  *
  * It returns the error in case of allocation failure.
  */
@@ -435,9 +477,11 @@
 	 */
 	down_read((&EXT4_I(inode)->i_data_sem));
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-		retval = ext4_ext_map_blocks(handle, inode, map, 0);
+		retval = ext4_ext_map_blocks(handle, inode, map, flags &
+					     EXT4_GET_BLOCKS_KEEP_SIZE);
 	} else {
-		retval = ext4_ind_map_blocks(handle, inode, map, 0);
+		retval = ext4_ind_map_blocks(handle, inode, map, flags &
+					     EXT4_GET_BLOCKS_KEEP_SIZE);
 	}
 	up_read((&EXT4_I(inode)->i_data_sem));
 
@@ -455,7 +499,7 @@
 	 * Returns if the blocks have already allocated
 	 *
 	 * Note that if blocks have been preallocated
-	 * ext4_ext_get_block() returns th create = 0
+	 * ext4_ext_get_block() returns the create = 0
 	 * with buffer head unmapped.
 	 */
 	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
@@ -517,9 +561,17 @@
 			(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
 			ext4_da_update_reserve_space(inode, retval, 1);
 	}
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
 		ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
 
+		/* If we have successfully mapped the delayed allocated blocks,
+		 * set the BH_Da_Mapped bit on them. Its important to do this
+		 * under the protection of i_data_sem.
+		 */
+		if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
+			set_buffers_da_mapped(inode, map);
+	}
+
 	up_write((&EXT4_I(inode)->i_data_sem));
 	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
 		int ret = check_block_validity(inode, map);
@@ -909,7 +961,11 @@
 			ext4_orphan_add(handle, inode);
 		if (ret2 < 0)
 			ret = ret2;
+	} else {
+		unlock_page(page);
+		page_cache_release(page);
 	}
+
 	ret2 = ext4_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
@@ -1037,14 +1093,14 @@
 }
 
 /*
- * Reserve a single block located at lblock
+ * Reserve a single cluster located at lblock
  */
 static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
 {
 	int retries = 0;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct ext4_inode_info *ei = EXT4_I(inode);
-	unsigned long md_needed;
+	unsigned int md_needed;
 	int ret;
 
 	/*
@@ -1054,7 +1110,8 @@
 	 */
 repeat:
 	spin_lock(&ei->i_block_reservation_lock);
-	md_needed = ext4_calc_metadata_amount(inode, lblock);
+	md_needed = EXT4_NUM_B2C(sbi,
+				 ext4_calc_metadata_amount(inode, lblock));
 	trace_ext4_da_reserve_space(inode, md_needed);
 	spin_unlock(&ei->i_block_reservation_lock);
 
@@ -1063,15 +1120,15 @@
 	 * us from metadata over-estimation, though we may go over by
 	 * a small amount in the end.  Here we just reserve for data.
 	 */
-	ret = dquot_reserve_block(inode, 1);
+	ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
 	if (ret)
 		return ret;
 	/*
 	 * We do still charge estimated metadata to the sb though;
 	 * we cannot afford to run out of free blocks.
 	 */
-	if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
-		dquot_release_reservation_block(inode, 1);
+	if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
+		dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
 		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
 			yield();
 			goto repeat;
@@ -1118,19 +1175,21 @@
 		 * We can release all of the reserved metadata blocks
 		 * only when we have written all of the delayed
 		 * allocation blocks.
+		 * Note that in case of bigalloc, i_reserved_meta_blocks,
+		 * i_reserved_data_blocks, etc. refer to number of clusters.
 		 */
-		percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+		percpu_counter_sub(&sbi->s_dirtyclusters_counter,
 				   ei->i_reserved_meta_blocks);
 		ei->i_reserved_meta_blocks = 0;
 		ei->i_da_metadata_calc_len = 0;
 	}
 
 	/* update fs dirty data blocks counter */
-	percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
+	percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
 
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-	dquot_release_reservation_block(inode, to_free);
+	dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
 }
 
 static void ext4_da_page_release_reservation(struct page *page,
@@ -1139,6 +1198,9 @@
 	int to_release = 0;
 	struct buffer_head *head, *bh;
 	unsigned int curr_off = 0;
+	struct inode *inode = page->mapping->host;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	int num_clusters;
 
 	head = page_buffers(page);
 	bh = head;
@@ -1148,10 +1210,24 @@
 		if ((offset <= curr_off) && (buffer_delay(bh))) {
 			to_release++;
 			clear_buffer_delay(bh);
+			clear_buffer_da_mapped(bh);
 		}
 		curr_off = next_off;
 	} while ((bh = bh->b_this_page) != head);
-	ext4_da_release_space(page->mapping->host, to_release);
+
+	/* If we have released all the blocks belonging to a cluster, then we
+	 * need to release the reserved space for that cluster. */
+	num_clusters = EXT4_NUM_B2C(sbi, to_release);
+	while (num_clusters > 0) {
+		ext4_fsblk_t lblk;
+		lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
+			((num_clusters - 1) << sbi->s_cluster_bits);
+		if (sbi->s_cluster_ratio == 1 ||
+		    !ext4_find_delalloc_cluster(inode, lblk, 1))
+			ext4_da_release_space(inode, 1);
+
+		num_clusters--;
+	}
 }
 
 /*
@@ -1253,6 +1329,8 @@
 						clear_buffer_delay(bh);
 						bh->b_blocknr = pblock;
 					}
+					if (buffer_da_mapped(bh))
+						clear_buffer_da_mapped(bh);
 					if (buffer_unwritten(bh) ||
 					    buffer_mapped(bh))
 						BUG_ON(bh->b_blocknr != pblock);
@@ -1346,12 +1424,15 @@
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	printk(KERN_CRIT "Total free blocks count %lld\n",
-	       ext4_count_free_blocks(inode->i_sb));
+	       EXT4_C2B(EXT4_SB(inode->i_sb),
+			ext4_count_free_clusters(inode->i_sb)));
 	printk(KERN_CRIT "Free/Dirty block details\n");
 	printk(KERN_CRIT "free_blocks=%lld\n",
-	       (long long) percpu_counter_sum(&sbi->s_freeblocks_counter));
+	       (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+		percpu_counter_sum(&sbi->s_freeclusters_counter)));
 	printk(KERN_CRIT "dirty_blocks=%lld\n",
-	       (long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
+	       (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+		percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
 	printk(KERN_CRIT "Block reservation details\n");
 	printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
 	       EXT4_I(inode)->i_reserved_data_blocks);
@@ -1430,8 +1511,7 @@
 		if (err == -EAGAIN)
 			goto submit_io;
 
-		if (err == -ENOSPC &&
-		    ext4_count_free_blocks(sb)) {
+		if (err == -ENOSPC && ext4_count_free_clusters(sb)) {
 			mpd->retval = err;
 			goto submit_io;
 		}
@@ -1471,13 +1551,15 @@
 
 		for (i = 0; i < map.m_len; i++)
 			unmap_underlying_metadata(bdev, map.m_pblk + i);
-	}
 
-	if (ext4_should_order_data(mpd->inode)) {
-		err = ext4_jbd2_file_inode(handle, mpd->inode);
-		if (err)
-			/* This only happens if the journal is aborted */
-			return;
+		if (ext4_should_order_data(mpd->inode)) {
+			err = ext4_jbd2_file_inode(handle, mpd->inode);
+			if (err) {
+				/* Only if the journal is aborted */
+				mpd->retval = err;
+				goto submit_io;
+			}
+		}
 	}
 
 	/*
@@ -1584,6 +1666,66 @@
 }
 
 /*
+ * This function is grabs code from the very beginning of
+ * ext4_map_blocks, but assumes that the caller is from delayed write
+ * time. This function looks up the requested blocks and sets the
+ * buffer delay bit under the protection of i_data_sem.
+ */
+static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+			      struct ext4_map_blocks *map,
+			      struct buffer_head *bh)
+{
+	int retval;
+	sector_t invalid_block = ~((sector_t) 0xffff);
+
+	if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
+		invalid_block = ~0;
+
+	map->m_flags = 0;
+	ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
+		  "logical block %lu\n", inode->i_ino, map->m_len,
+		  (unsigned long) map->m_lblk);
+	/*
+	 * Try to see if we can get the block without requesting a new
+	 * file system block.
+	 */
+	down_read((&EXT4_I(inode)->i_data_sem));
+	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+		retval = ext4_ext_map_blocks(NULL, inode, map, 0);
+	else
+		retval = ext4_ind_map_blocks(NULL, inode, map, 0);
+
+	if (retval == 0) {
+		/*
+		 * XXX: __block_prepare_write() unmaps passed block,
+		 * is it OK?
+		 */
+		/* If the block was allocated from previously allocated cluster,
+		 * then we dont need to reserve it again. */
+		if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
+			retval = ext4_da_reserve_space(inode, iblock);
+			if (retval)
+				/* not enough space to reserve */
+				goto out_unlock;
+		}
+
+		/* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
+		 * and it should not appear on the bh->b_state.
+		 */
+		map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
+
+		map_bh(bh, inode->i_sb, invalid_block);
+		set_buffer_new(bh);
+		set_buffer_delay(bh);
+	}
+
+out_unlock:
+	up_read((&EXT4_I(inode)->i_data_sem));
+
+	return retval;
+}
+
+/*
  * This is a special get_blocks_t callback which is used by
  * ext4_da_write_begin().  It will either return mapped block or
  * reserve space for a single block.
@@ -1600,10 +1742,6 @@
 {
 	struct ext4_map_blocks map;
 	int ret = 0;
-	sector_t invalid_block = ~((sector_t) 0xffff);
-
-	if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
-		invalid_block = ~0;
 
 	BUG_ON(create == 0);
 	BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
@@ -1616,25 +1754,9 @@
 	 * preallocated blocks are unmapped but should treated
 	 * the same as allocated blocks.
 	 */
-	ret = ext4_map_blocks(NULL, inode, &map, 0);
-	if (ret < 0)
+	ret = ext4_da_map_blocks(inode, iblock, &map, bh);
+	if (ret <= 0)
 		return ret;
-	if (ret == 0) {
-		if (buffer_delay(bh))
-			return 0; /* Not sure this could or should happen */
-		/*
-		 * XXX: __block_write_begin() unmaps passed block, is it OK?
-		 */
-		ret = ext4_da_reserve_space(inode, iblock);
-		if (ret)
-			/* not enough space to reserve */
-			return ret;
-
-		map_bh(bh, inode->i_sb, invalid_block);
-		set_buffer_new(bh);
-		set_buffer_delay(bh);
-		return 0;
-	}
 
 	map_bh(bh, inode->i_sb, map.m_pblk);
 	bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
@@ -2050,6 +2172,7 @@
 	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
 	pgoff_t done_index = 0;
 	pgoff_t end;
+	struct blk_plug plug;
 
 	trace_ext4_da_writepages(inode, wbc);
 
@@ -2128,6 +2251,7 @@
 	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
 		tag_pages_for_writeback(mapping, index, end);
 
+	blk_start_plug(&plug);
 	while (!ret && wbc->nr_to_write > 0) {
 
 		/*
@@ -2178,11 +2302,12 @@
 			ret = 0;
 		} else if (ret == MPAGE_DA_EXTENT_TAIL) {
 			/*
-			 * got one extent now try with
-			 * rest of the pages
+			 * Got one extent now try with rest of the pages.
+			 * If mpd.retval is set -EIO, journal is aborted.
+			 * So we don't need to write any more.
 			 */
 			pages_written += mpd.pages_written;
-			ret = 0;
+			ret = mpd.retval;
 			io_done = 1;
 		} else if (wbc->nr_to_write)
 			/*
@@ -2192,6 +2317,7 @@
 			 */
 			break;
 	}
+	blk_finish_plug(&plug);
 	if (!io_done && !cycled) {
 		cycled = 1;
 		index = 0;
@@ -2230,10 +2356,11 @@
 	 * Delalloc need an accurate free block accounting. So switch
 	 * to non delalloc when we are near to error range.
 	 */
-	free_blocks  = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
-	dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter);
+	free_blocks  = EXT4_C2B(sbi,
+		percpu_counter_read_positive(&sbi->s_freeclusters_counter));
+	dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
 	if (2 * free_blocks < 3 * dirty_blocks ||
-		free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
+		free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
 		/*
 		 * free block count is less than 150% of dirty blocks
 		 * or free blocks is less than watermark
@@ -2259,6 +2386,7 @@
 	pgoff_t index;
 	struct inode *inode = mapping->host;
 	handle_t *handle;
+	loff_t page_len;
 
 	index = pos >> PAGE_CACHE_SHIFT;
 
@@ -2305,6 +2433,13 @@
 		 */
 		if (pos + len > inode->i_size)
 			ext4_truncate_failed_write(inode);
+	} else {
+		page_len = pos & (PAGE_CACHE_SIZE - 1);
+		if (page_len > 0) {
+			ret = ext4_discard_partial_page_buffers_no_lock(handle,
+				inode, page, pos - page_len, page_len,
+				EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
+		}
 	}
 
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -2347,6 +2482,7 @@
 	loff_t new_i_size;
 	unsigned long start, end;
 	int write_mode = (int)(unsigned long)fsdata;
+	loff_t page_len;
 
 	if (write_mode == FALL_BACK_TO_NONDELALLOC) {
 		if (ext4_should_order_data(inode)) {
@@ -2395,6 +2531,16 @@
 	}
 	ret2 = generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
+
+	page_len = PAGE_CACHE_SIZE -
+			((pos + copied - 1) & (PAGE_CACHE_SIZE - 1));
+
+	if (page_len > 0) {
+		ret = ext4_discard_partial_page_buffers_no_lock(handle,
+			inode, page, pos + copied - 1, page_len,
+			EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
+	}
+
 	copied = ret2;
 	if (ret2 < 0)
 		ret = ret2;
@@ -2689,10 +2835,7 @@
 	 * but being more careful is always safe for the future change.
 	 */
 	inode = io_end->inode;
-	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-		io_end->flag |= EXT4_IO_END_UNWRITTEN;
-		atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
-	}
+	ext4_set_io_unwritten_flag(inode, io_end);
 
 	/* Add the io_end to per-inode completed io list*/
 	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
@@ -2858,6 +3001,12 @@
 	struct inode *inode = file->f_mapping->host;
 	ssize_t ret;
 
+	/*
+	 * If we are doing data journalling we don't support O_DIRECT
+	 */
+	if (ext4_should_journal_data(inode))
+		return 0;
+
 	trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
@@ -2927,6 +3076,7 @@
 	.bmap			= ext4_bmap,
 	.invalidatepage		= ext4_invalidatepage,
 	.releasepage		= ext4_releasepage,
+	.direct_IO		= ext4_direct_IO,
 	.is_partially_uptodate  = block_is_partially_uptodate,
 	.error_remove_page	= generic_error_remove_page,
 };
@@ -2963,6 +3113,227 @@
 		inode->i_mapping->a_ops = &ext4_journalled_aops;
 }
 
+
+/*
+ * ext4_discard_partial_page_buffers()
+ * Wrapper function for ext4_discard_partial_page_buffers_no_lock.
+ * This function finds and locks the page containing the offset
+ * "from" and passes it to ext4_discard_partial_page_buffers_no_lock.
+ * Calling functions that already have the page locked should call
+ * ext4_discard_partial_page_buffers_no_lock directly.
+ */
+int ext4_discard_partial_page_buffers(handle_t *handle,
+		struct address_space *mapping, loff_t from,
+		loff_t length, int flags)
+{
+	struct inode *inode = mapping->host;
+	struct page *page;
+	int err = 0;
+
+	page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
+				   mapping_gfp_mask(mapping) & ~__GFP_FS);
+	if (!page)
+		return -ENOMEM;
+
+	err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page,
+		from, length, flags);
+
+	unlock_page(page);
+	page_cache_release(page);
+	return err;
+}
+
+/*
+ * ext4_discard_partial_page_buffers_no_lock()
+ * Zeros a page range of length 'length' starting from offset 'from'.
+ * Buffer heads that correspond to the block aligned regions of the
+ * zeroed range will be unmapped.  Unblock aligned regions
+ * will have the corresponding buffer head mapped if needed so that
+ * that region of the page can be updated with the partial zero out.
+ *
+ * This function assumes that the page has already been  locked.  The
+ * The range to be discarded must be contained with in the given page.
+ * If the specified range exceeds the end of the page it will be shortened
+ * to the end of the page that corresponds to 'from'.  This function is
+ * appropriate for updating a page and it buffer heads to be unmapped and
+ * zeroed for blocks that have been either released, or are going to be
+ * released.
+ *
+ * handle: The journal handle
+ * inode:  The files inode
+ * page:   A locked page that contains the offset "from"
+ * from:   The starting byte offset (from the begining of the file)
+ *         to begin discarding
+ * len:    The length of bytes to discard
+ * flags:  Optional flags that may be used:
+ *
+ *         EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
+ *         Only zero the regions of the page whose buffer heads
+ *         have already been unmapped.  This flag is appropriate
+ *         for updateing the contents of a page whose blocks may
+ *         have already been released, and we only want to zero
+ *         out the regions that correspond to those released blocks.
+ *
+ * Returns zero on sucess or negative on failure.
+ */
+int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+		struct inode *inode, struct page *page, loff_t from,
+		loff_t length, int flags)
+{
+	ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
+	unsigned int offset = from & (PAGE_CACHE_SIZE-1);
+	unsigned int blocksize, max, pos;
+	ext4_lblk_t iblock;
+	struct buffer_head *bh;
+	int err = 0;
+
+	blocksize = inode->i_sb->s_blocksize;
+	max = PAGE_CACHE_SIZE - offset;
+
+	if (index != page->index)
+		return -EINVAL;
+
+	/*
+	 * correct length if it does not fall between
+	 * 'from' and the end of the page
+	 */
+	if (length > max || length < 0)
+		length = max;
+
+	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+
+	if (!page_has_buffers(page)) {
+		/*
+		 * If the range to be discarded covers a partial block
+		 * we need to get the page buffers.  This is because
+		 * partial blocks cannot be released and the page needs
+		 * to be updated with the contents of the block before
+		 * we write the zeros on top of it.
+		 */
+		if ((from & (blocksize - 1)) ||
+		    ((from + length) & (blocksize - 1))) {
+			create_empty_buffers(page, blocksize, 0);
+		} else {
+			/*
+			 * If there are no partial blocks,
+			 * there is nothing to update,
+			 * so we can return now
+			 */
+			return 0;
+		}
+	}
+
+	/* Find the buffer that contains "offset" */
+	bh = page_buffers(page);
+	pos = blocksize;
+	while (offset >= pos) {
+		bh = bh->b_this_page;
+		iblock++;
+		pos += blocksize;
+	}
+
+	pos = offset;
+	while (pos < offset + length) {
+		unsigned int end_of_block, range_to_discard;
+
+		err = 0;
+
+		/* The length of space left to zero and unmap */
+		range_to_discard = offset + length - pos;
+
+		/* The length of space until the end of the block */
+		end_of_block = blocksize - (pos & (blocksize-1));
+
+		/*
+		 * Do not unmap or zero past end of block
+		 * for this buffer head
+		 */
+		if (range_to_discard > end_of_block)
+			range_to_discard = end_of_block;
+
+
+		/*
+		 * Skip this buffer head if we are only zeroing unampped
+		 * regions of the page
+		 */
+		if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED &&
+			buffer_mapped(bh))
+				goto next;
+
+		/* If the range is block aligned, unmap */
+		if (range_to_discard == blocksize) {
+			clear_buffer_dirty(bh);
+			bh->b_bdev = NULL;
+			clear_buffer_mapped(bh);
+			clear_buffer_req(bh);
+			clear_buffer_new(bh);
+			clear_buffer_delay(bh);
+			clear_buffer_unwritten(bh);
+			clear_buffer_uptodate(bh);
+			zero_user(page, pos, range_to_discard);
+			BUFFER_TRACE(bh, "Buffer discarded");
+			goto next;
+		}
+
+		/*
+		 * If this block is not completely contained in the range
+		 * to be discarded, then it is not going to be released. Because
+		 * we need to keep this block, we need to make sure this part
+		 * of the page is uptodate before we modify it by writeing
+		 * partial zeros on it.
+		 */
+		if (!buffer_mapped(bh)) {
+			/*
+			 * Buffer head must be mapped before we can read
+			 * from the block
+			 */
+			BUFFER_TRACE(bh, "unmapped");
+			ext4_get_block(inode, iblock, bh, 0);
+			/* unmapped? It's a hole - nothing to do */
+			if (!buffer_mapped(bh)) {
+				BUFFER_TRACE(bh, "still unmapped");
+				goto next;
+			}
+		}
+
+		/* Ok, it's mapped. Make sure it's up-to-date */
+		if (PageUptodate(page))
+			set_buffer_uptodate(bh);
+
+		if (!buffer_uptodate(bh)) {
+			err = -EIO;
+			ll_rw_block(READ, 1, &bh);
+			wait_on_buffer(bh);
+			/* Uhhuh. Read error. Complain and punt.*/
+			if (!buffer_uptodate(bh))
+				goto next;
+		}
+
+		if (ext4_should_journal_data(inode)) {
+			BUFFER_TRACE(bh, "get write access");
+			err = ext4_journal_get_write_access(handle, bh);
+			if (err)
+				goto next;
+		}
+
+		zero_user(page, pos, range_to_discard);
+
+		err = 0;
+		if (ext4_should_journal_data(inode)) {
+			err = ext4_handle_dirty_metadata(handle, inode, bh);
+		} else
+			mark_buffer_dirty(bh);
+
+		BUFFER_TRACE(bh, "Partial buffer zeroed");
+next:
+		bh = bh->b_this_page;
+		iblock++;
+		pos += range_to_discard;
+	}
+
+	return err;
+}
+
 /*
  * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
  * up to the end of the block which corresponds to `from'.
@@ -3005,7 +3376,7 @@
 	page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
 				   mapping_gfp_mask(mapping) & ~__GFP_FS);
 	if (!page)
-		return -EINVAL;
+		return -ENOMEM;
 
 	blocksize = inode->i_sb->s_blocksize;
 	max = blocksize - (offset & (blocksize - 1));
@@ -3074,11 +3445,8 @@
 	err = 0;
 	if (ext4_should_journal_data(inode)) {
 		err = ext4_handle_dirty_metadata(handle, inode, bh);
-	} else {
-		if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode)
-			err = ext4_jbd2_file_inode(handle, inode);
+	} else
 		mark_buffer_dirty(bh);
-	}
 
 unlock:
 	unlock_page(page);
@@ -3119,6 +3487,11 @@
 		return -ENOTSUPP;
 	}
 
+	if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
+		/* TODO: Add support for bigalloc file systems */
+		return -ENOTSUPP;
+	}
+
 	return ext4_ext_punch_hole(file, offset, length);
 }
 
@@ -4420,6 +4793,7 @@
 			  PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
 			unlock_page(page);
 			ret = VM_FAULT_SIGBUS;
+			ext4_journal_stop(handle);
 			goto out;
 		}
 		ext4_set_inode_state(inode, EXT4_STATE_JDATA);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index f18bfe3..a567968 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -21,6 +21,7 @@
 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct inode *inode = filp->f_dentry->d_inode;
+	struct super_block *sb = inode->i_sb;
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	unsigned int flags;
 
@@ -173,33 +174,8 @@
 		mnt_drop_write(filp->f_path.mnt);
 		return err;
 	}
-#ifdef CONFIG_JBD2_DEBUG
-	case EXT4_IOC_WAIT_FOR_READONLY:
-		/*
-		 * This is racy - by the time we're woken up and running,
-		 * the superblock could be released.  And the module could
-		 * have been unloaded.  So sue me.
-		 *
-		 * Returns 1 if it slept, else zero.
-		 */
-		{
-			struct super_block *sb = inode->i_sb;
-			DECLARE_WAITQUEUE(wait, current);
-			int ret = 0;
-
-			set_current_state(TASK_INTERRUPTIBLE);
-			add_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait);
-			if (timer_pending(&EXT4_SB(sb)->turn_ro_timer)) {
-				schedule();
-				ret = 1;
-			}
-			remove_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait);
-			return ret;
-		}
-#endif
 	case EXT4_IOC_GROUP_EXTEND: {
 		ext4_fsblk_t n_blocks_count;
-		struct super_block *sb = inode->i_sb;
 		int err, err2=0;
 
 		err = ext4_resize_begin(sb);
@@ -209,6 +185,13 @@
 		if (get_user(n_blocks_count, (__u32 __user *)arg))
 			return -EFAULT;
 
+		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
+			ext4_msg(sb, KERN_ERR,
+				 "Online resizing not supported with bigalloc");
+			return -EOPNOTSUPP;
+		}
+
 		err = mnt_want_write(filp->f_path.mnt);
 		if (err)
 			return err;
@@ -250,6 +233,13 @@
 			goto mext_out;
 		}
 
+		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
+			ext4_msg(sb, KERN_ERR,
+				 "Online defrag not supported with bigalloc");
+			return -EOPNOTSUPP;
+		}
+
 		err = mnt_want_write(filp->f_path.mnt);
 		if (err)
 			goto mext_out;
@@ -270,7 +260,6 @@
 
 	case EXT4_IOC_GROUP_ADD: {
 		struct ext4_new_group_data input;
-		struct super_block *sb = inode->i_sb;
 		int err, err2=0;
 
 		err = ext4_resize_begin(sb);
@@ -281,6 +270,13 @@
 				sizeof(input)))
 			return -EFAULT;
 
+		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
+			ext4_msg(sb, KERN_ERR,
+				 "Online resizing not supported with bigalloc");
+			return -EOPNOTSUPP;
+		}
+
 		err = mnt_want_write(filp->f_path.mnt);
 		if (err)
 			return err;
@@ -337,7 +333,6 @@
 
 	case FITRIM:
 	{
-		struct super_block *sb = inode->i_sb;
 		struct request_queue *q = bdev_get_queue(sb->s_bdev);
 		struct fstrim_range range;
 		int ret = 0;
@@ -348,7 +343,14 @@
 		if (!blk_queue_discard(q))
 			return -EOPNOTSUPP;
 
-		if (copy_from_user(&range, (struct fstrim_range *)arg,
+		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
+			ext4_msg(sb, KERN_ERR,
+				 "FITRIM not supported with bigalloc");
+			return -EOPNOTSUPP;
+		}
+
+		if (copy_from_user(&range, (struct fstrim_range __user *)arg,
 		    sizeof(range)))
 			return -EFAULT;
 
@@ -358,7 +360,7 @@
 		if (ret < 0)
 			return ret;
 
-		if (copy_to_user((struct fstrim_range *)arg, &range,
+		if (copy_to_user((struct fstrim_range __user *)arg, &range,
 		    sizeof(range)))
 			return -EFAULT;
 
@@ -396,11 +398,6 @@
 	case EXT4_IOC32_SETVERSION_OLD:
 		cmd = EXT4_IOC_SETVERSION_OLD;
 		break;
-#ifdef CONFIG_JBD2_DEBUG
-	case EXT4_IOC32_WAIT_FOR_READONLY:
-		cmd = EXT4_IOC_WAIT_FOR_READONLY;
-		break;
-#endif
 	case EXT4_IOC32_GETRSVSZ:
 		cmd = EXT4_IOC_GETRSVSZ;
 		break;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 17a5a57..e2d8be8 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -70,8 +70,8 @@
  *
  * pa_lstart -> the logical start block for this prealloc space
  * pa_pstart -> the physical start block for this prealloc space
- * pa_len    -> length for this prealloc space
- * pa_free   ->  free space available in this prealloc space
+ * pa_len    -> length for this prealloc space (in clusters)
+ * pa_free   ->  free space available in this prealloc space (in clusters)
  *
  * The inode preallocation space is used looking at the _logical_ start
  * block. If only the logical file block falls within the range of prealloc
@@ -126,7 +126,8 @@
  * list. In case of inode preallocation we follow a list of heuristics
  * based on file size. This can be found in ext4_mb_normalize_request. If
  * we are doing a group prealloc we try to normalize the request to
- * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is
+ * sbi->s_mb_group_prealloc.  The default value of s_mb_group_prealloc is
+ * dependent on the cluster size; for non-bigalloc file systems, it is
  * 512 blocks. This can be tuned via
  * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
  * terms of number of blocks. If we have mounted the file system with -O
@@ -459,7 +460,7 @@
 			ext4_fsblk_t blocknr;
 
 			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
-			blocknr += first + i;
+			blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
 			ext4_grp_locked_error(sb, e4b->bd_group,
 					      inode ? inode->i_ino : 0,
 					      blocknr,
@@ -580,7 +581,7 @@
 				continue;
 			}
 
-			/* both bits in buddy2 must be 0 */
+			/* both bits in buddy2 must be 1 */
 			MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
 			MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
 
@@ -653,7 +654,7 @@
 	ext4_grpblk_t chunk;
 	unsigned short border;
 
-	BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
+	BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
 
 	border = 2 << sb->s_blocksize_bits;
 
@@ -705,7 +706,7 @@
 				void *buddy, void *bitmap, ext4_group_t group)
 {
 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-	ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb);
+	ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
 	ext4_grpblk_t i = 0;
 	ext4_grpblk_t first;
 	ext4_grpblk_t len;
@@ -734,7 +735,7 @@
 
 	if (free != grp->bb_free) {
 		ext4_grp_locked_error(sb, group, 0, 0,
-				      "%u blocks in bitmap, %u in gd",
+				      "%u clusters in bitmap, %u in gd",
 				      free, grp->bb_free);
 		/*
 		 * If we intent to continue, we consider group descritor
@@ -1339,7 +1340,7 @@
 			ext4_fsblk_t blocknr;
 
 			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
-			blocknr += block;
+			blocknr += EXT4_C2B(EXT4_SB(sb), block);
 			ext4_grp_locked_error(sb, e4b->bd_group,
 					      inode ? inode->i_ino : 0,
 					      blocknr,
@@ -1390,7 +1391,6 @@
 {
 	int next = block;
 	int max;
-	int ord;
 	void *buddy;
 
 	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
@@ -1432,9 +1432,8 @@
 		if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
 			break;
 
-		ord = mb_find_order_for_block(e4b, next);
+		order = mb_find_order_for_block(e4b, next);
 
-		order = ord;
 		block = next >> order;
 		ex->fe_len += 1 << order;
 	}
@@ -1624,8 +1623,8 @@
 	struct ext4_free_extent *gex = &ac->ac_g_ex;
 
 	BUG_ON(ex->fe_len <= 0);
-	BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
-	BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+	BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
+	BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
 	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
 
 	ac->ac_found++;
@@ -1823,15 +1822,15 @@
 
 	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
 		i = mb_find_next_zero_bit(bitmap,
-						EXT4_BLOCKS_PER_GROUP(sb), i);
-		if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
+						EXT4_CLUSTERS_PER_GROUP(sb), i);
+		if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
 			/*
 			 * IF we have corrupt bitmap, we won't find any
 			 * free blocks even though group info says we
 			 * we have free blocks
 			 */
 			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
-					"%d free blocks as per "
+					"%d free clusters as per "
 					"group info. But bitmap says 0",
 					free);
 			break;
@@ -1841,7 +1840,7 @@
 		BUG_ON(ex.fe_len <= 0);
 		if (free < ex.fe_len) {
 			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
-					"%d free blocks as per "
+					"%d free clusters as per "
 					"group info. But got %d blocks",
 					free, ex.fe_len);
 			/*
@@ -1887,7 +1886,7 @@
 	do_div(a, sbi->s_stripe);
 	i = (a * sbi->s_stripe) - first_group_block;
 
-	while (i < EXT4_BLOCKS_PER_GROUP(sb)) {
+	while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
 		if (!mb_test_bit(i, bitmap)) {
 			max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
 			if (max >= sbi->s_stripe) {
@@ -2252,10 +2251,10 @@
 	 */
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 		meta_group_info[i]->bb_free =
-			ext4_free_blocks_after_init(sb, group, desc);
+			ext4_free_clusters_after_init(sb, group, desc);
 	} else {
 		meta_group_info[i]->bb_free =
-			ext4_free_blks_count(sb, desc);
+			ext4_free_group_clusters(sb, desc);
 	}
 
 	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
@@ -2473,7 +2472,20 @@
 	sbi->s_mb_stats = MB_DEFAULT_STATS;
 	sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
 	sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
-	sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
+	/*
+	 * The default group preallocation is 512, which for 4k block
+	 * sizes translates to 2 megabytes.  However for bigalloc file
+	 * systems, this is probably too big (i.e, if the cluster size
+	 * is 1 megabyte, then group preallocation size becomes half a
+	 * gigabyte!).  As a default, we will keep a two megabyte
+	 * group pralloc size for cluster sizes up to 64k, and after
+	 * that, we will force a minimum group preallocation size of
+	 * 32 clusters.  This translates to 8 megs when the cluster
+	 * size is 256k, and 32 megs when the cluster size is 1 meg,
+	 * which seems reasonable as a default.
+	 */
+	sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
+				       sbi->s_cluster_bits, 32);
 	/*
 	 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
 	 * to the lowest multiple of s_stripe which is bigger than
@@ -2490,7 +2502,7 @@
 	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
 	if (sbi->s_locality_groups == NULL) {
 		ret = -ENOMEM;
-		goto out;
+		goto out_free_groupinfo_slab;
 	}
 	for_each_possible_cpu(i) {
 		struct ext4_locality_group *lg;
@@ -2503,9 +2515,8 @@
 
 	/* init file for buddy data */
 	ret = ext4_mb_init_backend(sb);
-	if (ret != 0) {
-		goto out;
-	}
+	if (ret != 0)
+		goto out_free_locality_groups;
 
 	if (sbi->s_proc)
 		proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
@@ -2513,11 +2524,19 @@
 
 	if (sbi->s_journal)
 		sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+
+	return 0;
+
+out_free_locality_groups:
+	free_percpu(sbi->s_locality_groups);
+	sbi->s_locality_groups = NULL;
+out_free_groupinfo_slab:
+	ext4_groupinfo_destroy_slabs();
 out:
-	if (ret) {
-		kfree(sbi->s_mb_offsets);
-		kfree(sbi->s_mb_maxs);
-	}
+	kfree(sbi->s_mb_offsets);
+	sbi->s_mb_offsets = NULL;
+	kfree(sbi->s_mb_maxs);
+	sbi->s_mb_maxs = NULL;
 	return ret;
 }
 
@@ -2602,11 +2621,13 @@
 }
 
 static inline int ext4_issue_discard(struct super_block *sb,
-		ext4_group_t block_group, ext4_grpblk_t block, int count)
+		ext4_group_t block_group, ext4_grpblk_t cluster, int count)
 {
 	ext4_fsblk_t discard_block;
 
-	discard_block = block + ext4_group_first_block_no(sb, block_group);
+	discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
+			 ext4_group_first_block_no(sb, block_group));
+	count = EXT4_C2B(EXT4_SB(sb), count);
 	trace_ext4_discard_blocks(sb,
 			(unsigned long long) discard_block, count);
 	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
@@ -2633,7 +2654,7 @@
 
 		if (test_opt(sb, DISCARD))
 			ext4_issue_discard(sb, entry->group,
-					   entry->start_blk, entry->count);
+					   entry->start_cluster, entry->count);
 
 		err = ext4_mb_load_buddy(sb, entry->group, &e4b);
 		/* we expect to find existing buddy because it's pinned */
@@ -2646,7 +2667,7 @@
 		ext4_lock_group(sb, entry->group);
 		/* Take it out of per group rb tree */
 		rb_erase(&entry->node, &(db->bb_free_root));
-		mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
+		mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count);
 
 		/*
 		 * Clear the trimmed flag for the group so that the next
@@ -2752,7 +2773,7 @@
  */
 static noinline_for_stack int
 ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
-				handle_t *handle, unsigned int reserv_blks)
+				handle_t *handle, unsigned int reserv_clstrs)
 {
 	struct buffer_head *bitmap_bh = NULL;
 	struct ext4_group_desc *gdp;
@@ -2783,7 +2804,7 @@
 		goto out_err;
 
 	ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
-			ext4_free_blks_count(sb, gdp));
+			ext4_free_group_clusters(sb, gdp));
 
 	err = ext4_journal_get_write_access(handle, gdp_bh);
 	if (err)
@@ -2791,7 +2812,7 @@
 
 	block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
 
-	len = ac->ac_b_ex.fe_len;
+	len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
 	if (!ext4_data_block_valid(sbi, block, len)) {
 		ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
 			   "fs metadata\n", block, block+len);
@@ -2823,28 +2844,29 @@
 		      ac->ac_b_ex.fe_len);
 	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
-		ext4_free_blks_set(sb, gdp,
-					ext4_free_blocks_after_init(sb,
-					ac->ac_b_ex.fe_group, gdp));
+		ext4_free_group_clusters_set(sb, gdp,
+					     ext4_free_clusters_after_init(sb,
+						ac->ac_b_ex.fe_group, gdp));
 	}
-	len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
-	ext4_free_blks_set(sb, gdp, len);
+	len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
+	ext4_free_group_clusters_set(sb, gdp, len);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
 
 	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
-	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
+	percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
 	/*
 	 * Now reduce the dirty block count also. Should not go negative
 	 */
 	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
 		/* release all the reserved blocks if non delalloc */
-		percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
+		percpu_counter_sub(&sbi->s_dirtyclusters_counter,
+				   reserv_clstrs);
 
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi,
 							  ac->ac_b_ex.fe_group);
 		atomic_sub(ac->ac_b_ex.fe_len,
-			   &sbi->s_flex_groups[flex_group].free_blocks);
+			   &sbi->s_flex_groups[flex_group].free_clusters);
 	}
 
 	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -2886,6 +2908,7 @@
 ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 				struct ext4_allocation_request *ar)
 {
+	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	int bsbits, max;
 	ext4_lblk_t end;
 	loff_t size, orig_size, start_off;
@@ -2916,7 +2939,7 @@
 
 	/* first, let's learn actual file size
 	 * given current request is allocated */
-	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
+	size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
 	size = size << bsbits;
 	if (size < i_size_read(ac->ac_inode))
 		size = i_size_read(ac->ac_inode);
@@ -2988,7 +3011,8 @@
 			continue;
 		}
 
-		pa_end = pa->pa_lstart + pa->pa_len;
+		pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
+						  pa->pa_len);
 
 		/* PA must not overlap original request */
 		BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
@@ -3018,9 +3042,11 @@
 	rcu_read_lock();
 	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
 		ext4_lblk_t pa_end;
+
 		spin_lock(&pa->pa_lock);
 		if (pa->pa_deleted == 0) {
-			pa_end = pa->pa_lstart + pa->pa_len;
+			pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
+							  pa->pa_len);
 			BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
 		}
 		spin_unlock(&pa->pa_lock);
@@ -3036,14 +3062,14 @@
 	}
 	BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
 			start > ac->ac_o_ex.fe_logical);
-	BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+	BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
 
 	/* now prepare goal request */
 
 	/* XXX: is it better to align blocks WRT to logical
 	 * placement or satisfy big request as is */
 	ac->ac_g_ex.fe_logical = start;
-	ac->ac_g_ex.fe_len = size;
+	ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
 
 	/* define goal start in order to merge */
 	if (ar->pright && (ar->lright == (start + size))) {
@@ -3112,14 +3138,16 @@
 static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
 				struct ext4_prealloc_space *pa)
 {
+	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	ext4_fsblk_t start;
 	ext4_fsblk_t end;
 	int len;
 
 	/* found preallocated blocks, use them */
 	start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
-	end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len);
-	len = end - start;
+	end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
+		  start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
+	len = EXT4_NUM_B2C(sbi, end - start);
 	ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
 					&ac->ac_b_ex.fe_start);
 	ac->ac_b_ex.fe_len = len;
@@ -3127,7 +3155,7 @@
 	ac->ac_pa = pa;
 
 	BUG_ON(start < pa->pa_pstart);
-	BUG_ON(start + len > pa->pa_pstart + pa->pa_len);
+	BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
 	BUG_ON(pa->pa_free < len);
 	pa->pa_free -= len;
 
@@ -3193,6 +3221,7 @@
 static noinline_for_stack int
 ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 {
+	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	int order, i;
 	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
 	struct ext4_locality_group *lg;
@@ -3210,12 +3239,14 @@
 		/* all fields in this condition don't change,
 		 * so we can skip locking for them */
 		if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
-			ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len)
+		    ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
+					       EXT4_C2B(sbi, pa->pa_len)))
 			continue;
 
 		/* non-extent files can't have physical blocks past 2^32 */
 		if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
-			pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS)
+		    (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
+		     EXT4_MAX_BLOCK_FILE_PHYS))
 			continue;
 
 		/* found preallocated blocks, use them */
@@ -3291,7 +3322,7 @@
 
 	while (n) {
 		entry = rb_entry(n, struct ext4_free_data, node);
-		ext4_set_bits(bitmap, entry->start_blk, entry->count);
+		ext4_set_bits(bitmap, entry->start_cluster, entry->count);
 		n = rb_next(n);
 	}
 	return;
@@ -3312,7 +3343,6 @@
 	ext4_group_t groupnr;
 	ext4_grpblk_t start;
 	int preallocated = 0;
-	int count = 0;
 	int len;
 
 	/* all form of preallocation discards first load group,
@@ -3335,7 +3365,6 @@
 		BUG_ON(groupnr != group);
 		ext4_set_bits(bitmap, start, len);
 		preallocated += len;
-		count++;
 	}
 	mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
 }
@@ -3412,6 +3441,7 @@
 ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 {
 	struct super_block *sb = ac->ac_sb;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_prealloc_space *pa;
 	struct ext4_group_info *grp;
 	struct ext4_inode_info *ei;
@@ -3443,16 +3473,18 @@
 		winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
 
 		/* also, we should cover whole original request */
-		wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len;
+		wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
 
 		/* the smallest one defines real window */
 		win = min(winl, wins);
 
-		offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len;
+		offs = ac->ac_o_ex.fe_logical %
+			EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
 		if (offs && offs < win)
 			win = offs;
 
-		ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win;
+		ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
+			EXT4_B2C(sbi, win);
 		BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
 		BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
 	}
@@ -3477,7 +3509,7 @@
 	trace_ext4_mb_new_inode_pa(ac, pa);
 
 	ext4_mb_use_inode_pa(ac, pa);
-	atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
+	atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
 
 	ei = EXT4_I(ac->ac_inode);
 	grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
@@ -3592,7 +3624,7 @@
 
 	BUG_ON(pa->pa_deleted == 0);
 	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
-	grp_blk_start = pa->pa_pstart - bit;
+	grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
 	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
 	end = bit + pa->pa_len;
 
@@ -3607,7 +3639,8 @@
 		free += next - bit;
 
 		trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
-		trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit,
+		trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
+						    EXT4_C2B(sbi, bit)),
 					       next - bit);
 		mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
 		bit = next + 1;
@@ -3690,7 +3723,7 @@
 	}
 
 	if (needed == 0)
-		needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
+		needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
 
 	INIT_LIST_HEAD(&list);
 repeat:
@@ -3958,7 +3991,7 @@
 	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
 		return;
 
-	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
+	size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
 	isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
 		>> bsbits;
 
@@ -3969,6 +4002,11 @@
 		return;
 	}
 
+	if (sbi->s_mb_group_prealloc <= 0) {
+		ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
+		return;
+	}
+
 	/* don't use group allocation for large files */
 	size = max(size, isize);
 	if (size > sbi->s_mb_stream_request) {
@@ -4007,8 +4045,8 @@
 	len = ar->len;
 
 	/* just a dirty hack to filter too big requests  */
-	if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10)
-		len = EXT4_BLOCKS_PER_GROUP(sb) - 10;
+	if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10)
+		len = EXT4_CLUSTERS_PER_GROUP(sb) - 10;
 
 	/* start searching from the goal */
 	goal = ar->goal;
@@ -4019,18 +4057,15 @@
 
 	/* set up allocation goals */
 	memset(ac, 0, sizeof(struct ext4_allocation_context));
-	ac->ac_b_ex.fe_logical = ar->logical;
+	ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
 	ac->ac_status = AC_STATUS_CONTINUE;
 	ac->ac_sb = sb;
 	ac->ac_inode = ar->inode;
-	ac->ac_o_ex.fe_logical = ar->logical;
+	ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
 	ac->ac_o_ex.fe_group = group;
 	ac->ac_o_ex.fe_start = block;
 	ac->ac_o_ex.fe_len = len;
-	ac->ac_g_ex.fe_logical = ar->logical;
-	ac->ac_g_ex.fe_group = group;
-	ac->ac_g_ex.fe_start = block;
-	ac->ac_g_ex.fe_len = len;
+	ac->ac_g_ex = ac->ac_o_ex;
 	ac->ac_flags = ar->flags;
 
 	/* we have to define context: we'll we work with a file or
@@ -4182,13 +4217,14 @@
  */
 static int ext4_mb_release_context(struct ext4_allocation_context *ac)
 {
+	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	struct ext4_prealloc_space *pa = ac->ac_pa;
 	if (pa) {
 		if (pa->pa_type == MB_GROUP_PA) {
 			/* see comment in ext4_mb_use_group_pa() */
 			spin_lock(&pa->pa_lock);
-			pa->pa_pstart += ac->ac_b_ex.fe_len;
-			pa->pa_lstart += ac->ac_b_ex.fe_len;
+			pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
+			pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
 			pa->pa_free -= ac->ac_b_ex.fe_len;
 			pa->pa_len -= ac->ac_b_ex.fe_len;
 			spin_unlock(&pa->pa_lock);
@@ -4249,13 +4285,17 @@
 	struct super_block *sb;
 	ext4_fsblk_t block = 0;
 	unsigned int inquota = 0;
-	unsigned int reserv_blks = 0;
+	unsigned int reserv_clstrs = 0;
 
 	sb = ar->inode->i_sb;
 	sbi = EXT4_SB(sb);
 
 	trace_ext4_request_blocks(ar);
 
+	/* Allow to use superuser reservation for quota file */
+	if (IS_NOQUOTA(ar->inode))
+		ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
+
 	/*
 	 * For delayed allocation, we could skip the ENOSPC and
 	 * EDQUOT check, as blocks and quotas have been already
@@ -4269,7 +4309,7 @@
 		 * and verify allocation doesn't exceed the quota limits.
 		 */
 		while (ar->len &&
-			ext4_claim_free_blocks(sbi, ar->len, ar->flags)) {
+			ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
 
 			/* let others to free the space */
 			yield();
@@ -4279,12 +4319,14 @@
 			*errp = -ENOSPC;
 			return 0;
 		}
-		reserv_blks = ar->len;
+		reserv_clstrs = ar->len;
 		if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
-			dquot_alloc_block_nofail(ar->inode, ar->len);
+			dquot_alloc_block_nofail(ar->inode,
+						 EXT4_C2B(sbi, ar->len));
 		} else {
 			while (ar->len &&
-				dquot_alloc_block(ar->inode, ar->len)) {
+				dquot_alloc_block(ar->inode,
+						  EXT4_C2B(sbi, ar->len))) {
 
 				ar->flags |= EXT4_MB_HINT_NOPREALLOC;
 				ar->len--;
@@ -4328,7 +4370,7 @@
 			ext4_mb_new_preallocation(ac);
 	}
 	if (likely(ac->ac_status == AC_STATUS_FOUND)) {
-		*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
+		*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
 		if (*errp == -EAGAIN) {
 			/*
 			 * drop the reference that we took
@@ -4364,13 +4406,13 @@
 	if (ac)
 		kmem_cache_free(ext4_ac_cachep, ac);
 	if (inquota && ar->len < inquota)
-		dquot_free_block(ar->inode, inquota - ar->len);
+		dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
 	if (!ar->len) {
 		if (!ext4_test_inode_state(ar->inode,
 					   EXT4_STATE_DELALLOC_RESERVED))
 			/* release all the reserved blocks if non delalloc */
-			percpu_counter_sub(&sbi->s_dirtyblocks_counter,
-						reserv_blks);
+			percpu_counter_sub(&sbi->s_dirtyclusters_counter,
+						reserv_clstrs);
 	}
 
 	trace_ext4_allocate_blocks(ar, (unsigned long long)block);
@@ -4388,7 +4430,7 @@
 {
 	if ((entry1->t_tid == entry2->t_tid) &&
 	    (entry1->group == entry2->group) &&
-	    ((entry1->start_blk + entry1->count) == entry2->start_blk))
+	    ((entry1->start_cluster + entry1->count) == entry2->start_cluster))
 		return 1;
 	return 0;
 }
@@ -4398,7 +4440,7 @@
 		      struct ext4_free_data *new_entry)
 {
 	ext4_group_t group = e4b->bd_group;
-	ext4_grpblk_t block;
+	ext4_grpblk_t cluster;
 	struct ext4_free_data *entry;
 	struct ext4_group_info *db = e4b->bd_info;
 	struct super_block *sb = e4b->bd_sb;
@@ -4411,7 +4453,7 @@
 	BUG_ON(e4b->bd_buddy_page == NULL);
 
 	new_node = &new_entry->node;
-	block = new_entry->start_blk;
+	cluster = new_entry->start_cluster;
 
 	if (!*n) {
 		/* first free block exent. We need to
@@ -4425,13 +4467,14 @@
 	while (*n) {
 		parent = *n;
 		entry = rb_entry(parent, struct ext4_free_data, node);
-		if (block < entry->start_blk)
+		if (cluster < entry->start_cluster)
 			n = &(*n)->rb_left;
-		else if (block >= (entry->start_blk + entry->count))
+		else if (cluster >= (entry->start_cluster + entry->count))
 			n = &(*n)->rb_right;
 		else {
 			ext4_grp_locked_error(sb, group, 0,
-				ext4_group_first_block_no(sb, group) + block,
+				ext4_group_first_block_no(sb, group) +
+				EXT4_C2B(sbi, cluster),
 				"Block already on to-be-freed list");
 			return 0;
 		}
@@ -4445,7 +4488,7 @@
 	if (node) {
 		entry = rb_entry(node, struct ext4_free_data, node);
 		if (can_merge(entry, new_entry)) {
-			new_entry->start_blk = entry->start_blk;
+			new_entry->start_cluster = entry->start_cluster;
 			new_entry->count += entry->count;
 			rb_erase(node, &(db->bb_free_root));
 			spin_lock(&sbi->s_md_lock);
@@ -4496,6 +4539,7 @@
 	ext4_group_t block_group;
 	struct ext4_sb_info *sbi;
 	struct ext4_buddy e4b;
+	unsigned int count_clusters;
 	int err = 0;
 	int ret;
 
@@ -4544,6 +4588,38 @@
 	if (!ext4_should_writeback_data(inode))
 		flags |= EXT4_FREE_BLOCKS_METADATA;
 
+	/*
+	 * If the extent to be freed does not begin on a cluster
+	 * boundary, we need to deal with partial clusters at the
+	 * beginning and end of the extent.  Normally we will free
+	 * blocks at the beginning or the end unless we are explicitly
+	 * requested to avoid doing so.
+	 */
+	overflow = block & (sbi->s_cluster_ratio - 1);
+	if (overflow) {
+		if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
+			overflow = sbi->s_cluster_ratio - overflow;
+			block += overflow;
+			if (count > overflow)
+				count -= overflow;
+			else
+				return;
+		} else {
+			block -= overflow;
+			count += overflow;
+		}
+	}
+	overflow = count & (sbi->s_cluster_ratio - 1);
+	if (overflow) {
+		if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
+			if (count > overflow)
+				count -= overflow;
+			else
+				return;
+		} else
+			count += sbi->s_cluster_ratio - overflow;
+	}
+
 do_more:
 	overflow = 0;
 	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -4552,10 +4628,12 @@
 	 * Check to see if we are freeing blocks across a group
 	 * boundary.
 	 */
-	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
-		overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
+	if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
+		overflow = EXT4_C2B(sbi, bit) + count -
+			EXT4_BLOCKS_PER_GROUP(sb);
 		count -= overflow;
 	}
+	count_clusters = EXT4_B2C(sbi, count);
 	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
 	if (!bitmap_bh) {
 		err = -EIO;
@@ -4570,9 +4648,9 @@
 	if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
 	    in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
 	    in_range(block, ext4_inode_table(sb, gdp),
-		      EXT4_SB(sb)->s_itb_per_group) ||
+		     EXT4_SB(sb)->s_itb_per_group) ||
 	    in_range(block + count - 1, ext4_inode_table(sb, gdp),
-		      EXT4_SB(sb)->s_itb_per_group)) {
+		     EXT4_SB(sb)->s_itb_per_group)) {
 
 		ext4_error(sb, "Freeing blocks in system zone - "
 			   "Block = %llu, count = %lu", block, count);
@@ -4597,11 +4675,11 @@
 #ifdef AGGRESSIVE_CHECK
 	{
 		int i;
-		for (i = 0; i < count; i++)
+		for (i = 0; i < count_clusters; i++)
 			BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
 	}
 #endif
-	trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
+	trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
 
 	err = ext4_mb_load_buddy(sb, block_group, &e4b);
 	if (err)
@@ -4618,13 +4696,13 @@
 			err = -ENOMEM;
 			goto error_return;
 		}
-		new_entry->start_blk = bit;
+		new_entry->start_cluster = bit;
 		new_entry->group  = block_group;
-		new_entry->count = count;
+		new_entry->count = count_clusters;
 		new_entry->t_tid = handle->h_transaction->t_tid;
 
 		ext4_lock_group(sb, block_group);
-		mb_clear_bits(bitmap_bh->b_data, bit, count);
+		mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
 		ext4_mb_free_metadata(handle, &e4b, new_entry);
 	} else {
 		/* need to update group_info->bb_free and bitmap
@@ -4632,25 +4710,29 @@
 		 * them with group lock_held
 		 */
 		ext4_lock_group(sb, block_group);
-		mb_clear_bits(bitmap_bh->b_data, bit, count);
-		mb_free_blocks(inode, &e4b, bit, count);
+		mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
+		mb_free_blocks(inode, &e4b, bit, count_clusters);
 	}
 
-	ret = ext4_free_blks_count(sb, gdp) + count;
-	ext4_free_blks_set(sb, gdp, ret);
+	ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
+	ext4_free_group_clusters_set(sb, gdp, ret);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
 	ext4_unlock_group(sb, block_group);
-	percpu_counter_add(&sbi->s_freeblocks_counter, count);
+	percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
 
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
-		atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
+		atomic_add(count_clusters,
+			   &sbi->s_flex_groups[flex_group].free_clusters);
 	}
 
 	ext4_mb_unload_buddy(&e4b);
 
 	freed += count;
 
+	if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
+		dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
+
 	/* We dirtied the bitmap block */
 	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
 	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -4669,8 +4751,6 @@
 	}
 	ext4_mark_super_dirty(sb);
 error_return:
-	if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
-		dquot_free_block(inode, freed);
 	brelse(bitmap_bh);
 	ext4_std_error(sb, err);
 	return;
@@ -4778,16 +4858,17 @@
 	ext4_lock_group(sb, block_group);
 	mb_clear_bits(bitmap_bh->b_data, bit, count);
 	mb_free_blocks(NULL, &e4b, bit, count);
-	blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
-	ext4_free_blks_set(sb, desc, blk_free_count);
+	blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
+	ext4_free_group_clusters_set(sb, desc, blk_free_count);
 	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
 	ext4_unlock_group(sb, block_group);
-	percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
+	percpu_counter_add(&sbi->s_freeclusters_counter,
+			   EXT4_B2C(sbi, blocks_freed));
 
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
-		atomic_add(blocks_freed,
-			   &sbi->s_flex_groups[flex_group].free_blocks);
+		atomic_add(EXT4_B2C(sbi, blocks_freed),
+			   &sbi->s_flex_groups[flex_group].free_clusters);
 	}
 
 	ext4_mb_unload_buddy(&e4b);
@@ -4948,7 +5029,7 @@
 	struct ext4_group_info *grp;
 	ext4_group_t first_group, last_group;
 	ext4_group_t group, ngroups = ext4_get_groups_count(sb);
-	ext4_grpblk_t cnt = 0, first_block, last_block;
+	ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
 	uint64_t start, len, minlen, trimmed = 0;
 	ext4_fsblk_t first_data_blk =
 			le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
@@ -4958,7 +5039,7 @@
 	len = range->len >> sb->s_blocksize_bits;
 	minlen = range->minlen >> sb->s_blocksize_bits;
 
-	if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
+	if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)))
 		return -EINVAL;
 	if (start + len <= first_data_blk)
 		goto out;
@@ -4969,11 +5050,11 @@
 
 	/* Determine first and last group to examine based on start and len */
 	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
-				     &first_group, &first_block);
+				     &first_group, &first_cluster);
 	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
-				     &last_group, &last_block);
+				     &last_group, &last_cluster);
 	last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
-	last_block = EXT4_BLOCKS_PER_GROUP(sb);
+	last_cluster = EXT4_CLUSTERS_PER_GROUP(sb);
 
 	if (first_group > last_group)
 		return -EINVAL;
@@ -4993,20 +5074,20 @@
 		 * change it for the last group in which case start +
 		 * len < EXT4_BLOCKS_PER_GROUP(sb).
 		 */
-		if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb))
-			last_block = first_block + len;
-		len -= last_block - first_block;
+		if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb))
+			last_cluster = first_cluster + len;
+		len -= last_cluster - first_cluster;
 
 		if (grp->bb_free >= minlen) {
-			cnt = ext4_trim_all_free(sb, group, first_block,
-						last_block, minlen);
+			cnt = ext4_trim_all_free(sb, group, first_cluster,
+						last_cluster, minlen);
 			if (cnt < 0) {
 				ret = cnt;
 				break;
 			}
 		}
 		trimmed += cnt;
-		first_block = 0;
+		first_cluster = 0;
 	}
 	range->len = trimmed * sb->s_blocksize;
 
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 9d4a636..47705f3 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -106,7 +106,7 @@
 	ext4_group_t group;
 
 	/* free block extent */
-	ext4_grpblk_t start_blk;
+	ext4_grpblk_t start_cluster;
 	ext4_grpblk_t count;
 
 	/* transaction which freed this extent */
@@ -139,9 +139,9 @@
 
 struct ext4_free_extent {
 	ext4_lblk_t fe_logical;
-	ext4_grpblk_t fe_start;
+	ext4_grpblk_t fe_start;	/* In cluster units */
 	ext4_group_t fe_group;
-	ext4_grpblk_t fe_len;
+	ext4_grpblk_t fe_len;	/* In cluster units */
 };
 
 /*
@@ -175,7 +175,7 @@
 	/* the best found extent */
 	struct ext4_free_extent ac_b_ex;
 
-	/* copy of the bext found extent taken before preallocation efforts */
+	/* copy of the best found extent taken before preallocation efforts */
 	struct ext4_free_extent ac_f_ex;
 
 	/* number of iterations done. we have to track to limit searching */
@@ -216,6 +216,7 @@
 static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
 					struct ext4_free_extent *fex)
 {
-	return ext4_group_first_block_no(sb, fex->fe_group) + fex->fe_start;
+	return ext4_group_first_block_no(sb, fex->fe_group) +
+		(fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
 }
 #endif
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 667cc88..16ac228 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -15,19 +15,18 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include "ext4_jbd2.h"
-#include "ext4_extents.h"
 
 /*
  * The contiguous blocks details which can be
  * represented by a single extent
  */
-struct list_blocks_struct {
-	ext4_lblk_t first_block, last_block;
+struct migrate_struct {
+	ext4_lblk_t first_block, last_block, curr_block;
 	ext4_fsblk_t first_pblock, last_pblock;
 };
 
 static int finish_range(handle_t *handle, struct inode *inode,
-				struct list_blocks_struct *lb)
+				struct migrate_struct *lb)
 
 {
 	int retval = 0, needed;
@@ -87,8 +86,7 @@
 }
 
 static int update_extent_range(handle_t *handle, struct inode *inode,
-				ext4_fsblk_t pblock, ext4_lblk_t blk_num,
-				struct list_blocks_struct *lb)
+			       ext4_fsblk_t pblock, struct migrate_struct *lb)
 {
 	int retval;
 	/*
@@ -96,9 +94,10 @@
 	 */
 	if (lb->first_pblock &&
 		(lb->last_pblock+1 == pblock) &&
-		(lb->last_block+1 == blk_num)) {
+		(lb->last_block+1 == lb->curr_block)) {
 		lb->last_pblock = pblock;
-		lb->last_block = blk_num;
+		lb->last_block = lb->curr_block;
+		lb->curr_block++;
 		return 0;
 	}
 	/*
@@ -106,64 +105,49 @@
 	 */
 	retval = finish_range(handle, inode, lb);
 	lb->first_pblock = lb->last_pblock = pblock;
-	lb->first_block = lb->last_block = blk_num;
-
+	lb->first_block = lb->last_block = lb->curr_block;
+	lb->curr_block++;
 	return retval;
 }
 
 static int update_ind_extent_range(handle_t *handle, struct inode *inode,
-				   ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
-				   struct list_blocks_struct *lb)
+				   ext4_fsblk_t pblock,
+				   struct migrate_struct *lb)
 {
 	struct buffer_head *bh;
 	__le32 *i_data;
 	int i, retval = 0;
-	ext4_lblk_t blk_count = *blk_nump;
 	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 
-	if (!pblock) {
-		/* Only update the file block number */
-		*blk_nump += max_entries;
-		return 0;
-	}
-
 	bh = sb_bread(inode->i_sb, pblock);
 	if (!bh)
 		return -EIO;
 
 	i_data = (__le32 *)bh->b_data;
-	for (i = 0; i < max_entries; i++, blk_count++) {
+	for (i = 0; i < max_entries; i++) {
 		if (i_data[i]) {
 			retval = update_extent_range(handle, inode,
-						le32_to_cpu(i_data[i]),
-						blk_count, lb);
+						le32_to_cpu(i_data[i]), lb);
 			if (retval)
 				break;
+		} else {
+			lb->curr_block++;
 		}
 	}
-
-	/* Update the file block number */
-	*blk_nump = blk_count;
 	put_bh(bh);
 	return retval;
 
 }
 
 static int update_dind_extent_range(handle_t *handle, struct inode *inode,
-				    ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
-				    struct list_blocks_struct *lb)
+				    ext4_fsblk_t pblock,
+				    struct migrate_struct *lb)
 {
 	struct buffer_head *bh;
 	__le32 *i_data;
 	int i, retval = 0;
-	ext4_lblk_t blk_count = *blk_nump;
 	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 
-	if (!pblock) {
-		/* Only update the file block number */
-		*blk_nump += max_entries * max_entries;
-		return 0;
-	}
 	bh = sb_bread(inode->i_sb, pblock);
 	if (!bh)
 		return -EIO;
@@ -172,38 +156,28 @@
 	for (i = 0; i < max_entries; i++) {
 		if (i_data[i]) {
 			retval = update_ind_extent_range(handle, inode,
-						le32_to_cpu(i_data[i]),
-						&blk_count, lb);
+						le32_to_cpu(i_data[i]), lb);
 			if (retval)
 				break;
 		} else {
 			/* Only update the file block number */
-			blk_count += max_entries;
+			lb->curr_block += max_entries;
 		}
 	}
-
-	/* Update the file block number */
-	*blk_nump = blk_count;
 	put_bh(bh);
 	return retval;
 
 }
 
 static int update_tind_extent_range(handle_t *handle, struct inode *inode,
-				     ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
-				     struct list_blocks_struct *lb)
+				    ext4_fsblk_t pblock,
+				    struct migrate_struct *lb)
 {
 	struct buffer_head *bh;
 	__le32 *i_data;
 	int i, retval = 0;
-	ext4_lblk_t blk_count = *blk_nump;
 	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 
-	if (!pblock) {
-		/* Only update the file block number */
-		*blk_nump += max_entries * max_entries * max_entries;
-		return 0;
-	}
 	bh = sb_bread(inode->i_sb, pblock);
 	if (!bh)
 		return -EIO;
@@ -212,16 +186,14 @@
 	for (i = 0; i < max_entries; i++) {
 		if (i_data[i]) {
 			retval = update_dind_extent_range(handle, inode,
-						le32_to_cpu(i_data[i]),
-						&blk_count, lb);
+						le32_to_cpu(i_data[i]), lb);
 			if (retval)
 				break;
-		} else
+		} else {
 			/* Only update the file block number */
-			blk_count += max_entries * max_entries;
+			lb->curr_block += max_entries * max_entries;
+		}
 	}
-	/* Update the file block number */
-	*blk_nump = blk_count;
 	put_bh(bh);
 	return retval;
 
@@ -462,12 +434,12 @@
 	handle_t *handle;
 	int retval = 0, i;
 	__le32 *i_data;
-	ext4_lblk_t blk_count = 0;
 	struct ext4_inode_info *ei;
 	struct inode *tmp_inode = NULL;
-	struct list_blocks_struct lb;
+	struct migrate_struct lb;
 	unsigned long max_entries;
 	__u32 goal;
+	uid_t owner[2];
 
 	/*
 	 * If the filesystem does not support extents, or the inode
@@ -495,10 +467,12 @@
 	}
 	goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
 		EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
+	owner[0] = inode->i_uid;
+	owner[1] = inode->i_gid;
 	tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
-				   S_IFREG, NULL, goal);
+				   S_IFREG, NULL, goal, owner);
 	if (IS_ERR(tmp_inode)) {
-		retval = -ENOMEM;
+		retval = PTR_ERR(inode);
 		ext4_journal_stop(handle);
 		return retval;
 	}
@@ -551,35 +525,32 @@
 
 	/* 32 bit block address 4 bytes */
 	max_entries = inode->i_sb->s_blocksize >> 2;
-	for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
+	for (i = 0; i < EXT4_NDIR_BLOCKS; i++) {
 		if (i_data[i]) {
 			retval = update_extent_range(handle, tmp_inode,
-						le32_to_cpu(i_data[i]),
-						blk_count, &lb);
+						le32_to_cpu(i_data[i]), &lb);
 			if (retval)
 				goto err_out;
-		}
+		} else
+			lb.curr_block++;
 	}
 	if (i_data[EXT4_IND_BLOCK]) {
 		retval = update_ind_extent_range(handle, tmp_inode,
-					le32_to_cpu(i_data[EXT4_IND_BLOCK]),
-					&blk_count, &lb);
+				le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb);
 			if (retval)
 				goto err_out;
 	} else
-		blk_count +=  max_entries;
+		lb.curr_block += max_entries;
 	if (i_data[EXT4_DIND_BLOCK]) {
 		retval = update_dind_extent_range(handle, tmp_inode,
-					le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
-					&blk_count, &lb);
+				le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb);
 			if (retval)
 				goto err_out;
 	} else
-		blk_count += max_entries * max_entries;
+		lb.curr_block += max_entries * max_entries;
 	if (i_data[EXT4_TIND_BLOCK]) {
 		retval = update_tind_extent_range(handle, tmp_inode,
-					le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
-					&blk_count, &lb);
+				le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb);
 			if (retval)
 				goto err_out;
 	}
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 9bdef3f..7ea4ba4 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -109,7 +109,7 @@
 	mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
 	bdevname(bh->b_bdev, mmp->mmp_bdevname);
 
-	memcpy(mmp->mmp_nodename, init_utsname()->sysname,
+	memcpy(mmp->mmp_nodename, init_utsname()->nodename,
 	       sizeof(mmp->mmp_nodename));
 
 	while (!kthread_should_stop()) {
@@ -125,8 +125,9 @@
 		 * Don't spew too many error messages. Print one every
 		 * (s_mmp_update_interval * 60) seconds.
 		 */
-		if (retval && (failed_writes % 60) == 0) {
-			ext4_error(sb, "Error writing to MMP block");
+		if (retval) {
+			if ((failed_writes % 60) == 0)
+				ext4_error(sb, "Error writing to MMP block");
 			failed_writes++;
 		}
 
@@ -295,7 +296,8 @@
 	/*
 	 * write a new random sequence number.
 	 */
-	mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq());
+	seq = mmp_new_seq();
+	mmp->mmp_seq = cpu_to_le32(seq);
 
 	retval = write_mmp_block(bh);
 	if (retval)
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index f57455a..c5826c6 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -17,7 +17,6 @@
 #include <linux/quotaops.h>
 #include <linux/slab.h>
 #include "ext4_jbd2.h"
-#include "ext4_extents.h"
 #include "ext4.h"
 
 /**
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5f7fb46..aa4c782 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1586,7 +1586,7 @@
 			dxtrace(dx_show_index("node", frames[1].entries));
 			dxtrace(dx_show_index("node",
 			       ((struct dx_node *) bh2->b_data)->entries));
-			err = ext4_handle_dirty_metadata(handle, inode, bh2);
+			err = ext4_handle_dirty_metadata(handle, dir, bh2);
 			if (err)
 				goto journal_error;
 			brelse (bh2);
@@ -1612,7 +1612,7 @@
 			if (err)
 				goto journal_error;
 		}
-		err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
+		err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh);
 		if (err) {
 			ext4_std_error(inode->i_sb, err);
 			goto cleanup;
@@ -1707,9 +1707,8 @@
  */
 static void ext4_dec_count(handle_t *handle, struct inode *inode)
 {
-	drop_nlink(inode);
-	if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0)
-		inc_nlink(inode);
+	if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
+		drop_nlink(inode);
 }
 
 
@@ -1756,7 +1755,7 @@
 	if (IS_DIRSYNC(dir))
 		ext4_handle_sync(handle);
 
-	inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
+	inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
 		inode->i_op = &ext4_file_inode_operations;
@@ -1792,7 +1791,7 @@
 	if (IS_DIRSYNC(dir))
 		ext4_handle_sync(handle);
 
-	inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
+	inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
 		init_special_inode(inode, inode->i_mode, rdev);
@@ -1832,7 +1831,7 @@
 		ext4_handle_sync(handle);
 
 	inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
-			       &dentry->d_name, 0);
+			       &dentry->d_name, 0, NULL);
 	err = PTR_ERR(inode);
 	if (IS_ERR(inode))
 		goto out_stop;
@@ -1863,7 +1862,7 @@
 	ext4_set_de_type(dir->i_sb, de, S_IFDIR);
 	set_nlink(inode, 2);
 	BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
-	err = ext4_handle_dirty_metadata(handle, dir, dir_block);
+	err = ext4_handle_dirty_metadata(handle, inode, dir_block);
 	if (err)
 		goto out_clear_inode;
 	err = ext4_mark_inode_dirty(handle, inode);
@@ -2279,7 +2278,7 @@
 		ext4_handle_sync(handle);
 
 	inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
-			       &dentry->d_name, 0);
+			       &dentry->d_name, 0, NULL);
 	err = PTR_ERR(inode);
 	if (IS_ERR(inode))
 		goto out_stop;
@@ -2530,7 +2529,7 @@
 		PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
 						cpu_to_le32(new_dir->i_ino);
 		BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
-		retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
+		retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh);
 		if (retval) {
 			ext4_std_error(old_dir->i_sb, retval);
 			goto end_rename;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 92f38ee..7ce1d0b 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -70,7 +70,6 @@
 void ext4_free_io_end(ext4_io_end_t *io)
 {
 	int i;
-	wait_queue_head_t *wq;
 
 	BUG_ON(!io);
 	if (io->page)
@@ -78,56 +77,43 @@
 	for (i = 0; i < io->num_io_pages; i++)
 		put_io_page(io->pages[i]);
 	io->num_io_pages = 0;
-	wq = ext4_ioend_wq(io->inode);
-	if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
-	    waitqueue_active(wq))
-		wake_up_all(wq);
+	if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
+		wake_up_all(ext4_ioend_wq(io->inode));
 	kmem_cache_free(io_end_cachep, io);
 }
 
 /*
  * check a range of space and convert unwritten extents to written.
+ *
+ * Called with inode->i_mutex; we depend on this when we manipulate
+ * io->flag, since we could otherwise race with ext4_flush_completed_IO()
  */
 int ext4_end_io_nolock(ext4_io_end_t *io)
 {
 	struct inode *inode = io->inode;
 	loff_t offset = io->offset;
 	ssize_t size = io->size;
-	wait_queue_head_t *wq;
 	int ret = 0;
 
 	ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
 		   "list->prev 0x%p\n",
 		   io, inode->i_ino, io->list.next, io->list.prev);
 
-	if (list_empty(&io->list))
-		return ret;
-
-	if (!(io->flag & EXT4_IO_END_UNWRITTEN))
-		return ret;
-
 	ret = ext4_convert_unwritten_extents(inode, offset, size);
 	if (ret < 0) {
-		printk(KERN_EMERG "%s: failed to convert unwritten "
-			"extents to written extents, error is %d "
-			"io is still on inode %lu aio dio list\n",
-		       __func__, ret, inode->i_ino);
-		return ret;
+		ext4_msg(inode->i_sb, KERN_EMERG,
+			 "failed to convert unwritten extents to written "
+			 "extents -- potential data loss!  "
+			 "(inode %lu, offset %llu, size %zd, error %d)",
+			 inode->i_ino, offset, size, ret);
 	}
 
 	if (io->iocb)
 		aio_complete(io->iocb, io->result, 0);
-	/* clear the DIO AIO unwritten flag */
-	if (io->flag & EXT4_IO_END_UNWRITTEN) {
-		io->flag &= ~EXT4_IO_END_UNWRITTEN;
-		/* Wake up anyone waiting on unwritten extent conversion */
-		wq = ext4_ioend_wq(io->inode);
-		if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
-		    waitqueue_active(wq)) {
-			wake_up_all(wq);
-		}
-	}
 
+	/* Wake up anyone waiting on unwritten extent conversion */
+	if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten))
+		wake_up_all(ext4_ioend_wq(io->inode));
 	return ret;
 }
 
@@ -140,9 +126,15 @@
 	struct inode		*inode = io->inode;
 	struct ext4_inode_info	*ei = EXT4_I(inode);
 	unsigned long		flags;
-	int			ret;
+
+	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+	if (list_empty(&io->list)) {
+		spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+		goto free;
+	}
 
 	if (!mutex_trylock(&inode->i_mutex)) {
+		spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
 		/*
 		 * Requeue the work instead of waiting so that the work
 		 * items queued after this can be processed.
@@ -159,17 +151,11 @@
 		io->flag |= EXT4_IO_END_QUEUED;
 		return;
 	}
-	ret = ext4_end_io_nolock(io);
-	if (ret < 0) {
-		mutex_unlock(&inode->i_mutex);
-		return;
-	}
-
-	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-	if (!list_empty(&io->list))
-		list_del_init(&io->list);
+	list_del_init(&io->list);
 	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+	(void) ext4_end_io_nolock(io);
 	mutex_unlock(&inode->i_mutex);
+free:
 	ext4_free_io_end(io);
 }
 
@@ -350,10 +336,8 @@
 	if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
 	    (io_end->pages[io_end->num_io_pages-1] != io_page))
 		goto submit_and_retry;
-	if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-		io_end->flag |= EXT4_IO_END_UNWRITTEN;
-		atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
-	}
+	if (buffer_uninit(bh))
+		ext4_set_io_unwritten_flag(inode, io_end);
 	io->io_end->size += bh->b_size;
 	io->io_next_block++;
 	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 707d3f1..996780a 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -875,7 +875,7 @@
 	ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
 	ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
 	ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
-	ext4_free_blks_set(sb, gdp, input->free_blocks_count);
+	ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count);
 	ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
 	gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
@@ -937,8 +937,8 @@
 		input->reserved_blocks);
 
 	/* Update the free space counts */
-	percpu_counter_add(&sbi->s_freeblocks_counter,
-			   input->free_blocks_count);
+	percpu_counter_add(&sbi->s_freeclusters_counter,
+			   EXT4_B2C(sbi, input->free_blocks_count));
 	percpu_counter_add(&sbi->s_freeinodes_counter,
 			   EXT4_INODES_PER_GROUP(sb));
 
@@ -946,8 +946,8 @@
 	    sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group;
 		flex_group = ext4_flex_group(sbi, input->group);
-		atomic_add(input->free_blocks_count,
-			   &sbi->s_flex_groups[flex_group].free_blocks);
+		atomic_add(EXT4_B2C(sbi, input->free_blocks_count),
+			   &sbi->s_flex_groups[flex_group].free_clusters);
 		atomic_add(EXT4_INODES_PER_GROUP(sb),
 			   &sbi->s_flex_groups[flex_group].free_inodes);
 	}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 44d0c8d..9953d80 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -45,6 +45,7 @@
 #include <linux/freezer.h>
 
 #include "ext4.h"
+#include "ext4_extents.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -163,8 +164,8 @@
 		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 }
 
-__u32 ext4_free_blks_count(struct super_block *sb,
-			      struct ext4_group_desc *bg)
+__u32 ext4_free_group_clusters(struct super_block *sb,
+			       struct ext4_group_desc *bg)
 {
 	return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
@@ -219,8 +220,8 @@
 		bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 }
 
-void ext4_free_blks_set(struct super_block *sb,
-			  struct ext4_group_desc *bg, __u32 count)
+void ext4_free_group_clusters_set(struct super_block *sb,
+				  struct ext4_group_desc *bg, __u32 count)
 {
 	bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
 	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
@@ -414,6 +415,22 @@
 	ext4_commit_super(sb, 1);
 }
 
+/*
+ * The del_gendisk() function uninitializes the disk-specific data
+ * structures, including the bdi structure, without telling anyone
+ * else.  Once this happens, any attempt to call mark_buffer_dirty()
+ * (for example, by ext4_commit_super), will cause a kernel OOPS.
+ * This is a kludge to prevent these oops until we can put in a proper
+ * hook in del_gendisk() to inform the VFS and file system layers.
+ */
+static int block_device_ejected(struct super_block *sb)
+{
+	struct inode *bd_inode = sb->s_bdev->bd_inode;
+	struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
+
+	return bdi->dev == NULL;
+}
+
 
 /* Deal with the reporting of failure conditions on a filesystem such as
  * inconsistencies detected or read IO failures.
@@ -821,10 +838,10 @@
 		brelse(sbi->s_group_desc[i]);
 	ext4_kvfree(sbi->s_group_desc);
 	ext4_kvfree(sbi->s_flex_groups);
-	percpu_counter_destroy(&sbi->s_freeblocks_counter);
+	percpu_counter_destroy(&sbi->s_freeclusters_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
-	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
+	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
 	brelse(sbi->s_sbh);
 #ifdef CONFIG_QUOTA
 	for (i = 0; i < MAXQUOTAS; i++)
@@ -1057,8 +1074,6 @@
 		seq_puts(seq, ",nouid32");
 	if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
 		seq_puts(seq, ",debug");
-	if (test_opt(sb, OLDALLOC))
-		seq_puts(seq, ",oldalloc");
 #ifdef CONFIG_EXT4_FS_XATTR
 	if (test_opt(sb, XATTR_USER))
 		seq_puts(seq, ",user_xattr");
@@ -1567,10 +1582,12 @@
 			set_opt(sb, DEBUG);
 			break;
 		case Opt_oldalloc:
-			set_opt(sb, OLDALLOC);
+			ext4_msg(sb, KERN_WARNING,
+				 "Ignoring deprecated oldalloc option");
 			break;
 		case Opt_orlov:
-			clear_opt(sb, OLDALLOC);
+			ext4_msg(sb, KERN_WARNING,
+				 "Ignoring deprecated orlov option");
 			break;
 #ifdef CONFIG_EXT4_FS_XATTR
 		case Opt_user_xattr:
@@ -1801,6 +1818,7 @@
 			break;
 		case Opt_nodelalloc:
 			clear_opt(sb, DELALLOC);
+			clear_opt2(sb, EXPLICIT_DELALLOC);
 			break;
 		case Opt_mblk_io_submit:
 			set_opt(sb, MBLK_IO_SUBMIT);
@@ -1817,6 +1835,7 @@
 			break;
 		case Opt_delalloc:
 			set_opt(sb, DELALLOC);
+			set_opt2(sb, EXPLICIT_DELALLOC);
 			break;
 		case Opt_block_validity:
 			set_opt(sb, BLOCK_VALIDITY);
@@ -1935,7 +1954,7 @@
 		res = MS_RDONLY;
 	}
 	if (read_only)
-		return res;
+		goto done;
 	if (!(sbi->s_mount_state & EXT4_VALID_FS))
 		ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
 			 "running e2fsck is recommended");
@@ -1966,6 +1985,7 @@
 		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 
 	ext4_commit_super(sb, 1);
+done:
 	if (test_opt(sb, DEBUG))
 		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
 				"bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
@@ -2015,8 +2035,8 @@
 		flex_group = ext4_flex_group(sbi, i);
 		atomic_add(ext4_free_inodes_count(sb, gdp),
 			   &sbi->s_flex_groups[flex_group].free_inodes);
-		atomic_add(ext4_free_blks_count(sb, gdp),
-			   &sbi->s_flex_groups[flex_group].free_blocks);
+		atomic_add(ext4_free_group_clusters(sb, gdp),
+			   &sbi->s_flex_groups[flex_group].free_clusters);
 		atomic_add(ext4_used_dirs_count(sb, gdp),
 			   &sbi->s_flex_groups[flex_group].used_dirs);
 	}
@@ -2134,7 +2154,8 @@
 	if (NULL != first_not_zeroed)
 		*first_not_zeroed = grp;
 
-	ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
+	ext4_free_blocks_count_set(sbi->s_es,
+				   EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
 	sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
 	return 1;
 }
@@ -2454,7 +2475,8 @@
 					      char *buf)
 {
 	return snprintf(buf, PAGE_SIZE, "%llu\n",
-			(s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
+		(s64) EXT4_C2B(sbi,
+			percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
 }
 
 static ssize_t session_write_kbytes_show(struct ext4_attr *a,
@@ -2682,6 +2704,13 @@
 			return 0;
 		}
 	}
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) &&
+	    !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
+		ext4_msg(sb, KERN_ERR,
+			 "Can't support bigalloc feature without "
+			 "extents feature\n");
+		return 0;
+	}
 	return 1;
 }
 
@@ -3087,10 +3116,10 @@
 	char *cp;
 	const char *descr;
 	int ret = -ENOMEM;
-	int blocksize;
+	int blocksize, clustersize;
 	unsigned int db_count;
 	unsigned int i;
-	int needs_recovery, has_huge_files;
+	int needs_recovery, has_huge_files, has_bigalloc;
 	__u64 blocks_count;
 	int err;
 	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
@@ -3224,6 +3253,33 @@
 			   &journal_ioprio, NULL, 0))
 		goto failed_mount;
 
+	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+		printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
+			    "with data=journal disables delayed "
+			    "allocation and O_DIRECT support!\n");
+		if (test_opt2(sb, EXPLICIT_DELALLOC)) {
+			ext4_msg(sb, KERN_ERR, "can't mount with "
+				 "both data=journal and delalloc");
+			goto failed_mount;
+		}
+		if (test_opt(sb, DIOREAD_NOLOCK)) {
+			ext4_msg(sb, KERN_ERR, "can't mount with "
+				 "both data=journal and delalloc");
+			goto failed_mount;
+		}
+		if (test_opt(sb, DELALLOC))
+			clear_opt(sb, DELALLOC);
+	}
+
+	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
+	if (test_opt(sb, DIOREAD_NOLOCK)) {
+		if (blocksize < PAGE_SIZE) {
+			ext4_msg(sb, KERN_ERR, "can't mount with "
+				 "dioread_nolock if block size != PAGE_SIZE");
+			goto failed_mount;
+		}
+	}
+
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
 		(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
 
@@ -3265,8 +3321,6 @@
 	if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
 		goto failed_mount;
 
-	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
-
 	if (blocksize < EXT4_MIN_BLOCK_SIZE ||
 	    blocksize > EXT4_MAX_BLOCK_SIZE) {
 		ext4_msg(sb, KERN_ERR,
@@ -3369,12 +3423,53 @@
 		sb->s_dirt = 1;
 	}
 
-	if (sbi->s_blocks_per_group > blocksize * 8) {
-		ext4_msg(sb, KERN_ERR,
-		       "#blocks per group too big: %lu",
-		       sbi->s_blocks_per_group);
-		goto failed_mount;
+	/* Handle clustersize */
+	clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
+	has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb,
+				EXT4_FEATURE_RO_COMPAT_BIGALLOC);
+	if (has_bigalloc) {
+		if (clustersize < blocksize) {
+			ext4_msg(sb, KERN_ERR,
+				 "cluster size (%d) smaller than "
+				 "block size (%d)", clustersize, blocksize);
+			goto failed_mount;
+		}
+		sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
+			le32_to_cpu(es->s_log_block_size);
+		sbi->s_clusters_per_group =
+			le32_to_cpu(es->s_clusters_per_group);
+		if (sbi->s_clusters_per_group > blocksize * 8) {
+			ext4_msg(sb, KERN_ERR,
+				 "#clusters per group too big: %lu",
+				 sbi->s_clusters_per_group);
+			goto failed_mount;
+		}
+		if (sbi->s_blocks_per_group !=
+		    (sbi->s_clusters_per_group * (clustersize / blocksize))) {
+			ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
+				 "clusters per group (%lu) inconsistent",
+				 sbi->s_blocks_per_group,
+				 sbi->s_clusters_per_group);
+			goto failed_mount;
+		}
+	} else {
+		if (clustersize != blocksize) {
+			ext4_warning(sb, "fragment/cluster size (%d) != "
+				     "block size (%d)", clustersize,
+				     blocksize);
+			clustersize = blocksize;
+		}
+		if (sbi->s_blocks_per_group > blocksize * 8) {
+			ext4_msg(sb, KERN_ERR,
+				 "#blocks per group too big: %lu",
+				 sbi->s_blocks_per_group);
+			goto failed_mount;
+		}
+		sbi->s_clusters_per_group = sbi->s_blocks_per_group;
+		sbi->s_cluster_bits = 0;
 	}
+	sbi->s_cluster_ratio = clustersize / blocksize;
+
 	if (sbi->s_inodes_per_group > blocksize * 8) {
 		ext4_msg(sb, KERN_ERR,
 		       "#inodes per group too big: %lu",
@@ -3446,10 +3541,8 @@
 		goto failed_mount;
 	}
 
-#ifdef CONFIG_PROC_FS
 	if (ext4_proc_root)
 		sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
-#endif
 
 	bgl_lock_init(sbi->s_blockgroup_lock);
 
@@ -3483,8 +3576,8 @@
 	sbi->s_err_report.function = print_daily_error_info;
 	sbi->s_err_report.data = (unsigned long) sb;
 
-	err = percpu_counter_init(&sbi->s_freeblocks_counter,
-			ext4_count_free_blocks(sb));
+	err = percpu_counter_init(&sbi->s_freeclusters_counter,
+			ext4_count_free_clusters(sb));
 	if (!err) {
 		err = percpu_counter_init(&sbi->s_freeinodes_counter,
 				ext4_count_free_inodes(sb));
@@ -3494,7 +3587,7 @@
 				ext4_count_dirs(sb));
 	}
 	if (!err) {
-		err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+		err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
 	}
 	if (err) {
 		ext4_msg(sb, KERN_ERR, "insufficient memory");
@@ -3609,13 +3702,13 @@
 	 * The journal may have updated the bg summary counts, so we
 	 * need to update the global counters.
 	 */
-	percpu_counter_set(&sbi->s_freeblocks_counter,
-			   ext4_count_free_blocks(sb));
+	percpu_counter_set(&sbi->s_freeclusters_counter,
+			   ext4_count_free_clusters(sb));
 	percpu_counter_set(&sbi->s_freeinodes_counter,
 			   ext4_count_free_inodes(sb));
 	percpu_counter_set(&sbi->s_dirs_counter,
 			   ext4_count_dirs(sb));
-	percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
+	percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
 
 no_journal:
 	/*
@@ -3679,25 +3772,6 @@
 			 "available");
 	}
 
-	if (test_opt(sb, DELALLOC) &&
-	    (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
-		ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
-			 "requested data journaling mode");
-		clear_opt(sb, DELALLOC);
-	}
-	if (test_opt(sb, DIOREAD_NOLOCK)) {
-		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
-			ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
-				"option - requested data journaling mode");
-			clear_opt(sb, DIOREAD_NOLOCK);
-		}
-		if (sb->s_blocksize < PAGE_SIZE) {
-			ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
-				"option - block size is too small");
-			clear_opt(sb, DIOREAD_NOLOCK);
-		}
-	}
-
 	err = ext4_setup_system_zone(sb);
 	if (err) {
 		ext4_msg(sb, KERN_ERR, "failed to initialize system "
@@ -3710,22 +3784,19 @@
 	if (err) {
 		ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
 			 err);
-		goto failed_mount4;
+		goto failed_mount5;
 	}
 
 	err = ext4_register_li_request(sb, first_not_zeroed);
 	if (err)
-		goto failed_mount4;
+		goto failed_mount6;
 
 	sbi->s_kobj.kset = ext4_kset;
 	init_completion(&sbi->s_kobj_unregister);
 	err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
 				   "%s", sb->s_id);
-	if (err) {
-		ext4_mb_release(sb);
-		ext4_ext_release(sb);
-		goto failed_mount4;
-	};
+	if (err)
+		goto failed_mount7;
 
 	EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
 	ext4_orphan_cleanup(sb, es);
@@ -3759,13 +3830,19 @@
 		ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
 	goto failed_mount;
 
+failed_mount7:
+	ext4_unregister_li_request(sb);
+failed_mount6:
+	ext4_ext_release(sb);
+failed_mount5:
+	ext4_mb_release(sb);
+	ext4_release_system_zone(sb);
 failed_mount4:
 	iput(root);
 	sb->s_root = NULL;
 	ext4_msg(sb, KERN_ERR, "mount failed");
 	destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
 failed_mount_wq:
-	ext4_release_system_zone(sb);
 	if (sbi->s_journal) {
 		jbd2_journal_destroy(sbi->s_journal);
 		sbi->s_journal = NULL;
@@ -3774,10 +3851,10 @@
 	del_timer(&sbi->s_err_report);
 	if (sbi->s_flex_groups)
 		ext4_kvfree(sbi->s_flex_groups);
-	percpu_counter_destroy(&sbi->s_freeblocks_counter);
+	percpu_counter_destroy(&sbi->s_freeclusters_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
-	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
+	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
 	if (sbi->s_mmp_tsk)
 		kthread_stop(sbi->s_mmp_tsk);
 failed_mount2:
@@ -4064,7 +4141,7 @@
 	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
 	int error = 0;
 
-	if (!sbh)
+	if (!sbh || block_device_ejected(sb))
 		return error;
 	if (buffer_write_io_error(sbh)) {
 		/*
@@ -4100,8 +4177,9 @@
 	else
 		es->s_kbytes_written =
 			cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
-	ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
-					   &EXT4_SB(sb)->s_freeblocks_counter));
+	ext4_free_blocks_count_set(es,
+			EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
+				&EXT4_SB(sb)->s_freeclusters_counter)));
 	es->s_free_inodes_count =
 		cpu_to_le32(percpu_counter_sum_positive(
 				&EXT4_SB(sb)->s_freeinodes_counter));
@@ -4506,16 +4584,34 @@
 	return err;
 }
 
+/*
+ * Note: calculating the overhead so we can be compatible with
+ * historical BSD practice is quite difficult in the face of
+ * clusters/bigalloc.  This is because multiple metadata blocks from
+ * different block group can end up in the same allocation cluster.
+ * Calculating the exact overhead in the face of clustered allocation
+ * requires either O(all block bitmaps) in memory or O(number of block
+ * groups**2) in time.  We will still calculate the superblock for
+ * older file systems --- and if we come across with a bigalloc file
+ * system with zero in s_overhead_clusters the estimate will be close to
+ * correct especially for very large cluster sizes --- but for newer
+ * file systems, it's better to calculate this figure once at mkfs
+ * time, and store it in the superblock.  If the superblock value is
+ * present (even for non-bigalloc file systems), we will use it.
+ */
 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct super_block *sb = dentry->d_sb;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_super_block *es = sbi->s_es;
+	struct ext4_group_desc *gdp;
 	u64 fsid;
 	s64 bfree;
 
 	if (test_opt(sb, MINIX_DF)) {
 		sbi->s_overhead_last = 0;
+	} else if (es->s_overhead_clusters) {
+		sbi->s_overhead_last = le32_to_cpu(es->s_overhead_clusters);
 	} else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
 		ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 		ext4_fsblk_t overhead = 0;
@@ -4530,24 +4626,16 @@
 		 * All of the blocks before first_data_block are
 		 * overhead
 		 */
-		overhead = le32_to_cpu(es->s_first_data_block);
+		overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
 
 		/*
-		 * Add the overhead attributed to the superblock and
-		 * block group descriptors.  If the sparse superblocks
-		 * feature is turned on, then not all groups have this.
+		 * Add the overhead found in each block group
 		 */
 		for (i = 0; i < ngroups; i++) {
-			overhead += ext4_bg_has_super(sb, i) +
-				ext4_bg_num_gdb(sb, i);
+			gdp = ext4_get_group_desc(sb, i, NULL);
+			overhead += ext4_num_overhead_clusters(sb, i, gdp);
 			cond_resched();
 		}
-
-		/*
-		 * Every block group has an inode bitmap, a block
-		 * bitmap, and an inode table.
-		 */
-		overhead += ngroups * (2 + sbi->s_itb_per_group);
 		sbi->s_overhead_last = overhead;
 		smp_wmb();
 		sbi->s_blocks_last = ext4_blocks_count(es);
@@ -4555,11 +4643,12 @@
 
 	buf->f_type = EXT4_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
-	buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
-	bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
-		       percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
+	buf->f_blocks = (ext4_blocks_count(es) -
+			 EXT4_C2B(sbi, sbi->s_overhead_last));
+	bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
+		percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
 	/* prevent underflow in case that few free space is available */
-	buf->f_bfree = max_t(s64, bfree, 0);
+	buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
 	buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
 	if (buf->f_bfree < ext4_r_blocks_count(es))
 		buf->f_bavail = 0;
@@ -4980,13 +5069,11 @@
 		return err;
 	err = ext4_init_system_zone();
 	if (err)
-		goto out7;
+		goto out6;
 	ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
 	if (!ext4_kset)
-		goto out6;
-	ext4_proc_root = proc_mkdir("fs/ext4", NULL);
-	if (!ext4_proc_root)
 		goto out5;
+	ext4_proc_root = proc_mkdir("fs/ext4", NULL);
 
 	err = ext4_init_feat_adverts();
 	if (err)
@@ -5022,12 +5109,12 @@
 out3:
 	ext4_exit_feat_adverts();
 out4:
-	remove_proc_entry("fs/ext4", NULL);
-out5:
+	if (ext4_proc_root)
+		remove_proc_entry("fs/ext4", NULL);
 	kset_unregister(ext4_kset);
-out6:
+out5:
 	ext4_exit_system_zone();
-out7:
+out6:
 	ext4_exit_pageio();
 	return err;
 }
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c757adc..93a00d8 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -820,8 +820,14 @@
 			if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 				goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
 
+			/*
+			 * take i_data_sem because we will test
+			 * i_delalloc_reserved_flag in ext4_mb_new_blocks
+			 */
+			down_read((&EXT4_I(inode)->i_data_sem));
 			block = ext4_new_meta_blocks(handle, inode, goal, 0,
 						     NULL, &error);
+			up_read((&EXT4_I(inode)->i_data_sem));
 			if (error)
 				goto cleanup;
 
@@ -985,11 +991,7 @@
 	no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
 	ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
 
-	error = ext4_get_inode_loc(inode, &is.iloc);
-	if (error)
-		goto cleanup;
-
-	error = ext4_journal_get_write_access(handle, is.iloc.bh);
+	error = ext4_reserve_inode_write(handle, inode, &is.iloc);
 	if (error)
 		goto cleanup;
 
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index d51a9838..dd7bc38 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -16,7 +16,6 @@
 #include <sys/vfs.h>
 #include "hostfs.h"
 #include "os.h"
-#include "user.h"
 #include <utime.h>
 
 static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 9fe061f..fea8dd6 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1135,6 +1135,14 @@
 		goto out;
 	}
 
+	if (be32_to_cpu(sb->s_first) == 0 ||
+	    be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
+		printk(KERN_WARNING
+			"JBD: Invalid start block of journal: %u\n",
+			be32_to_cpu(sb->s_first));
+		goto out;
+	}
+
 	return 0;
 
 out:
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index eef6979..68d704d 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -352,7 +352,7 @@
 	J_ASSERT(commit_transaction->t_state == T_RUNNING);
 
 	trace_jbd2_start_commit(journal, commit_transaction);
-	jbd_debug(1, "JBD: starting commit of transaction %d\n",
+	jbd_debug(1, "JBD2: starting commit of transaction %d\n",
 			commit_transaction->t_tid);
 
 	write_lock(&journal->j_state_lock);
@@ -427,7 +427,7 @@
 	__jbd2_journal_clean_checkpoint_list(journal);
 	spin_unlock(&journal->j_list_lock);
 
-	jbd_debug (3, "JBD: commit phase 1\n");
+	jbd_debug(3, "JBD2: commit phase 1\n");
 
 	/*
 	 * Switch to a new revoke table.
@@ -447,7 +447,7 @@
 	wake_up(&journal->j_wait_transaction_locked);
 	write_unlock(&journal->j_state_lock);
 
-	jbd_debug (3, "JBD: commit phase 2\n");
+	jbd_debug(3, "JBD2: commit phase 2\n");
 
 	/*
 	 * Now start flushing things to disk, in the order they appear
@@ -462,7 +462,7 @@
 					  WRITE_SYNC);
 	blk_finish_plug(&plug);
 
-	jbd_debug(3, "JBD: commit phase 2\n");
+	jbd_debug(3, "JBD2: commit phase 2\n");
 
 	/*
 	 * Way to go: we have now written out all of the data for a
@@ -522,7 +522,7 @@
 
 			J_ASSERT (bufs == 0);
 
-			jbd_debug(4, "JBD: get descriptor\n");
+			jbd_debug(4, "JBD2: get descriptor\n");
 
 			descriptor = jbd2_journal_get_descriptor_buffer(journal);
 			if (!descriptor) {
@@ -531,7 +531,7 @@
 			}
 
 			bh = jh2bh(descriptor);
-			jbd_debug(4, "JBD: got buffer %llu (%p)\n",
+			jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
 				(unsigned long long)bh->b_blocknr, bh->b_data);
 			header = (journal_header_t *)&bh->b_data[0];
 			header->h_magic     = cpu_to_be32(JBD2_MAGIC_NUMBER);
@@ -625,7 +625,7 @@
 		    commit_transaction->t_buffers == NULL ||
 		    space_left < tag_bytes + 16) {
 
-			jbd_debug(4, "JBD: Submit %d IOs\n", bufs);
+			jbd_debug(4, "JBD2: Submit %d IOs\n", bufs);
 
 			/* Write an end-of-descriptor marker before
                            submitting the IOs.  "tag" still points to
@@ -707,7 +707,7 @@
 	   so we incur less scheduling load.
 	*/
 
-	jbd_debug(3, "JBD: commit phase 3\n");
+	jbd_debug(3, "JBD2: commit phase 3\n");
 
 	/*
 	 * akpm: these are BJ_IO, and j_list_lock is not needed.
@@ -771,7 +771,7 @@
 
 	J_ASSERT (commit_transaction->t_shadow_list == NULL);
 
-	jbd_debug(3, "JBD: commit phase 4\n");
+	jbd_debug(3, "JBD2: commit phase 4\n");
 
 	/* Here we wait for the revoke record and descriptor record buffers */
  wait_for_ctlbuf:
@@ -801,7 +801,7 @@
 	if (err)
 		jbd2_journal_abort(journal, err);
 
-	jbd_debug(3, "JBD: commit phase 5\n");
+	jbd_debug(3, "JBD2: commit phase 5\n");
 	write_lock(&journal->j_state_lock);
 	J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
 	commit_transaction->t_state = T_COMMIT_JFLUSH;
@@ -830,7 +830,7 @@
            transaction can be removed from any checkpoint list it was on
            before. */
 
-	jbd_debug(3, "JBD: commit phase 6\n");
+	jbd_debug(3, "JBD2: commit phase 6\n");
 
 	J_ASSERT(list_empty(&commit_transaction->t_inode_list));
 	J_ASSERT(commit_transaction->t_buffers == NULL);
@@ -964,7 +964,7 @@
 
 	/* Done with this transaction! */
 
-	jbd_debug(3, "JBD: commit phase 7\n");
+	jbd_debug(3, "JBD2: commit phase 7\n");
 
 	J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
 
@@ -1039,7 +1039,7 @@
 		journal->j_commit_callback(journal, commit_transaction);
 
 	trace_jbd2_end_commit(journal, commit_transaction);
-	jbd_debug(1, "JBD: commit %d complete, head %d\n",
+	jbd_debug(1, "JBD2: commit %d complete, head %d\n",
 		  journal->j_commit_sequence, journal->j_tail_sequence);
 	if (to_free)
 		kfree(commit_transaction);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f24df13..0fa0123 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -491,7 +491,7 @@
 		 */
 
 		journal->j_commit_request = target;
-		jbd_debug(1, "JBD: requesting commit %d/%d\n",
+		jbd_debug(1, "JBD2: requesting commit %d/%d\n",
 			  journal->j_commit_request,
 			  journal->j_commit_sequence);
 		wake_up(&journal->j_wait_commit);
@@ -500,7 +500,7 @@
 		/* This should never happen, but if it does, preserve
 		   the evidence before kjournald goes into a loop and
 		   increments j_commit_sequence beyond all recognition. */
-		WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n",
+		WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n",
 			  journal->j_commit_request,
 			  journal->j_commit_sequence,
 			  target, journal->j_running_transaction ? 
@@ -645,7 +645,7 @@
 	}
 #endif
 	while (tid_gt(tid, journal->j_commit_sequence)) {
-		jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
+		jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n",
 				  tid, journal->j_commit_sequence);
 		wake_up(&journal->j_wait_commit);
 		read_unlock(&journal->j_state_lock);
@@ -1093,7 +1093,7 @@
 	first = be32_to_cpu(sb->s_first);
 	last = be32_to_cpu(sb->s_maxlen);
 	if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) {
-		printk(KERN_ERR "JBD: Journal too short (blocks %llu-%llu).\n",
+		printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n",
 		       first, last);
 		journal_fail_superblock(journal);
 		return -EINVAL;
@@ -1139,7 +1139,7 @@
 	 */
 	if (sb->s_start == 0 && journal->j_tail_sequence ==
 				journal->j_transaction_sequence) {
-		jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
+		jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
 			"(start %ld, seq %d, errno %d)\n",
 			journal->j_tail, journal->j_tail_sequence,
 			journal->j_errno);
@@ -1163,7 +1163,7 @@
 	}
 
 	read_lock(&journal->j_state_lock);
-	jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
+	jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n",
 		  journal->j_tail, journal->j_tail_sequence, journal->j_errno);
 
 	sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
@@ -1216,8 +1216,8 @@
 		ll_rw_block(READ, 1, &bh);
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
-			printk (KERN_ERR
-				"JBD: IO error reading journal superblock\n");
+			printk(KERN_ERR
+				"JBD2: IO error reading journal superblock\n");
 			goto out;
 		}
 	}
@@ -1228,7 +1228,7 @@
 
 	if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
 	    sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
-		printk(KERN_WARNING "JBD: no valid journal superblock found\n");
+		printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
 		goto out;
 	}
 
@@ -1240,14 +1240,22 @@
 		journal->j_format_version = 2;
 		break;
 	default:
-		printk(KERN_WARNING "JBD: unrecognised superblock format ID\n");
+		printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
 		goto out;
 	}
 
 	if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
 		journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
 	else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
-		printk (KERN_WARNING "JBD: journal file too short\n");
+		printk(KERN_WARNING "JBD2: journal file too short\n");
+		goto out;
+	}
+
+	if (be32_to_cpu(sb->s_first) == 0 ||
+	    be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
+		printk(KERN_WARNING
+			"JBD2: Invalid start block of journal: %u\n",
+			be32_to_cpu(sb->s_first));
 		goto out;
 	}
 
@@ -1310,8 +1318,8 @@
 		     ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
 		    (sb->s_feature_incompat &
 		     ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
-			printk (KERN_WARNING
-				"JBD: Unrecognised features on journal\n");
+			printk(KERN_WARNING
+				"JBD2: Unrecognised features on journal\n");
 			return -EINVAL;
 		}
 	}
@@ -1346,7 +1354,7 @@
 	return 0;
 
 recovery_error:
-	printk (KERN_WARNING "JBD: recovery failed\n");
+	printk(KERN_WARNING "JBD2: recovery failed\n");
 	return -EIO;
 }
 
@@ -1577,7 +1585,7 @@
 	struct buffer_head *bh;
 
 	printk(KERN_WARNING
-		"JBD: Converting superblock from version 1 to 2.\n");
+		"JBD2: Converting superblock from version 1 to 2.\n");
 
 	/* Pre-initialise new fields to zero */
 	offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
@@ -1694,7 +1702,7 @@
 	if (!journal->j_tail)
 		goto no_recovery;
 
-	printk (KERN_WARNING "JBD: %s recovery information on journal\n",
+	printk(KERN_WARNING "JBD2: %s recovery information on journal\n",
 		write ? "Clearing" : "Ignoring");
 
 	err = jbd2_journal_skip_recovery(journal);
@@ -2020,7 +2028,7 @@
 	retval = 0;
 	if (!jbd2_journal_head_cache) {
 		retval = -ENOMEM;
-		printk(KERN_EMERG "JBD: no memory for journal_head cache\n");
+		printk(KERN_EMERG "JBD2: no memory for journal_head cache\n");
 	}
 	return retval;
 }
@@ -2383,7 +2391,7 @@
 #ifdef CONFIG_JBD2_DEBUG
 	int n = atomic_read(&nr_journal_heads);
 	if (n)
-		printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
+		printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n);
 #endif
 	jbd2_remove_debugfs_entry();
 	jbd2_remove_jbd_stats_proc_entry();
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 1cad869..da6d7ba 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -89,7 +89,7 @@
 		err = jbd2_journal_bmap(journal, next, &blocknr);
 
 		if (err) {
-			printk (KERN_ERR "JBD: bad block at offset %u\n",
+			printk(KERN_ERR "JBD2: bad block at offset %u\n",
 				next);
 			goto failed;
 		}
@@ -138,14 +138,14 @@
 	*bhp = NULL;
 
 	if (offset >= journal->j_maxlen) {
-		printk(KERN_ERR "JBD: corrupted journal superblock\n");
+		printk(KERN_ERR "JBD2: corrupted journal superblock\n");
 		return -EIO;
 	}
 
 	err = jbd2_journal_bmap(journal, offset, &blocknr);
 
 	if (err) {
-		printk (KERN_ERR "JBD: bad block at offset %u\n",
+		printk(KERN_ERR "JBD2: bad block at offset %u\n",
 			offset);
 		return err;
 	}
@@ -163,7 +163,7 @@
 	}
 
 	if (!buffer_uptodate(bh)) {
-		printk (KERN_ERR "JBD: Failed to read block at offset %u\n",
+		printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
 			offset);
 		brelse(bh);
 		return -EIO;
@@ -251,10 +251,10 @@
 	if (!err)
 		err = do_one_pass(journal, &info, PASS_REPLAY);
 
-	jbd_debug(1, "JBD: recovery, exit status %d, "
+	jbd_debug(1, "JBD2: recovery, exit status %d, "
 		  "recovered transactions %u to %u\n",
 		  err, info.start_transaction, info.end_transaction);
-	jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n",
+	jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
 		  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
 
 	/* Restart the log at the next transaction ID, thus invalidating
@@ -293,14 +293,14 @@
 	err = do_one_pass(journal, &info, PASS_SCAN);
 
 	if (err) {
-		printk(KERN_ERR "JBD: error %d scanning journal\n", err);
+		printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
 		++journal->j_transaction_sequence;
 	} else {
 #ifdef CONFIG_JBD2_DEBUG
 		int dropped = info.end_transaction - 
 			be32_to_cpu(journal->j_superblock->s_sequence);
 		jbd_debug(1,
-			  "JBD: ignoring %d transaction%s from the journal.\n",
+			  "JBD2: ignoring %d transaction%s from the journal.\n",
 			  dropped, (dropped == 1) ? "" : "s");
 #endif
 		journal->j_transaction_sequence = ++info.end_transaction;
@@ -338,7 +338,7 @@
 		wrap(journal, *next_log_block);
 		err = jread(&obh, journal, io_block);
 		if (err) {
-			printk(KERN_ERR "JBD: IO error %d recovering block "
+			printk(KERN_ERR "JBD2: IO error %d recovering block "
 				"%lu in log\n", err, io_block);
 			return 1;
 		} else {
@@ -411,7 +411,7 @@
 		 * either the next descriptor block or the final commit
 		 * record. */
 
-		jbd_debug(3, "JBD: checking block %ld\n", next_log_block);
+		jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
 		err = jread(&bh, journal, next_log_block);
 		if (err)
 			goto failed;
@@ -491,8 +491,8 @@
 					/* Recover what we can, but
 					 * report failure at the end. */
 					success = err;
-					printk (KERN_ERR
-						"JBD: IO error %d recovering "
+					printk(KERN_ERR
+						"JBD2: IO error %d recovering "
 						"block %ld in log\n",
 						err, io_block);
 				} else {
@@ -520,7 +520,7 @@
 							journal->j_blocksize);
 					if (nbh == NULL) {
 						printk(KERN_ERR
-						       "JBD: Out of memory "
+						       "JBD2: Out of memory "
 						       "during recovery.\n");
 						err = -ENOMEM;
 						brelse(bh);
@@ -689,7 +689,7 @@
 		/* It's really bad news if different passes end up at
 		 * different places (but possible due to IO errors). */
 		if (info->end_transaction != next_commit_ID) {
-			printk (KERN_ERR "JBD: recovery pass %d ended at "
+			printk(KERN_ERR "JBD2: recovery pass %d ended at "
 				"transaction %u, expected %u\n",
 				pass, next_commit_ID, info->end_transaction);
 			if (!success)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 2d71094..a0e41a4 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -27,6 +27,7 @@
 #include <linux/highmem.h>
 #include <linux/hrtimer.h>
 #include <linux/backing-dev.h>
+#include <linux/bug.h>
 #include <linux/module.h>
 
 static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
@@ -115,7 +116,7 @@
  */
 
 static int start_this_handle(journal_t *journal, handle_t *handle,
-			     int gfp_mask)
+			     gfp_t gfp_mask)
 {
 	transaction_t	*transaction, *new_transaction = NULL;
 	tid_t		tid;
@@ -124,7 +125,7 @@
 	unsigned long ts = jiffies;
 
 	if (nblocks > journal->j_max_transaction_buffers) {
-		printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
+		printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n",
 		       current->comm, nblocks,
 		       journal->j_max_transaction_buffers);
 		return -ENOSPC;
@@ -320,7 +321,7 @@
  * Return a pointer to a newly allocated handle, or an ERR_PTR() value
  * on failure.
  */
-handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
+handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask)
 {
 	handle_t *handle = journal_current_handle();
 	int err;
@@ -443,7 +444,7 @@
  * transaction capabable of guaranteeing the requested number of
  * credits.
  */
-int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
+int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
 {
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
@@ -563,7 +564,7 @@
 	char b[BDEVNAME_SIZE];
 
 	printk(KERN_WARNING
-	       "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
+	       "JBD2: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
 	       "There's a risk of filesystem corruption in case of system "
 	       "crash.\n",
 	       bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
@@ -1049,6 +1050,10 @@
  * mark dirty metadata which needs to be journaled as part of the current
  * transaction.
  *
+ * The buffer must have previously had jbd2_journal_get_write_access()
+ * called so that it has a valid journal_head attached to the buffer
+ * head.
+ *
  * The buffer is placed on the transaction's metadata list and is marked
  * as belonging to the transaction.
  *
@@ -1065,11 +1070,16 @@
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
 	struct journal_head *jh = bh2jh(bh);
+	int ret = 0;
 
 	jbd_debug(5, "journal_head %p\n", jh);
 	JBUFFER_TRACE(jh, "entry");
 	if (is_handle_aborted(handle))
 		goto out;
+	if (!buffer_jbd(bh)) {
+		ret = -EUCLEAN;
+		goto out;
+	}
 
 	jbd_lock_bh_state(bh);
 
@@ -1093,8 +1103,20 @@
 	 */
 	if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
 		JBUFFER_TRACE(jh, "fastpath");
-		J_ASSERT_JH(jh, jh->b_transaction ==
-					journal->j_running_transaction);
+		if (unlikely(jh->b_transaction !=
+			     journal->j_running_transaction)) {
+			printk(KERN_EMERG "JBD: %s: "
+			       "jh->b_transaction (%llu, %p, %u) != "
+			       "journal->j_running_transaction (%p, %u)",
+			       journal->j_devname,
+			       (unsigned long long) bh->b_blocknr,
+			       jh->b_transaction,
+			       jh->b_transaction ? jh->b_transaction->t_tid : 0,
+			       journal->j_running_transaction,
+			       journal->j_running_transaction ?
+			       journal->j_running_transaction->t_tid : 0);
+			ret = -EINVAL;
+		}
 		goto out_unlock_bh;
 	}
 
@@ -1108,9 +1130,32 @@
 	 */
 	if (jh->b_transaction != transaction) {
 		JBUFFER_TRACE(jh, "already on other transaction");
-		J_ASSERT_JH(jh, jh->b_transaction ==
-					journal->j_committing_transaction);
-		J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
+		if (unlikely(jh->b_transaction !=
+			     journal->j_committing_transaction)) {
+			printk(KERN_EMERG "JBD: %s: "
+			       "jh->b_transaction (%llu, %p, %u) != "
+			       "journal->j_committing_transaction (%p, %u)",
+			       journal->j_devname,
+			       (unsigned long long) bh->b_blocknr,
+			       jh->b_transaction,
+			       jh->b_transaction ? jh->b_transaction->t_tid : 0,
+			       journal->j_committing_transaction,
+			       journal->j_committing_transaction ?
+			       journal->j_committing_transaction->t_tid : 0);
+			ret = -EINVAL;
+		}
+		if (unlikely(jh->b_next_transaction != transaction)) {
+			printk(KERN_EMERG "JBD: %s: "
+			       "jh->b_next_transaction (%llu, %p, %u) != "
+			       "transaction (%p, %u)",
+			       journal->j_devname,
+			       (unsigned long long) bh->b_blocknr,
+			       jh->b_next_transaction,
+			       jh->b_next_transaction ?
+			       jh->b_next_transaction->t_tid : 0,
+			       transaction, transaction->t_tid);
+			ret = -EINVAL;
+		}
 		/* And this case is illegal: we can't reuse another
 		 * transaction's data buffer, ever. */
 		goto out_unlock_bh;
@@ -1127,7 +1172,8 @@
 	jbd_unlock_bh_state(bh);
 out:
 	JBUFFER_TRACE(jh, "exit");
-	return 0;
+	WARN_ON(ret);	/* All errors are bugs, so dump the stack */
+	return ret;
 }
 
 /*
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 10b6be3..aae0edb 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -363,12 +363,15 @@
 	}
 
 	sb = quotactl_block(special);
-	if (IS_ERR(sb))
-		return PTR_ERR(sb);
+	if (IS_ERR(sb)) {
+		ret = PTR_ERR(sb);
+		goto out;
+	}
 
 	ret = do_quotactl(sb, type, cmds, id, addr, pathp);
 
 	drop_super(sb);
+out:
 	if (pathp && !IS_ERR(pathp))
 		path_put(pathp);
 	return ret;
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 95518a9..987585b 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -59,8 +59,8 @@
 	int nr_groups = bitmap->s_nr_groups;
 
 	if (block_group >= nr_groups) {
-		udf_debug("block_group (%d) > nr_groups (%d)\n", block_group,
-			  nr_groups);
+		udf_debug("block_group (%d) > nr_groups (%d)\n",
+			  block_group, nr_groups);
 	}
 
 	if (bitmap->s_block_bitmap[block_group]) {
@@ -126,8 +126,9 @@
 	if (bloc->logicalBlockNum + count < count ||
 	    (bloc->logicalBlockNum + count) > partmap->s_partition_len) {
 		udf_debug("%d < %d || %d + %d > %d\n",
-			  bloc->logicalBlockNum, 0, bloc->logicalBlockNum,
-			  count, partmap->s_partition_len);
+			  bloc->logicalBlockNum, 0,
+			  bloc->logicalBlockNum, count,
+			  partmap->s_partition_len);
 		goto error_return;
 	}
 
@@ -155,7 +156,7 @@
 			if (udf_set_bit(bit + i, bh->b_data)) {
 				udf_debug("bit %ld already set\n", bit + i);
 				udf_debug("byte=%2x\n",
-					((char *)bh->b_data)[(bit + i) >> 3]);
+					  ((char *)bh->b_data)[(bit + i) >> 3]);
 			}
 		}
 		udf_add_free_space(sb, sbi->s_partition, count);
@@ -369,7 +370,8 @@
 	if (bloc->logicalBlockNum + count < count ||
 	    (bloc->logicalBlockNum + count) > partmap->s_partition_len) {
 		udf_debug("%d < %d || %d + %d > %d\n",
-			  bloc->logicalBlockNum, 0, bloc->logicalBlockNum, count,
+			  bloc->logicalBlockNum, 0,
+			  bloc->logicalBlockNum, count,
 			  partmap->s_partition_len);
 		goto error_return;
 	}
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 2ffdb67..3e44f57 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -162,8 +162,8 @@
 	int padlen;
 
 	if ((!buffer) || (!offset)) {
-		udf_debug("invalidparms\n, buffer=%p, offset=%p\n", buffer,
-			  offset);
+		udf_debug("invalidparms, buffer=%p, offset=%p\n",
+			  buffer, offset);
 		return NULL;
 	}
 
@@ -201,7 +201,7 @@
 	struct short_ad *sa;
 
 	if ((!ptr) || (!offset)) {
-		printk(KERN_ERR "udf: udf_get_fileshortad() invalidparms\n");
+		pr_err("%s: invalidparms\n", __func__);
 		return NULL;
 	}
 
@@ -223,7 +223,7 @@
 	struct long_ad *la;
 
 	if ((!ptr) || (!offset)) {
-		printk(KERN_ERR "udf: udf_get_filelongad() invalidparms\n");
+		pr_err("%s: invalidparms\n", __func__);
 		return NULL;
 	}
 
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 6e73f1d..4fd1d80 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -37,6 +37,7 @@
 #include <linux/writeback.h>
 #include <linux/slab.h>
 #include <linux/crc-itu-t.h>
+#include <linux/mpage.h>
 
 #include "udf_i.h"
 #include "udf_sb.h"
@@ -83,12 +84,10 @@
 	end_writeback(inode);
 	if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
 	    inode->i_size != iinfo->i_lenExtents) {
-		printk(KERN_WARNING "UDF-fs (%s): Inode %lu (mode %o) has "
-			"inode size %llu different from extent length %llu. "
-			"Filesystem need not be standards compliant.\n",
-			inode->i_sb->s_id, inode->i_ino, inode->i_mode,
-			(unsigned long long)inode->i_size,
-			(unsigned long long)iinfo->i_lenExtents);
+		udf_warn(inode->i_sb, "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",
+			 inode->i_ino, inode->i_mode,
+			 (unsigned long long)inode->i_size,
+			 (unsigned long long)iinfo->i_lenExtents);
 	}
 	kfree(iinfo->i_ext.i_data);
 	iinfo->i_ext.i_data = NULL;
@@ -104,7 +103,13 @@
 
 static int udf_readpage(struct file *file, struct page *page)
 {
-	return block_read_full_page(page, udf_get_block);
+	return mpage_readpage(page, udf_get_block);
+}
+
+static int udf_readpages(struct file *file, struct address_space *mapping,
+			struct list_head *pages, unsigned nr_pages)
+{
+	return mpage_readpages(mapping, pages, nr_pages, udf_get_block);
 }
 
 static int udf_write_begin(struct file *file, struct address_space *mapping,
@@ -139,6 +144,7 @@
 
 const struct address_space_operations udf_aops = {
 	.readpage	= udf_readpage,
+	.readpages	= udf_readpages,
 	.writepage	= udf_writepage,
 	.write_begin		= udf_write_begin,
 	.write_end		= generic_write_end,
@@ -1169,16 +1175,15 @@
 	 */
 	bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident);
 	if (!bh) {
-		printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed !bh\n",
-		       inode->i_ino);
+		udf_err(inode->i_sb, "(ino %ld) failed !bh\n", inode->i_ino);
 		make_bad_inode(inode);
 		return;
 	}
 
 	if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE &&
 	    ident != TAG_IDENT_USE) {
-		printk(KERN_ERR "udf: udf_read_inode(ino %ld) "
-				"failed ident=%d\n", inode->i_ino, ident);
+		udf_err(inode->i_sb, "(ino %ld) failed ident=%d\n",
+			inode->i_ino, ident);
 		brelse(bh);
 		make_bad_inode(inode);
 		return;
@@ -1218,8 +1223,8 @@
 		}
 		brelse(ibh);
 	} else if (fe->icbTag.strategyType != cpu_to_le16(4)) {
-		printk(KERN_ERR "udf: unsupported strategy type: %d\n",
-		       le16_to_cpu(fe->icbTag.strategyType));
+		udf_err(inode->i_sb, "unsupported strategy type: %d\n",
+			le16_to_cpu(fe->icbTag.strategyType));
 		brelse(bh);
 		make_bad_inode(inode);
 		return;
@@ -1415,9 +1420,8 @@
 		udf_debug("METADATA BITMAP FILE-----\n");
 		break;
 	default:
-		printk(KERN_ERR "udf: udf_fill_inode(ino %ld) failed unknown "
-				"file type=%d\n", inode->i_ino,
-				fe->icbTag.fileType);
+		udf_err(inode->i_sb, "(ino %ld) failed unknown file type=%d\n",
+			inode->i_ino, fe->icbTag.fileType);
 		make_bad_inode(inode);
 		return;
 	}
@@ -1440,8 +1444,8 @@
 	iinfo->i_ext.i_data = kmalloc(size, GFP_KERNEL);
 
 	if (!iinfo->i_ext.i_data) {
-		printk(KERN_ERR "udf:udf_alloc_i_data (ino %ld) "
-				"no free memory\n", inode->i_ino);
+		udf_err(inode->i_sb, "(ino %ld) no free memory\n",
+			inode->i_ino);
 		return -ENOMEM;
 	}
 
@@ -1691,9 +1695,8 @@
 	if (do_sync) {
 		sync_dirty_buffer(bh);
 		if (buffer_write_io_error(bh)) {
-			printk(KERN_WARNING "IO error syncing udf inode "
-				"[%s:%08lx]\n", inode->i_sb->s_id,
-				inode->i_ino);
+			udf_warn(inode->i_sb, "IO error syncing udf inode [%08lx]\n",
+				 inode->i_ino);
 			err = -EIO;
 		}
 	}
@@ -1984,8 +1987,7 @@
 		*elen = le32_to_cpu(lad->extLength) & UDF_EXTENT_LENGTH_MASK;
 		break;
 	default:
-		udf_debug("alloc_type = %d unsupported\n",
-				iinfo->i_alloc_type);
+		udf_debug("alloc_type = %d unsupported\n", iinfo->i_alloc_type);
 		return -1;
 	}
 
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 43e24a3..6583fe9 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -38,7 +38,7 @@
 
 	if (i == 0) {
 		udf_debug("XA disk: %s, vol_desc_start=%d\n",
-			  (ms_info.xa_flag ? "yes" : "no"), ms_info.addr.lba);
+			  ms_info.xa_flag ? "yes" : "no", ms_info.addr.lba);
 		if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
 			vol_desc_start = ms_info.addr.lba;
 	} else {
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index 9215700..c175b4d 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -204,6 +204,7 @@
 {
 	struct tag *tag_p;
 	struct buffer_head *bh = NULL;
+	u8 checksum;
 
 	/* Read the block */
 	if (block == 0xFFFFFFFF)
@@ -211,8 +212,8 @@
 
 	bh = udf_tread(sb, block);
 	if (!bh) {
-		udf_debug("block=%d, location=%d: read failed\n",
-			  block, location);
+		udf_err(sb, "read failed, block=%u, location=%d\n",
+			block, location);
 		return NULL;
 	}
 
@@ -227,16 +228,18 @@
 	}
 
 	/* Verify the tag checksum */
-	if (udf_tag_checksum(tag_p) != tag_p->tagChecksum) {
-		printk(KERN_ERR "udf: tag checksum failed block %d\n", block);
+	checksum = udf_tag_checksum(tag_p);
+	if (checksum != tag_p->tagChecksum) {
+		udf_err(sb, "tag checksum failed, block %u: 0x%02x != 0x%02x\n",
+			block, checksum, tag_p->tagChecksum);
 		goto error_out;
 	}
 
 	/* Verify the tag version */
 	if (tag_p->descVersion != cpu_to_le16(0x0002U) &&
 	    tag_p->descVersion != cpu_to_le16(0x0003U)) {
-		udf_debug("tag version 0x%04x != 0x0002 || 0x0003 block %d\n",
-			  le16_to_cpu(tag_p->descVersion), block);
+		udf_err(sb, "tag version 0x%04x != 0x0002 || 0x0003, block %u\n",
+			le16_to_cpu(tag_p->descVersion), block);
 		goto error_out;
 	}
 
@@ -248,8 +251,8 @@
 		return bh;
 
 	udf_debug("Crc failure block %d: crc = %d, crclen = %d\n", block,
-	    le16_to_cpu(tag_p->descCRC), le16_to_cpu(tag_p->descCRCLength));
-
+		  le16_to_cpu(tag_p->descCRC),
+		  le16_to_cpu(tag_p->descCRCLength));
 error_out:
 	brelse(bh);
 	return NULL;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index f1c64c6..4639e13 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -796,9 +796,8 @@
 	if (retval)
 		goto end_rmdir;
 	if (inode->i_nlink != 2)
-		udf_warning(inode->i_sb, "udf_rmdir",
-			    "empty directory has nlink != 2 (%d)",
-			    inode->i_nlink);
+		udf_warn(inode->i_sb, "empty directory has nlink != 2 (%d)\n",
+			 inode->i_nlink);
 	clear_nlink(inode);
 	inode->i_size = 0;
 	inode_dec_link_count(dir);
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index a71090e..d6caf01 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -33,8 +33,8 @@
 	struct udf_sb_info *sbi = UDF_SB(sb);
 	struct udf_part_map *map;
 	if (partition >= sbi->s_partitions) {
-		udf_debug("block=%d, partition=%d, offset=%d: "
-			  "invalid partition\n", block, partition, offset);
+		udf_debug("block=%d, partition=%d, offset=%d: invalid partition\n",
+			  block, partition, offset);
 		return 0xFFFFFFFF;
 	}
 	map = &sbi->s_partmaps[partition];
@@ -60,8 +60,8 @@
 	vdata = &map->s_type_specific.s_virtual;
 
 	if (block > vdata->s_num_entries) {
-		udf_debug("Trying to access block beyond end of VAT "
-			  "(%d max %d)\n", block, vdata->s_num_entries);
+		udf_debug("Trying to access block beyond end of VAT (%d max %d)\n",
+			  block, vdata->s_num_entries);
 		return 0xFFFFFFFF;
 	}
 
@@ -321,9 +321,14 @@
 	/* We shouldn't mount such media... */
 	BUG_ON(!inode);
 	retblk = udf_try_read_meta(inode, block, partition, offset);
-	if (retblk == 0xFFFFFFFF) {
-		udf_warning(sb, __func__, "error reading from METADATA, "
-			"trying to read from MIRROR");
+	if (retblk == 0xFFFFFFFF && mdata->s_metadata_fe) {
+		udf_warn(sb, "error reading from METADATA, trying to read from MIRROR\n");
+		if (!(mdata->s_flags & MF_MIRROR_FE_LOADED)) {
+			mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb,
+				mdata->s_mirror_file_loc, map->s_partition_num);
+			mdata->s_flags |= MF_MIRROR_FE_LOADED;
+		}
+
 		inode = mdata->s_mirror_fe;
 		if (!inode)
 			return 0xFFFFFFFF;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 7b27b06..e185253 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -75,8 +75,6 @@
 
 #define UDF_DEFAULT_BLOCKSIZE 2048
 
-static char error_buf[1024];
-
 /* These are the "meat" - everything else is stuffing */
 static int udf_fill_super(struct super_block *, void *, int);
 static void udf_put_super(struct super_block *);
@@ -92,8 +90,6 @@
 static unsigned int udf_count_free(struct super_block *);
 static int udf_statfs(struct dentry *, struct kstatfs *);
 static int udf_show_options(struct seq_file *, struct vfsmount *);
-static void udf_error(struct super_block *sb, const char *function,
-		      const char *fmt, ...);
 
 struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
 {
@@ -244,9 +240,8 @@
 	sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map),
 				  GFP_KERNEL);
 	if (!sbi->s_partmaps) {
-		udf_error(sb, __func__,
-			  "Unable to allocate space for %d partition maps",
-			  count);
+		udf_err(sb, "Unable to allocate space for %d partition maps\n",
+			count);
 		sbi->s_partitions = 0;
 		return -ENOMEM;
 	}
@@ -550,8 +545,7 @@
 			uopt->dmode = option & 0777;
 			break;
 		default:
-			printk(KERN_ERR "udf: bad mount option \"%s\" "
-			       "or missing value\n", p);
+			pr_err("bad mount option \"%s\" or missing value\n", p);
 			return 0;
 		}
 	}
@@ -645,20 +639,16 @@
 				udf_debug("ISO9660 Boot Record found\n");
 				break;
 			case 1:
-				udf_debug("ISO9660 Primary Volume Descriptor "
-					  "found\n");
+				udf_debug("ISO9660 Primary Volume Descriptor found\n");
 				break;
 			case 2:
-				udf_debug("ISO9660 Supplementary Volume "
-					  "Descriptor found\n");
+				udf_debug("ISO9660 Supplementary Volume Descriptor found\n");
 				break;
 			case 3:
-				udf_debug("ISO9660 Volume Partition Descriptor "
-					  "found\n");
+				udf_debug("ISO9660 Volume Partition Descriptor found\n");
 				break;
 			case 255:
-				udf_debug("ISO9660 Volume Descriptor Set "
-					  "Terminator found\n");
+				udf_debug("ISO9660 Volume Descriptor Set Terminator found\n");
 				break;
 			default:
 				udf_debug("ISO9660 VRS (%u) found\n",
@@ -809,8 +799,7 @@
 			      pvoldesc->recordingDateAndTime)) {
 #ifdef UDFFS_DEBUG
 		struct timestamp *ts = &pvoldesc->recordingDateAndTime;
-		udf_debug("recording time %04u/%02u/%02u"
-			  " %02u:%02u (%x)\n",
+		udf_debug("recording time %04u/%02u/%02u %02u:%02u (%x)\n",
 			  le16_to_cpu(ts->year), ts->month, ts->day, ts->hour,
 			  ts->minute, le16_to_cpu(ts->typeAndTimezone));
 #endif
@@ -821,7 +810,7 @@
 			strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name,
 				outstr->u_len > 31 ? 31 : outstr->u_len);
 			udf_debug("volIdent[] = '%s'\n",
-					UDF_SB(sb)->s_volume_ident);
+				  UDF_SB(sb)->s_volume_ident);
 		}
 
 	if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128))
@@ -837,64 +826,57 @@
 	return ret;
 }
 
+struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
+					u32 meta_file_loc, u32 partition_num)
+{
+	struct kernel_lb_addr addr;
+	struct inode *metadata_fe;
+
+	addr.logicalBlockNum = meta_file_loc;
+	addr.partitionReferenceNum = partition_num;
+
+	metadata_fe = udf_iget(sb, &addr);
+
+	if (metadata_fe == NULL)
+		udf_warn(sb, "metadata inode efe not found\n");
+	else if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) {
+		udf_warn(sb, "metadata inode efe does not have short allocation descriptors!\n");
+		iput(metadata_fe);
+		metadata_fe = NULL;
+	}
+
+	return metadata_fe;
+}
+
 static int udf_load_metadata_files(struct super_block *sb, int partition)
 {
 	struct udf_sb_info *sbi = UDF_SB(sb);
 	struct udf_part_map *map;
 	struct udf_meta_data *mdata;
 	struct kernel_lb_addr addr;
-	int fe_error = 0;
 
 	map = &sbi->s_partmaps[partition];
 	mdata = &map->s_type_specific.s_metadata;
 
 	/* metadata address */
-	addr.logicalBlockNum =  mdata->s_meta_file_loc;
-	addr.partitionReferenceNum = map->s_partition_num;
-
 	udf_debug("Metadata file location: block = %d part = %d\n",
-			  addr.logicalBlockNum, addr.partitionReferenceNum);
+		  mdata->s_meta_file_loc, map->s_partition_num);
 
-	mdata->s_metadata_fe = udf_iget(sb, &addr);
+	mdata->s_metadata_fe = udf_find_metadata_inode_efe(sb,
+		mdata->s_meta_file_loc, map->s_partition_num);
 
 	if (mdata->s_metadata_fe == NULL) {
-		udf_warning(sb, __func__, "metadata inode efe not found, "
-				"will try mirror inode.");
-		fe_error = 1;
-	} else if (UDF_I(mdata->s_metadata_fe)->i_alloc_type !=
-		 ICBTAG_FLAG_AD_SHORT) {
-		udf_warning(sb, __func__, "metadata inode efe does not have "
-			"short allocation descriptors!");
-		fe_error = 1;
-		iput(mdata->s_metadata_fe);
-		mdata->s_metadata_fe = NULL;
-	}
+		/* mirror file entry */
+		udf_debug("Mirror metadata file location: block = %d part = %d\n",
+			  mdata->s_mirror_file_loc, map->s_partition_num);
 
-	/* mirror file entry */
-	addr.logicalBlockNum = mdata->s_mirror_file_loc;
-	addr.partitionReferenceNum = map->s_partition_num;
+		mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb,
+			mdata->s_mirror_file_loc, map->s_partition_num);
 
-	udf_debug("Mirror metadata file location: block = %d part = %d\n",
-			  addr.logicalBlockNum, addr.partitionReferenceNum);
-
-	mdata->s_mirror_fe = udf_iget(sb, &addr);
-
-	if (mdata->s_mirror_fe == NULL) {
-		if (fe_error) {
-			udf_error(sb, __func__, "mirror inode efe not found "
-			"and metadata inode is missing too, exiting...");
+		if (mdata->s_mirror_fe == NULL) {
+			udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n");
 			goto error_exit;
-		} else
-			udf_warning(sb, __func__, "mirror inode efe not found,"
-					" but metadata inode is OK");
-	} else if (UDF_I(mdata->s_mirror_fe)->i_alloc_type !=
-		 ICBTAG_FLAG_AD_SHORT) {
-		udf_warning(sb, __func__, "mirror inode efe does not have "
-			"short allocation descriptors!");
-		iput(mdata->s_mirror_fe);
-		mdata->s_mirror_fe = NULL;
-		if (fe_error)
-			goto error_exit;
+		}
 	}
 
 	/*
@@ -907,18 +889,15 @@
 		addr.partitionReferenceNum = map->s_partition_num;
 
 		udf_debug("Bitmap file location: block = %d part = %d\n",
-			addr.logicalBlockNum, addr.partitionReferenceNum);
+			  addr.logicalBlockNum, addr.partitionReferenceNum);
 
 		mdata->s_bitmap_fe = udf_iget(sb, &addr);
 
 		if (mdata->s_bitmap_fe == NULL) {
 			if (sb->s_flags & MS_RDONLY)
-				udf_warning(sb, __func__, "bitmap inode efe "
-					"not found but it's ok since the disc"
-					" is mounted read-only");
+				udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n");
 			else {
-				udf_error(sb, __func__, "bitmap inode efe not "
-					"found and attempted read-write mount");
+				udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n");
 				goto error_exit;
 			}
 		}
@@ -971,9 +950,8 @@
 		bitmap = vzalloc(size); /* TODO: get rid of vzalloc */
 
 	if (bitmap == NULL) {
-		udf_error(sb, __func__,
-			  "Unable to allocate space for bitmap "
-			  "and %d buffer_head pointers", nr_groups);
+		udf_err(sb, "Unable to allocate space for bitmap and %d buffer_head pointers\n",
+			nr_groups);
 		return NULL;
 	}
 
@@ -1003,10 +981,9 @@
 	if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE))
 		map->s_partition_flags |= UDF_PART_FLAG_OVERWRITABLE;
 
-	udf_debug("Partition (%d type %x) starts at physical %d, "
-		  "block length %d\n", p_index,
-		  map->s_partition_type, map->s_partition_root,
-		  map->s_partition_len);
+	udf_debug("Partition (%d type %x) starts at physical %d, block length %d\n",
+		  p_index, map->s_partition_type,
+		  map->s_partition_root, map->s_partition_len);
 
 	if (strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) &&
 	    strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03))
@@ -1023,12 +1000,12 @@
 		map->s_uspace.s_table = udf_iget(sb, &loc);
 		if (!map->s_uspace.s_table) {
 			udf_debug("cannot load unallocSpaceTable (part %d)\n",
-					p_index);
+				  p_index);
 			return 1;
 		}
 		map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE;
 		udf_debug("unallocSpaceTable (part %d) @ %ld\n",
-				p_index, map->s_uspace.s_table->i_ino);
+			  p_index, map->s_uspace.s_table->i_ino);
 	}
 
 	if (phd->unallocSpaceBitmap.extLength) {
@@ -1041,8 +1018,8 @@
 		bitmap->s_extPosition = le32_to_cpu(
 				phd->unallocSpaceBitmap.extPosition);
 		map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP;
-		udf_debug("unallocSpaceBitmap (part %d) @ %d\n", p_index,
-						bitmap->s_extPosition);
+		udf_debug("unallocSpaceBitmap (part %d) @ %d\n",
+			  p_index, bitmap->s_extPosition);
 	}
 
 	if (phd->partitionIntegrityTable.extLength)
@@ -1058,13 +1035,13 @@
 		map->s_fspace.s_table = udf_iget(sb, &loc);
 		if (!map->s_fspace.s_table) {
 			udf_debug("cannot load freedSpaceTable (part %d)\n",
-				p_index);
+				  p_index);
 			return 1;
 		}
 
 		map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE;
 		udf_debug("freedSpaceTable (part %d) @ %ld\n",
-				p_index, map->s_fspace.s_table->i_ino);
+			  p_index, map->s_fspace.s_table->i_ino);
 	}
 
 	if (phd->freedSpaceBitmap.extLength) {
@@ -1077,8 +1054,8 @@
 		bitmap->s_extPosition = le32_to_cpu(
 				phd->freedSpaceBitmap.extPosition);
 		map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP;
-		udf_debug("freedSpaceBitmap (part %d) @ %d\n", p_index,
-					bitmap->s_extPosition);
+		udf_debug("freedSpaceBitmap (part %d) @ %d\n",
+			  p_index, bitmap->s_extPosition);
 	}
 	return 0;
 }
@@ -1118,11 +1095,9 @@
 	udf_find_vat_block(sb, p_index, type1_index, sbi->s_last_block);
 	if (!sbi->s_vat_inode &&
 	    sbi->s_last_block != blocks - 1) {
-		printk(KERN_NOTICE "UDF-fs: Failed to read VAT inode from the"
-		       " last recorded block (%lu), retrying with the last "
-		       "block of the device (%lu).\n",
-		       (unsigned long)sbi->s_last_block,
-		       (unsigned long)blocks - 1);
+		pr_notice("Failed to read VAT inode from the last recorded block (%lu), retrying with the last block of the device (%lu).\n",
+			  (unsigned long)sbi->s_last_block,
+			  (unsigned long)blocks - 1);
 		udf_find_vat_block(sb, p_index, type1_index, blocks - 1);
 	}
 	if (!sbi->s_vat_inode)
@@ -1220,8 +1195,8 @@
 	if (map->s_partition_type == UDF_METADATA_MAP25) {
 		ret = udf_load_metadata_files(sb, i);
 		if (ret) {
-			printk(KERN_ERR "UDF-fs: error loading MetaData "
-			"partition map %d\n", i);
+			udf_err(sb, "error loading MetaData partition map %d\n",
+				i);
 			goto out_bh;
 		}
 	} else {
@@ -1234,9 +1209,7 @@
 		 * overwrite blocks instead of relocating them).
 		 */
 		sb->s_flags |= MS_RDONLY;
-		printk(KERN_NOTICE "UDF-fs: Filesystem marked read-only "
-			"because writing to pseudooverwrite partition is "
-			"not implemented.\n");
+		pr_notice("Filesystem marked read-only because writing to pseudooverwrite partition is not implemented\n");
 	}
 out_bh:
 	/* In case loading failed, we handle cleanup in udf_fill_super */
@@ -1344,9 +1317,8 @@
 				struct metadataPartitionMap *mdm =
 						(struct metadataPartitionMap *)
 						&(lvd->partitionMaps[offset]);
-				udf_debug("Parsing Logical vol part %d "
-					"type %d  id=%s\n", i, type,
-					UDF_ID_METADATA);
+				udf_debug("Parsing Logical vol part %d type %d  id=%s\n",
+					  i, type, UDF_ID_METADATA);
 
 				map->s_partition_type = UDF_METADATA_MAP25;
 				map->s_partition_func = udf_get_pblock_meta25;
@@ -1361,25 +1333,24 @@
 					le32_to_cpu(mdm->allocUnitSize);
 				mdata->s_align_unit_size =
 					le16_to_cpu(mdm->alignUnitSize);
-				mdata->s_dup_md_flag 	 =
-					mdm->flags & 0x01;
+				if (mdm->flags & 0x01)
+					mdata->s_flags |= MF_DUPLICATE_MD;
 
 				udf_debug("Metadata Ident suffix=0x%x\n",
-					(le16_to_cpu(
-					 ((__le16 *)
-					      mdm->partIdent.identSuffix)[0])));
+					  le16_to_cpu(*(__le16 *)
+						      mdm->partIdent.identSuffix));
 				udf_debug("Metadata part num=%d\n",
-					le16_to_cpu(mdm->partitionNum));
+					  le16_to_cpu(mdm->partitionNum));
 				udf_debug("Metadata part alloc unit size=%d\n",
-					le32_to_cpu(mdm->allocUnitSize));
+					  le32_to_cpu(mdm->allocUnitSize));
 				udf_debug("Metadata file loc=%d\n",
-					le32_to_cpu(mdm->metadataFileLoc));
+					  le32_to_cpu(mdm->metadataFileLoc));
 				udf_debug("Mirror file loc=%d\n",
-				       le32_to_cpu(mdm->metadataMirrorFileLoc));
+					  le32_to_cpu(mdm->metadataMirrorFileLoc));
 				udf_debug("Bitmap file loc=%d\n",
-				       le32_to_cpu(mdm->metadataBitmapFileLoc));
-				udf_debug("Duplicate Flag: %d %d\n",
-					mdata->s_dup_md_flag, mdm->flags);
+					  le32_to_cpu(mdm->metadataBitmapFileLoc));
+				udf_debug("Flags: %d %d\n",
+					  mdata->s_flags, mdm->flags);
 			} else {
 				udf_debug("Unknown ident: %s\n",
 					  upm2->partIdent.ident);
@@ -1389,16 +1360,15 @@
 			map->s_partition_num = le16_to_cpu(upm2->partitionNum);
 		}
 		udf_debug("Partition (%d:%d) type %d on volume %d\n",
-			  i, map->s_partition_num, type,
-			  map->s_volumeseqnum);
+			  i, map->s_partition_num, type, map->s_volumeseqnum);
 	}
 
 	if (fileset) {
 		struct long_ad *la = (struct long_ad *)&(lvd->logicalVolContentsUse[0]);
 
 		*fileset = lelb_to_cpu(la->extLocation);
-		udf_debug("FileSet found in LogicalVolDesc at block=%d, "
-			  "partition=%d\n", fileset->logicalBlockNum,
+		udf_debug("FileSet found in LogicalVolDesc at block=%d, partition=%d\n",
+			  fileset->logicalBlockNum,
 			  fileset->partitionReferenceNum);
 	}
 	if (lvd->integritySeqExt.extLength)
@@ -1478,9 +1448,9 @@
 
 		bh = udf_read_tagged(sb, block, block, &ident);
 		if (!bh) {
-			printk(KERN_ERR "udf: Block %Lu of volume descriptor "
-			       "sequence is corrupted or we could not read "
-			       "it.\n", (unsigned long long)block);
+			udf_err(sb,
+				"Block %llu of volume descriptor sequence is corrupted or we could not read it\n",
+				(unsigned long long)block);
 			return 1;
 		}
 
@@ -1553,7 +1523,7 @@
 	 * in a suitable order
 	 */
 	if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) {
-		printk(KERN_ERR "udf: Primary Volume Descriptor not found!\n");
+		udf_err(sb, "Primary Volume Descriptor not found!\n");
 		return 1;
 	}
 	if (udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block))
@@ -1740,7 +1710,7 @@
 
 	if (!sb_set_blocksize(sb, uopt->blocksize)) {
 		if (!silent)
-			printk(KERN_WARNING "UDF-fs: Bad block size\n");
+			udf_warn(sb, "Bad block size\n");
 		return 0;
 	}
 	sbi->s_last_block = uopt->lastblock;
@@ -1749,12 +1719,11 @@
 		nsr_off = udf_check_vsd(sb);
 		if (!nsr_off) {
 			if (!silent)
-				printk(KERN_WARNING "UDF-fs: No VRS found\n");
+				udf_warn(sb, "No VRS found\n");
 			return 0;
 		}
 		if (nsr_off == -1)
-			udf_debug("Failed to read byte 32768. Assuming open "
-				  "disc. Skipping validity check\n");
+			udf_debug("Failed to read byte 32768. Assuming open disc. Skipping validity check\n");
 		if (!sbi->s_last_block)
 			sbi->s_last_block = udf_get_last_block(sb);
 	} else {
@@ -1765,7 +1734,7 @@
 	sbi->s_anchor = uopt->anchor;
 	if (!udf_find_anchor(sb, fileset)) {
 		if (!silent)
-			printk(KERN_WARNING "UDF-fs: No anchor found\n");
+			udf_warn(sb, "No anchor found\n");
 		return 0;
 	}
 	return 1;
@@ -1937,8 +1906,7 @@
 
 	if (uopt.flags & (1 << UDF_FLAG_UTF8) &&
 	    uopt.flags & (1 << UDF_FLAG_NLS_MAP)) {
-		udf_error(sb, "udf_read_super",
-			  "utf8 cannot be combined with iocharset\n");
+		udf_err(sb, "utf8 cannot be combined with iocharset\n");
 		goto error_out;
 	}
 #ifdef CONFIG_UDF_NLS
@@ -1987,15 +1955,14 @@
 		ret = udf_load_vrs(sb, &uopt, silent, &fileset);
 		if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
 			if (!silent)
-				printk(KERN_NOTICE
-				       "UDF-fs: Rescanning with blocksize "
-				       "%d\n", UDF_DEFAULT_BLOCKSIZE);
+				pr_notice("Rescanning with blocksize %d\n",
+					  UDF_DEFAULT_BLOCKSIZE);
 			uopt.blocksize = UDF_DEFAULT_BLOCKSIZE;
 			ret = udf_load_vrs(sb, &uopt, silent, &fileset);
 		}
 	}
 	if (!ret) {
-		printk(KERN_WARNING "UDF-fs: No partition found (1)\n");
+		udf_warn(sb, "No partition found (1)\n");
 		goto error_out;
 	}
 
@@ -2010,10 +1977,9 @@
 				le16_to_cpu(lvidiu->maxUDFWriteRev); */
 
 		if (minUDFReadRev > UDF_MAX_READ_VERSION) {
-			printk(KERN_ERR "UDF-fs: minUDFReadRev=%x "
-					"(max is %x)\n",
-			       le16_to_cpu(lvidiu->minUDFReadRev),
-			       UDF_MAX_READ_VERSION);
+			udf_err(sb, "minUDFReadRev=%x (max is %x)\n",
+				le16_to_cpu(lvidiu->minUDFReadRev),
+				UDF_MAX_READ_VERSION);
 			goto error_out;
 		} else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION)
 			sb->s_flags |= MS_RDONLY;
@@ -2027,28 +1993,27 @@
 	}
 
 	if (!sbi->s_partitions) {
-		printk(KERN_WARNING "UDF-fs: No partition found (2)\n");
+		udf_warn(sb, "No partition found (2)\n");
 		goto error_out;
 	}
 
 	if (sbi->s_partmaps[sbi->s_partition].s_partition_flags &
 			UDF_PART_FLAG_READ_ONLY) {
-		printk(KERN_NOTICE "UDF-fs: Partition marked readonly; "
-				   "forcing readonly mount\n");
+		pr_notice("Partition marked readonly; forcing readonly mount\n");
 		sb->s_flags |= MS_RDONLY;
 	}
 
 	if (udf_find_fileset(sb, &fileset, &rootdir)) {
-		printk(KERN_WARNING "UDF-fs: No fileset found\n");
+		udf_warn(sb, "No fileset found\n");
 		goto error_out;
 	}
 
 	if (!silent) {
 		struct timestamp ts;
 		udf_time_to_disk_stamp(&ts, sbi->s_record_time);
-		udf_info("UDF: Mounting volume '%s', "
-			 "timestamp %04u/%02u/%02u %02u:%02u (%x)\n",
-			 sbi->s_volume_ident, le16_to_cpu(ts.year), ts.month, ts.day,
+		udf_info("Mounting volume '%s', timestamp %04u/%02u/%02u %02u:%02u (%x)\n",
+			 sbi->s_volume_ident,
+			 le16_to_cpu(ts.year), ts.month, ts.day,
 			 ts.hour, ts.minute, le16_to_cpu(ts.typeAndTimezone));
 	}
 	if (!(sb->s_flags & MS_RDONLY))
@@ -2059,8 +2024,7 @@
 	/* perhaps it's not extensible enough, but for now ... */
 	inode = udf_iget(sb, &rootdir);
 	if (!inode) {
-		printk(KERN_ERR "UDF-fs: Error in udf_iget, block=%d, "
-				"partition=%d\n",
+		udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n",
 		       rootdir.logicalBlockNum, rootdir.partitionReferenceNum);
 		goto error_out;
 	}
@@ -2068,7 +2032,7 @@
 	/* Allocate a dentry for the root inode */
 	sb->s_root = d_alloc_root(inode);
 	if (!sb->s_root) {
-		printk(KERN_ERR "UDF-fs: Couldn't allocate root dentry\n");
+		udf_err(sb, "Couldn't allocate root dentry\n");
 		iput(inode);
 		goto error_out;
 	}
@@ -2096,32 +2060,40 @@
 	return -EINVAL;
 }
 
-static void udf_error(struct super_block *sb, const char *function,
-		      const char *fmt, ...)
+void _udf_err(struct super_block *sb, const char *function,
+	      const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;
 
-	if (!(sb->s_flags & MS_RDONLY)) {
-		/* mark sb error */
+	/* mark sb error */
+	if (!(sb->s_flags & MS_RDONLY))
 		sb->s_dirt = 1;
-	}
+
 	va_start(args, fmt);
-	vsnprintf(error_buf, sizeof(error_buf), fmt, args);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	pr_err("error (device %s): %s: %pV", sb->s_id, function, &vaf);
+
 	va_end(args);
-	printk(KERN_CRIT "UDF-fs error (device %s): %s: %s\n",
-		sb->s_id, function, error_buf);
 }
 
-void udf_warning(struct super_block *sb, const char *function,
-		 const char *fmt, ...)
+void _udf_warn(struct super_block *sb, const char *function,
+	       const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;
 
 	va_start(args, fmt);
-	vsnprintf(error_buf, sizeof(error_buf), fmt, args);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	pr_warn("warning (device %s): %s: %pV", sb->s_id, function, &vaf);
+
 	va_end(args);
-	printk(KERN_WARNING "UDF-fs warning (device %s): %s: %s\n",
-	       sb->s_id, function, error_buf);
 }
 
 static void udf_put_super(struct super_block *sb)
@@ -2213,11 +2185,11 @@
 	bh = udf_read_ptagged(sb, &loc, 0, &ident);
 
 	if (!bh) {
-		printk(KERN_ERR "udf: udf_count_free failed\n");
+		udf_err(sb, "udf_count_free failed\n");
 		goto out;
 	} else if (ident != TAG_IDENT_SBD) {
 		brelse(bh);
-		printk(KERN_ERR "udf: udf_count_free failed\n");
+		udf_err(sb, "udf_count_free failed\n");
 		goto out;
 	}
 
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 8424308..4b98fee 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -95,23 +95,21 @@
 		lbcount += elen;
 		if (lbcount > inode->i_size) {
 			if (lbcount - inode->i_size >= inode->i_sb->s_blocksize)
-				printk(KERN_WARNING
-				       "udf_truncate_tail_extent(): Too long "
-				       "extent after EOF in inode %u: i_size: "
-				       "%Ld lbcount: %Ld extent %u+%u\n",
-				       (unsigned)inode->i_ino,
-				       (long long)inode->i_size,
-				       (long long)lbcount,
-				       (unsigned)eloc.logicalBlockNum,
-				       (unsigned)elen);
+				udf_warn(inode->i_sb,
+					 "Too long extent after EOF in inode %u: i_size: %lld lbcount: %lld extent %u+%u\n",
+					 (unsigned)inode->i_ino,
+					 (long long)inode->i_size,
+					 (long long)lbcount,
+					 (unsigned)eloc.logicalBlockNum,
+					 (unsigned)elen);
 			nelen = elen - (lbcount - inode->i_size);
 			epos.offset -= adsize;
 			extent_trunc(inode, &epos, &eloc, etype, elen, nelen);
 			epos.offset += adsize;
 			if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1)
-				printk(KERN_ERR "udf_truncate_tail_extent(): "
-				       "Extent after EOF in inode %u.\n",
-				       (unsigned)inode->i_ino);
+				udf_err(inode->i_sb,
+					"Extent after EOF in inode %u\n",
+					(unsigned)inode->i_ino);
 			break;
 		}
 	}
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 4858c19..5142a82 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -54,13 +54,16 @@
 
 #pragma pack(1) /* XXX(hch): Why?  This file just defines in-core structures */
 
+#define MF_DUPLICATE_MD		0x01
+#define MF_MIRROR_FE_LOADED	0x02
+
 struct udf_meta_data {
 	__u32	s_meta_file_loc;
 	__u32	s_mirror_file_loc;
 	__u32	s_bitmap_file_loc;
 	__u32	s_alloc_unit_size;
 	__u16	s_align_unit_size;
-	__u8 	s_dup_md_flag;
+	int	s_flags;
 	struct inode *s_metadata_fe;
 	struct inode *s_mirror_fe;
 	struct inode *s_bitmap_fe;
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index dc8a8dc..f34e6fc 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -1,6 +1,8 @@
 #ifndef __UDF_DECL_H
 #define __UDF_DECL_H
 
+#define pr_fmt(fmt) "UDF-fs: " fmt
+
 #include "ecma_167.h"
 #include "osta_udf.h"
 
@@ -16,23 +18,30 @@
 #define UDF_PREALLOCATE
 #define UDF_DEFAULT_PREALLOC_BLOCKS	8
 
+extern __printf(3, 4) void _udf_err(struct super_block *sb,
+		const char *function, const char *fmt, ...);
+#define udf_err(sb, fmt, ...)					\
+	_udf_err(sb, __func__, fmt, ##__VA_ARGS__)
+
+extern __printf(3, 4) void _udf_warn(struct super_block *sb,
+		const char *function, const char *fmt, ...);
+#define udf_warn(sb, fmt, ...)					\
+	_udf_warn(sb, __func__, fmt, ##__VA_ARGS__)
+
+#define udf_info(fmt, ...)					\
+	pr_info("INFO " fmt, ##__VA_ARGS__)
+
 #undef UDFFS_DEBUG
 
 #ifdef UDFFS_DEBUG
-#define udf_debug(f, a...) \
-do { \
-	printk(KERN_DEBUG "UDF-fs DEBUG %s:%d:%s: ", \
-		__FILE__, __LINE__, __func__); \
-	printk(f, ##a); \
-} while (0)
+#define udf_debug(fmt, ...)					\
+	printk(KERN_DEBUG pr_fmt("%s:%d:%s: " fmt),		\
+	       __FILE__, __LINE__, __func__, ##__VA_ARGS__)
 #else
-#define udf_debug(f, a...) /**/
+#define udf_debug(fmt, ...)					\
+	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
-#define udf_info(f, a...) \
-	printk(KERN_INFO "UDF-fs INFO " f, ##a);
-
-
 #define udf_fixed_to_variable(x) ( ( ( (x) >> 5 ) * 39 ) + ( (x) & 0x0000001F ) )
 #define udf_variable_to_fixed(x) ( ( ( (x) / 39 ) << 5 ) + ( (x) % 39 ) )
 
@@ -112,8 +121,6 @@
 
 /* super.c */
 
-extern __printf(3, 4) void udf_warning(struct super_block *, const char *,
-					const char *, ...);
 static inline void udf_updated_lvid(struct super_block *sb)
 {
 	struct buffer_head *bh = UDF_SB(sb)->s_lvid_bh;
@@ -126,6 +133,8 @@
 	UDF_SB(sb)->s_lvid_dirty = 1;
 }
 extern u64 lvid_get_unique_id(struct super_block *sb);
+struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
+					u32 meta_file_loc, u32 partition_num);
 
 /* namei.c */
 extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c
index b8c828c..1f11483 100644
--- a/fs/udf/udftime.c
+++ b/fs/udf/udftime.c
@@ -34,9 +34,10 @@
  * http://www.boulder.nist.gov/timefreq/pubs/bulletin/leapsecond.htm
  */
 
+#include "udfdecl.h"
+
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include "udfdecl.h"
 
 #define EPOCH_YEAR 1970
 
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index d03a90b..44b815e 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -114,7 +114,7 @@
 	cmp_id = ocu_i->u_cmpID;
 	if (cmp_id != 8 && cmp_id != 16) {
 		memset(utf_o, 0, sizeof(struct ustr));
-		printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n",
+		pr_err("unknown compression code (%d) stri=%s\n",
 		       cmp_id, ocu_i->u_name);
 		return 0;
 	}
@@ -242,7 +242,7 @@
 	if (utf_cnt) {
 error_out:
 		ocu[++u_len] = '?';
-		printk(KERN_DEBUG "udf: bad UTF-8 character\n");
+		printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n"));
 	}
 
 	ocu[length - 1] = (uint8_t)u_len + 1;
@@ -267,7 +267,7 @@
 	cmp_id = ocu_i->u_cmpID;
 	if (cmp_id != 8 && cmp_id != 16) {
 		memset(utf_o, 0, sizeof(struct ustr));
-		printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n",
+		pr_err("unknown compression code (%d) stri=%s\n",
 		       cmp_id, ocu_i->u_name);
 		return 0;
 	}
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 53792bf..ce1b719 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -197,8 +197,8 @@
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT2_FL_INHERITED (EXT2_SECRM_FL | EXT2_UNRM_FL | EXT2_COMPR_FL |\
-			   EXT2_SYNC_FL | EXT2_IMMUTABLE_FL | EXT2_APPEND_FL |\
-			   EXT2_NODUMP_FL | EXT2_NOATIME_FL | EXT2_COMPRBLK_FL|\
+			   EXT2_SYNC_FL | EXT2_NODUMP_FL |\
+			   EXT2_NOATIME_FL | EXT2_COMPRBLK_FL |\
 			   EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\
 			   EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL)
 
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 81965cc..dec9911 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -180,8 +180,8 @@
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
-			   EXT3_SYNC_FL | EXT3_IMMUTABLE_FL | EXT3_APPEND_FL |\
-			   EXT3_NODUMP_FL | EXT3_NOATIME_FL | EXT3_COMPRBLK_FL|\
+			   EXT3_SYNC_FL | EXT3_NODUMP_FL |\
+			   EXT3_NOATIME_FL | EXT3_COMPRBLK_FL |\
 			   EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
 			   EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
 
@@ -381,7 +381,7 @@
  * Mount flags
  */
 #define EXT3_MOUNT_CHECK		0x00001	/* Do mount-time checks */
-#define EXT3_MOUNT_OLDALLOC		0x00002  /* Don't use the new Orlov allocator */
+/* EXT3_MOUNT_OLDALLOC was there */
 #define EXT3_MOUNT_GRPID		0x00004	/* Create files with directory's group */
 #define EXT3_MOUNT_DEBUG		0x00008	/* Some debugging messages */
 #define EXT3_MOUNT_ERRORS_CONT		0x00010	/* Continue on errors */
diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
index 258088a..6436525 100644
--- a/include/linux/ext3_fs_sb.h
+++ b/include/linux/ext3_fs_sb.h
@@ -76,10 +76,6 @@
 	struct mutex s_resize_lock;
 	unsigned long s_commit_interval;
 	struct block_device *journal_bdev;
-#ifdef CONFIG_JBD_DEBUG
-	struct timer_list turn_ro_timer;	/* For turning read-only (crash simulation) */
-	wait_queue_head_t ro_wait_queue;	/* For people waiting for the fs to go read-only */
-#endif
 #ifdef CONFIG_QUOTA
 	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
 	int s_jquota_fmt;			/* Format of quota to use */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 23467d7..0c4df26 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -780,12 +780,13 @@
 		unsigned int __i_nlink;
 	};
 	dev_t			i_rdev;
-	loff_t			i_size;
 	struct timespec		i_atime;
 	struct timespec		i_mtime;
 	struct timespec		i_ctime;
-	unsigned int		i_blkbits;
+	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
+	unsigned short          i_bytes;
 	blkcnt_t		i_blocks;
+	loff_t			i_size;
 
 #ifdef __NEED_I_SIZE_ORDERED
 	seqcount_t		i_size_seqcount;
@@ -793,7 +794,6 @@
 
 	/* Misc */
 	unsigned long		i_state;
-	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
 	struct mutex		i_mutex;
 
 	unsigned long		dirtied_when;	/* jiffies of first dirtying */
@@ -807,9 +807,10 @@
 		struct rcu_head		i_rcu;
 	};
 	atomic_t		i_count;
+	unsigned int		i_blkbits;
 	u64			i_version;
-	unsigned short          i_bytes;
 	atomic_t		i_dio_count;
+	atomic_t		i_writecount;
 	const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
 	struct file_lock	*i_flock;
 	struct address_space	i_data;
@@ -833,7 +834,6 @@
 #ifdef CONFIG_IMA
 	atomic_t		i_readcount; /* struct files open RO */
 #endif
-	atomic_t		i_writecount;
 	void			*i_private; /* fs or device private pointer */
 };
 
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index e6a5e34..c7acdde 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -244,6 +244,7 @@
 
 #include <linux/fs.h>
 #include <linux/sched.h>
+#include <linux/jbd_common.h>
 
 #define J_ASSERT(assert)	BUG_ON(!(assert))
 
@@ -270,69 +271,6 @@
 #define J_EXPECT_JH(jh, expr, why...)	__journal_expect(expr, ## why)
 #endif
 
-enum jbd_state_bits {
-	BH_JBD			/* Has an attached ext3 journal_head */
-	  = BH_PrivateStart,
-	BH_JWrite,		/* Being written to log (@@@ DEBUGGING) */
-	BH_Freed,		/* Has been freed (truncated) */
-	BH_Revoked,		/* Has been revoked from the log */
-	BH_RevokeValid,		/* Revoked flag is valid */
-	BH_JBDDirty,		/* Is dirty but journaled */
-	BH_State,		/* Pins most journal_head state */
-	BH_JournalHead,		/* Pins bh->b_private and jh->b_bh */
-	BH_Unshadow,		/* Dummy bit, for BJ_Shadow wakeup filtering */
-};
-
-BUFFER_FNS(JBD, jbd)
-BUFFER_FNS(JWrite, jwrite)
-BUFFER_FNS(JBDDirty, jbddirty)
-TAS_BUFFER_FNS(JBDDirty, jbddirty)
-BUFFER_FNS(Revoked, revoked)
-TAS_BUFFER_FNS(Revoked, revoked)
-BUFFER_FNS(RevokeValid, revokevalid)
-TAS_BUFFER_FNS(RevokeValid, revokevalid)
-BUFFER_FNS(Freed, freed)
-
-static inline struct buffer_head *jh2bh(struct journal_head *jh)
-{
-	return jh->b_bh;
-}
-
-static inline struct journal_head *bh2jh(struct buffer_head *bh)
-{
-	return bh->b_private;
-}
-
-static inline void jbd_lock_bh_state(struct buffer_head *bh)
-{
-	bit_spin_lock(BH_State, &bh->b_state);
-}
-
-static inline int jbd_trylock_bh_state(struct buffer_head *bh)
-{
-	return bit_spin_trylock(BH_State, &bh->b_state);
-}
-
-static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
-{
-	return bit_spin_is_locked(BH_State, &bh->b_state);
-}
-
-static inline void jbd_unlock_bh_state(struct buffer_head *bh)
-{
-	bit_spin_unlock(BH_State, &bh->b_state);
-}
-
-static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
-{
-	bit_spin_lock(BH_JournalHead, &bh->b_state);
-}
-
-static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
-{
-	bit_spin_unlock(BH_JournalHead, &bh->b_state);
-}
-
 struct jbd_revoke_table_s;
 
 /**
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 38f307b..2092ea2 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -275,6 +275,7 @@
 
 #include <linux/fs.h>
 #include <linux/sched.h>
+#include <linux/jbd_common.h>
 
 #define J_ASSERT(assert)	BUG_ON(!(assert))
 
@@ -302,70 +303,6 @@
 #define J_EXPECT_JH(jh, expr, why...)	__journal_expect(expr, ## why)
 #endif
 
-enum jbd_state_bits {
-	BH_JBD			/* Has an attached ext3 journal_head */
-	  = BH_PrivateStart,
-	BH_JWrite,		/* Being written to log (@@@ DEBUGGING) */
-	BH_Freed,		/* Has been freed (truncated) */
-	BH_Revoked,		/* Has been revoked from the log */
-	BH_RevokeValid,		/* Revoked flag is valid */
-	BH_JBDDirty,		/* Is dirty but journaled */
-	BH_State,		/* Pins most journal_head state */
-	BH_JournalHead,		/* Pins bh->b_private and jh->b_bh */
-	BH_Unshadow,		/* Dummy bit, for BJ_Shadow wakeup filtering */
-	BH_JBDPrivateStart,	/* First bit available for private use by FS */
-};
-
-BUFFER_FNS(JBD, jbd)
-BUFFER_FNS(JWrite, jwrite)
-BUFFER_FNS(JBDDirty, jbddirty)
-TAS_BUFFER_FNS(JBDDirty, jbddirty)
-BUFFER_FNS(Revoked, revoked)
-TAS_BUFFER_FNS(Revoked, revoked)
-BUFFER_FNS(RevokeValid, revokevalid)
-TAS_BUFFER_FNS(RevokeValid, revokevalid)
-BUFFER_FNS(Freed, freed)
-
-static inline struct buffer_head *jh2bh(struct journal_head *jh)
-{
-	return jh->b_bh;
-}
-
-static inline struct journal_head *bh2jh(struct buffer_head *bh)
-{
-	return bh->b_private;
-}
-
-static inline void jbd_lock_bh_state(struct buffer_head *bh)
-{
-	bit_spin_lock(BH_State, &bh->b_state);
-}
-
-static inline int jbd_trylock_bh_state(struct buffer_head *bh)
-{
-	return bit_spin_trylock(BH_State, &bh->b_state);
-}
-
-static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
-{
-	return bit_spin_is_locked(BH_State, &bh->b_state);
-}
-
-static inline void jbd_unlock_bh_state(struct buffer_head *bh)
-{
-	bit_spin_unlock(BH_State, &bh->b_state);
-}
-
-static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
-{
-	bit_spin_lock(BH_JournalHead, &bh->b_state);
-}
-
-static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
-{
-	bit_spin_unlock(BH_JournalHead, &bh->b_state);
-}
-
 /* Flags in jbd_inode->i_flags */
 #define __JI_COMMIT_RUNNING 0
 /* Commit of the inode data in progress. We use this flag to protect us from
@@ -1106,9 +1043,9 @@
  */
 
 extern handle_t *jbd2_journal_start(journal_t *, int nblocks);
-extern handle_t *jbd2__journal_start(journal_t *, int nblocks, int gfp_mask);
+extern handle_t *jbd2__journal_start(journal_t *, int nblocks, gfp_t gfp_mask);
 extern int	 jbd2_journal_restart(handle_t *, int nblocks);
-extern int	 jbd2__journal_restart(handle_t *, int nblocks, int gfp_mask);
+extern int	 jbd2__journal_restart(handle_t *, int nblocks, gfp_t gfp_mask);
 extern int	 jbd2_journal_extend (handle_t *, int nblocks);
 extern int	 jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
 extern int	 jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
diff --git a/include/linux/jbd_common.h b/include/linux/jbd_common.h
new file mode 100644
index 0000000..6230f85
--- /dev/null
+++ b/include/linux/jbd_common.h
@@ -0,0 +1,68 @@
+#ifndef _LINUX_JBD_STATE_H
+#define _LINUX_JBD_STATE_H
+
+enum jbd_state_bits {
+	BH_JBD			/* Has an attached ext3 journal_head */
+	  = BH_PrivateStart,
+	BH_JWrite,		/* Being written to log (@@@ DEBUGGING) */
+	BH_Freed,		/* Has been freed (truncated) */
+	BH_Revoked,		/* Has been revoked from the log */
+	BH_RevokeValid,		/* Revoked flag is valid */
+	BH_JBDDirty,		/* Is dirty but journaled */
+	BH_State,		/* Pins most journal_head state */
+	BH_JournalHead,		/* Pins bh->b_private and jh->b_bh */
+	BH_Unshadow,		/* Dummy bit, for BJ_Shadow wakeup filtering */
+	BH_JBDPrivateStart,	/* First bit available for private use by FS */
+};
+
+BUFFER_FNS(JBD, jbd)
+BUFFER_FNS(JWrite, jwrite)
+BUFFER_FNS(JBDDirty, jbddirty)
+TAS_BUFFER_FNS(JBDDirty, jbddirty)
+BUFFER_FNS(Revoked, revoked)
+TAS_BUFFER_FNS(Revoked, revoked)
+BUFFER_FNS(RevokeValid, revokevalid)
+TAS_BUFFER_FNS(RevokeValid, revokevalid)
+BUFFER_FNS(Freed, freed)
+
+static inline struct buffer_head *jh2bh(struct journal_head *jh)
+{
+	return jh->b_bh;
+}
+
+static inline struct journal_head *bh2jh(struct buffer_head *bh)
+{
+	return bh->b_private;
+}
+
+static inline void jbd_lock_bh_state(struct buffer_head *bh)
+{
+	bit_spin_lock(BH_State, &bh->b_state);
+}
+
+static inline int jbd_trylock_bh_state(struct buffer_head *bh)
+{
+	return bit_spin_trylock(BH_State, &bh->b_state);
+}
+
+static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
+{
+	return bit_spin_is_locked(BH_State, &bh->b_state);
+}
+
+static inline void jbd_unlock_bh_state(struct buffer_head *bh)
+{
+	bit_spin_unlock(BH_State, &bh->b_state);
+}
+
+static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
+{
+	bit_spin_lock(BH_JournalHead, &bh->b_state);
+}
+
+static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
+{
+	bit_spin_unlock(BH_JournalHead, &bh->b_state);
+}
+
+#endif
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index b50a547..748ff7c 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -9,9 +9,12 @@
 
 struct ext4_allocation_context;
 struct ext4_allocation_request;
+struct ext4_extent;
 struct ext4_prealloc_space;
 struct ext4_inode_info;
 struct mpage_da_data;
+struct ext4_map_blocks;
+struct ext4_extent;
 
 #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode))
 
@@ -1032,9 +1035,9 @@
 );
 
 TRACE_EVENT(ext4_da_update_reserve_space,
-	TP_PROTO(struct inode *inode, int used_blocks),
+	TP_PROTO(struct inode *inode, int used_blocks, int quota_claim),
 
-	TP_ARGS(inode, used_blocks),
+	TP_ARGS(inode, used_blocks, quota_claim),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
@@ -1045,6 +1048,7 @@
 		__field(	int,	reserved_data_blocks	)
 		__field(	int,	reserved_meta_blocks	)
 		__field(	int,	allocated_meta_blocks	)
+		__field(	int,	quota_claim		)
 	),
 
 	TP_fast_assign(
@@ -1053,19 +1057,24 @@
 		__entry->mode	= inode->i_mode;
 		__entry->i_blocks = inode->i_blocks;
 		__entry->used_blocks = used_blocks;
-		__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
-		__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
-		__entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
+		__entry->reserved_data_blocks =
+				EXT4_I(inode)->i_reserved_data_blocks;
+		__entry->reserved_meta_blocks =
+				EXT4_I(inode)->i_reserved_meta_blocks;
+		__entry->allocated_meta_blocks =
+				EXT4_I(inode)->i_allocated_meta_blocks;
+		__entry->quota_claim = quota_claim;
 	),
 
 	TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d "
 		  "reserved_data_blocks %d reserved_meta_blocks %d "
-		  "allocated_meta_blocks %d",
+		  "allocated_meta_blocks %d quota_claim %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->mode, __entry->i_blocks,
 		  __entry->used_blocks, __entry->reserved_data_blocks,
-		  __entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
+		  __entry->reserved_meta_blocks, __entry->allocated_meta_blocks,
+		  __entry->quota_claim)
 );
 
 TRACE_EVENT(ext4_da_reserve_space,
@@ -1386,6 +1395,87 @@
 	TP_ARGS(inode)
 );
 
+/* 'ux' is the uninitialized extent. */
+TRACE_EVENT(ext4_ext_convert_to_initialized_enter,
+	TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,
+		 struct ext4_extent *ux),
+
+	TP_ARGS(inode, map, ux),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,		ino	)
+		__field(	dev_t,		dev	)
+		__field(	ext4_lblk_t,	m_lblk	)
+		__field(	unsigned,	m_len	)
+		__field(	ext4_lblk_t,	u_lblk	)
+		__field(	unsigned,	u_len	)
+		__field(	ext4_fsblk_t,	u_pblk	)
+	),
+
+	TP_fast_assign(
+		__entry->ino		= inode->i_ino;
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->m_lblk		= map->m_lblk;
+		__entry->m_len		= map->m_len;
+		__entry->u_lblk		= le32_to_cpu(ux->ee_block);
+		__entry->u_len		= ext4_ext_get_actual_len(ux);
+		__entry->u_pblk		= ext4_ext_pblock(ux);
+	),
+
+	TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u u_lblk %u u_len %u "
+		  "u_pblk %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->m_lblk, __entry->m_len,
+		  __entry->u_lblk, __entry->u_len, __entry->u_pblk)
+);
+
+/*
+ * 'ux' is the uninitialized extent.
+ * 'ix' is the initialized extent to which blocks are transferred.
+ */
+TRACE_EVENT(ext4_ext_convert_to_initialized_fastpath,
+	TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,
+		 struct ext4_extent *ux, struct ext4_extent *ix),
+
+	TP_ARGS(inode, map, ux, ix),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,		ino	)
+		__field(	dev_t,		dev	)
+		__field(	ext4_lblk_t,	m_lblk	)
+		__field(	unsigned,	m_len	)
+		__field(	ext4_lblk_t,	u_lblk	)
+		__field(	unsigned,	u_len	)
+		__field(	ext4_fsblk_t,	u_pblk	)
+		__field(	ext4_lblk_t,	i_lblk	)
+		__field(	unsigned,	i_len	)
+		__field(	ext4_fsblk_t,	i_pblk	)
+	),
+
+	TP_fast_assign(
+		__entry->ino		= inode->i_ino;
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->m_lblk		= map->m_lblk;
+		__entry->m_len		= map->m_len;
+		__entry->u_lblk		= le32_to_cpu(ux->ee_block);
+		__entry->u_len		= ext4_ext_get_actual_len(ux);
+		__entry->u_pblk		= ext4_ext_pblock(ux);
+		__entry->i_lblk		= le32_to_cpu(ix->ee_block);
+		__entry->i_len		= ext4_ext_get_actual_len(ix);
+		__entry->i_pblk		= ext4_ext_pblock(ix);
+	),
+
+	TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u "
+		  "u_lblk %u u_len %u u_pblk %llu "
+		  "i_lblk %u i_len %u i_pblk %llu ",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->m_lblk, __entry->m_len,
+		  __entry->u_lblk, __entry->u_len, __entry->u_pblk,
+		  __entry->i_lblk, __entry->i_len, __entry->i_pblk)
+);
+
 DECLARE_EVENT_CLASS(ext4__map_blocks_enter,
 	TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
 		 unsigned int len, unsigned int flags),
@@ -1589,6 +1679,382 @@
 	TP_ARGS(sb, group, start, len)
 );
 
+TRACE_EVENT(ext4_ext_handle_uninitialized_extents,
+	TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,
+		 unsigned int allocated, ext4_fsblk_t newblock),
+
+	TP_ARGS(inode, map, allocated, newblock),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,		ino		)
+		__field(	dev_t,		dev		)
+		__field(	ext4_lblk_t,	lblk		)
+		__field(	ext4_fsblk_t,	pblk		)
+		__field(	unsigned int,	len		)
+		__field(	int,		flags		)
+		__field(	unsigned int,	allocated	)
+		__field(	ext4_fsblk_t,	newblk		)
+	),
+
+	TP_fast_assign(
+		__entry->ino		= inode->i_ino;
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->lblk		= map->m_lblk;
+		__entry->pblk		= map->m_pblk;
+		__entry->len		= map->m_len;
+		__entry->flags		= map->m_flags;
+		__entry->allocated	= allocated;
+		__entry->newblk		= newblock;
+	),
+
+	TP_printk("dev %d,%d ino %lu m_lblk %u m_pblk %llu m_len %u flags %d"
+		  "allocated %d newblock %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned) __entry->lblk, (unsigned long long) __entry->pblk,
+		  __entry->len, __entry->flags,
+		  (unsigned int) __entry->allocated,
+		  (unsigned long long) __entry->newblk)
+);
+
+TRACE_EVENT(ext4_get_implied_cluster_alloc_exit,
+	TP_PROTO(struct super_block *sb, struct ext4_map_blocks *map, int ret),
+
+	TP_ARGS(sb, map, ret),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,		dev	)
+		__field(	ext4_lblk_t,	lblk	)
+		__field(	ext4_fsblk_t,	pblk	)
+		__field(	unsigned int,	len	)
+		__field(	unsigned int,	flags	)
+		__field(	int,		ret	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= sb->s_dev;
+		__entry->lblk	= map->m_lblk;
+		__entry->pblk	= map->m_pblk;
+		__entry->len	= map->m_len;
+		__entry->flags	= map->m_flags;
+		__entry->ret	= ret;
+	),
+
+	TP_printk("dev %d,%d m_lblk %u m_pblk %llu m_len %u m_flags %u ret %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->lblk, (unsigned long long) __entry->pblk,
+		  __entry->len, __entry->flags, __entry->ret)
+);
+
+TRACE_EVENT(ext4_ext_put_in_cache,
+	TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len,
+		 ext4_fsblk_t start),
+
+	TP_ARGS(inode, lblk, len, start),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,		ino	)
+		__field(	dev_t,		dev	)
+		__field(	ext4_lblk_t,	lblk	)
+		__field(	unsigned int,	len	)
+		__field(	ext4_fsblk_t,	start	)
+	),
+
+	TP_fast_assign(
+		__entry->ino	= inode->i_ino;
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->lblk	= lblk;
+		__entry->len	= len;
+		__entry->start	= start;
+	),
+
+	TP_printk("dev %d,%d ino %lu lblk %u len %u start %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned) __entry->lblk,
+		  __entry->len,
+		  (unsigned long long) __entry->start)
+);
+
+TRACE_EVENT(ext4_ext_in_cache,
+	TP_PROTO(struct inode *inode, ext4_lblk_t lblk, int ret),
+
+	TP_ARGS(inode, lblk, ret),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,		ino	)
+		__field(	dev_t,		dev	)
+		__field(	ext4_lblk_t,	lblk	)
+		__field(	int,		ret	)
+	),
+
+	TP_fast_assign(
+		__entry->ino	= inode->i_ino;
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->lblk	= lblk;
+		__entry->ret	= ret;
+	),
+
+	TP_printk("dev %d,%d ino %lu lblk %u ret %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned) __entry->lblk,
+		  __entry->ret)
+
+);
+
+TRACE_EVENT(ext4_find_delalloc_range,
+	TP_PROTO(struct inode *inode, ext4_lblk_t from, ext4_lblk_t to,
+		int reverse, int found, ext4_lblk_t found_blk),
+
+	TP_ARGS(inode, from, to, reverse, found, found_blk),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,		ino		)
+		__field(	dev_t,		dev		)
+		__field(	ext4_lblk_t,	from		)
+		__field(	ext4_lblk_t,	to		)
+		__field(	int,		reverse		)
+		__field(	int,		found		)
+		__field(	ext4_lblk_t,	found_blk	)
+	),
+
+	TP_fast_assign(
+		__entry->ino		= inode->i_ino;
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->from		= from;
+		__entry->to		= to;
+		__entry->reverse	= reverse;
+		__entry->found		= found;
+		__entry->found_blk	= found_blk;
+	),
+
+	TP_printk("dev %d,%d ino %lu from %u to %u reverse %d found %d "
+		  "(blk = %u)",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned) __entry->from, (unsigned) __entry->to,
+		  __entry->reverse, __entry->found,
+		  (unsigned) __entry->found_blk)
+);
+
+TRACE_EVENT(ext4_get_reserved_cluster_alloc,
+	TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len),
+
+	TP_ARGS(inode, lblk, len),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,		ino	)
+		__field(	dev_t,		dev	)
+		__field(	ext4_lblk_t,	lblk	)
+		__field(	unsigned int,	len	)
+	),
+
+	TP_fast_assign(
+		__entry->ino	= inode->i_ino;
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->lblk	= lblk;
+		__entry->len	= len;
+	),
+
+	TP_printk("dev %d,%d ino %lu lblk %u len %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned) __entry->lblk,
+		  __entry->len)
+);
+
+TRACE_EVENT(ext4_ext_show_extent,
+	TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
+		 unsigned short len),
+
+	TP_ARGS(inode, lblk, pblk, len),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,		ino	)
+		__field(	dev_t,		dev	)
+		__field(	ext4_lblk_t,	lblk	)
+		__field(	ext4_fsblk_t,	pblk	)
+		__field(	unsigned short,	len	)
+	),
+
+	TP_fast_assign(
+		__entry->ino	= inode->i_ino;
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->lblk	= lblk;
+		__entry->pblk	= pblk;
+		__entry->len	= len;
+	),
+
+	TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned) __entry->lblk,
+		  (unsigned long long) __entry->pblk,
+		  (unsigned short) __entry->len)
+);
+
+TRACE_EVENT(ext4_remove_blocks,
+	    TP_PROTO(struct inode *inode, struct ext4_extent *ex,
+		ext4_lblk_t from, ext4_fsblk_t to,
+		ext4_fsblk_t partial_cluster),
+
+	TP_ARGS(inode, ex, from, to, partial_cluster),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,		ino	)
+		__field(	dev_t,		dev	)
+		__field(	ext4_lblk_t,	ee_lblk	)
+		__field(	ext4_fsblk_t,	ee_pblk	)
+		__field(	unsigned short,	ee_len	)
+		__field(	ext4_lblk_t,	from	)
+		__field(	ext4_lblk_t,	to	)
+		__field(	ext4_fsblk_t,	partial	)
+	),
+
+	TP_fast_assign(
+		__entry->ino		= inode->i_ino;
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->ee_lblk	= cpu_to_le32(ex->ee_block);
+		__entry->ee_pblk	= ext4_ext_pblock(ex);
+		__entry->ee_len		= ext4_ext_get_actual_len(ex);
+		__entry->from		= from;
+		__entry->to		= to;
+		__entry->partial	= partial_cluster;
+	),
+
+	TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]"
+		  "from %u to %u partial_cluster %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned) __entry->ee_lblk,
+		  (unsigned long long) __entry->ee_pblk,
+		  (unsigned short) __entry->ee_len,
+		  (unsigned) __entry->from,
+		  (unsigned) __entry->to,
+		  (unsigned) __entry->partial)
+);
+
+TRACE_EVENT(ext4_ext_rm_leaf,
+	TP_PROTO(struct inode *inode, ext4_lblk_t start,
+		 struct ext4_extent *ex, ext4_fsblk_t partial_cluster),
+
+	TP_ARGS(inode, start, ex, partial_cluster),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,		ino	)
+		__field(	dev_t,		dev	)
+		__field(	ext4_lblk_t,	start	)
+		__field(	ext4_lblk_t,	ee_lblk	)
+		__field(	ext4_fsblk_t,	ee_pblk	)
+		__field(	short,		ee_len	)
+		__field(	ext4_fsblk_t,	partial	)
+	),
+
+	TP_fast_assign(
+		__entry->ino		= inode->i_ino;
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->start		= start;
+		__entry->ee_lblk	= le32_to_cpu(ex->ee_block);
+		__entry->ee_pblk	= ext4_ext_pblock(ex);
+		__entry->ee_len		= ext4_ext_get_actual_len(ex);
+		__entry->partial	= partial_cluster;
+	),
+
+	TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]"
+		  "partial_cluster %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned) __entry->start,
+		  (unsigned) __entry->ee_lblk,
+		  (unsigned long long) __entry->ee_pblk,
+		  (unsigned short) __entry->ee_len,
+		  (unsigned) __entry->partial)
+);
+
+TRACE_EVENT(ext4_ext_rm_idx,
+	TP_PROTO(struct inode *inode, ext4_fsblk_t pblk),
+
+	TP_ARGS(inode, pblk),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,		ino	)
+		__field(	dev_t,		dev	)
+		__field(	ext4_fsblk_t,	pblk	)
+	),
+
+	TP_fast_assign(
+		__entry->ino	= inode->i_ino;
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->pblk	= pblk;
+	),
+
+	TP_printk("dev %d,%d ino %lu index_pblk %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned long long) __entry->pblk)
+);
+
+TRACE_EVENT(ext4_ext_remove_space,
+	TP_PROTO(struct inode *inode, ext4_lblk_t start, int depth),
+
+	TP_ARGS(inode, start, depth),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,		ino	)
+		__field(	dev_t,		dev	)
+		__field(	ext4_lblk_t,	start	)
+		__field(	int,		depth	)
+	),
+
+	TP_fast_assign(
+		__entry->ino	= inode->i_ino;
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->start	= start;
+		__entry->depth	= depth;
+	),
+
+	TP_printk("dev %d,%d ino %lu since %u depth %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned) __entry->start,
+		  __entry->depth)
+);
+
+TRACE_EVENT(ext4_ext_remove_space_done,
+	TP_PROTO(struct inode *inode, ext4_lblk_t start, int depth,
+		ext4_lblk_t partial, unsigned short eh_entries),
+
+	TP_ARGS(inode, start, depth, partial, eh_entries),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,		ino		)
+		__field(	dev_t,		dev		)
+		__field(	ext4_lblk_t,	start		)
+		__field(	int,		depth		)
+		__field(	ext4_lblk_t,	partial		)
+		__field(	unsigned short,	eh_entries	)
+	),
+
+	TP_fast_assign(
+		__entry->ino		= inode->i_ino;
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->start		= start;
+		__entry->depth		= depth;
+		__entry->partial	= partial;
+		__entry->eh_entries	= eh_entries;
+	),
+
+	TP_printk("dev %d,%d ino %lu since %u depth %d partial %u "
+		  "remaining_entries %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned) __entry->start,
+		  __entry->depth,
+		  (unsigned) __entry->partial,
+		  (unsigned short) __entry->eh_entries)
+);
+
 #endif /* _TRACE_EXT4_H */
 
 /* This part must be outside protection */
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index 19c053b..4f554f20 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -9,7 +9,7 @@
 	select CRYPTO_HMAC
 	select CRYPTO_MD5
 	select CRYPTO_SHA1
-	select TCG_TPM if !S390
+	select TCG_TPM if !S390 && !UML
 	select TCG_TIS if TCG_TPM
 	help
 	  The Trusted Computing Group(TCG) runtime Integrity
diff --git a/sound/Kconfig b/sound/Kconfig
index 1fef141..261a03c 100644
--- a/sound/Kconfig
+++ b/sound/Kconfig
@@ -59,7 +59,7 @@
 
 source "sound/oss/dmasound/Kconfig"
 
-if !M68K
+if !M68K && !UML
 
 menuconfig SND
 	tristate "Advanced Linux Sound Architecture"