diff --git a/Documentation/DocBook/s390-drivers.tmpl b/Documentation/DocBook/s390-drivers.tmpl
index 3d2f31b..4acc732 100644
--- a/Documentation/DocBook/s390-drivers.tmpl
+++ b/Documentation/DocBook/s390-drivers.tmpl
@@ -59,7 +59,7 @@
    <title>Introduction</title>
   <para>
     This document describes the interfaces available for device drivers that
-    drive s390 based channel attached devices. This includes interfaces for
+    drive s390 based channel attached I/O devices. This includes interfaces for
     interaction with the hardware and interfaces for interacting with the
     common driver core. Those interfaces are provided by the s390 common I/O
     layer.
@@ -86,9 +86,10 @@
 	The ccw bus typically contains the majority of devices available to
 	a s390 system. Named after the channel command word (ccw), the basic
 	command structure used to address its devices, the ccw bus contains
-	so-called channel attached devices. They are addressed via subchannels,
-	visible on the css bus. A device driver, however, will never interact
-	with the subchannel directly, but only via the device on the ccw bus,
+	so-called channel attached devices. They are addressed via I/O
+	subchannels, visible on the css bus. A device driver for
+	channel-attached devices, however, will never interact with the
+	subchannel directly, but only via the I/O device on the ccw bus,
 	the ccw device.
   </para>
     <sect1 id="channelIO">
@@ -116,7 +117,6 @@
 !Iinclude/asm-s390/ccwdev.h
 !Edrivers/s390/cio/device.c
 !Edrivers/s390/cio/device_ops.c
-!Edrivers/s390/cio/airq.c
     </sect1>
     <sect1 id="cmf">
      <title>The channel-measurement facility</title>
@@ -147,4 +147,15 @@
    </sect1>
   </chapter>
 
+  <chapter id="genericinterfaces">
+   <title>Generic interfaces</title>
+  <para>
+	Some interfaces are available to other drivers that do not necessarily
+	have anything to do with the busses described above, but still are
+	indirectly using basic infrastructure in the common I/O layer.
+	One example is the support for adapter interrupts.
+  </para>
+!Edrivers/s390/cio/airq.c
+  </chapter>
+
 </book>
diff --git a/Documentation/Smack.txt b/Documentation/Smack.txt
new file mode 100644
index 0000000..989c2fc
--- /dev/null
+++ b/Documentation/Smack.txt
@@ -0,0 +1,493 @@
+
+
+    "Good for you, you've decided to clean the elevator!"
+    - The Elevator, from Dark Star
+
+Smack is the Simplified Mandatory Access Control Kernel.
+Smack is a kernel based implementation of mandatory access
+control that includes simplicity in its primary design goals.
+
+Smack is not the only Mandatory Access Control scheme
+available for Linux. Those new to Mandatory Access Control
+are encouraged to compare Smack with the other mechanisms
+available to determine which is best suited to the problem
+at hand.
+
+Smack consists of three major components:
+    - The kernel
+    - A start-up script and a few modified applications
+    - Configuration data
+
+The kernel component of Smack is implemented as a Linux
+Security Modules (LSM) module. It requires netlabel and
+works best with file systems that support extended attributes,
+although xattr support is not strictly required.
+It is safe to run a Smack kernel under a "vanilla" distribution.
+Smack kernels use the CIPSO IP option. Some network
+configurations are intolerant of IP options and can impede
+access to systems that use them as Smack does.
+
+The startup script etc-init.d-smack should be installed
+in /etc/init.d/smack and should be invoked early in the
+start-up process. On Fedora rc5.d/S02smack is recommended.
+This script ensures that certain devices have the correct
+Smack attributes and loads the Smack configuration if
+any is defined. This script invokes two programs that
+ensure configuration data is properly formatted. These
+programs are /usr/sbin/smackload and /usr/sbin/smackcipso.
+The system will run just fine without these programs,
+but it will be difficult to set access rules properly.
+
+A version of "ls" that provides a "-M" option to display
+Smack labels on long listing is available.
+
+A hacked version of sshd that allows network logins by users
+with specific Smack labels is available. This version does
+not work for scp. You must set the /etc/ssh/sshd_config
+line:
+   UsePrivilegeSeparation no
+
+The format of /etc/smack/usr is:
+
+   username smack
+
+In keeping with the intent of Smack, configuration data is
+minimal and not strictly required. The most important
+configuration step is mounting the smackfs pseudo filesystem.
+
+Add this line to /etc/fstab:
+
+    smackfs /smack smackfs smackfsdef=* 0 0
+
+and create the /smack directory for mounting.
+
+Smack uses extended attributes (xattrs) to store file labels.
+The command to set a Smack label on a file is:
+
+    # attr -S -s SMACK64 -V "value" path
+
+NOTE: Smack labels are limited to 23 characters. The attr command
+      does not enforce this restriction and can be used to set
+      invalid Smack labels on files.
+
+If you don't do anything special all users will get the floor ("_")
+label when they log in. If you do want to log in via the hacked ssh
+at other labels use the attr command to set the smack value on the
+home directory and its contents.
+
+You can add access rules in /etc/smack/accesses. They take the form:
+
+    subjectlabel objectlabel access
+
+access is a combination of the letters rwxa which specify the
+kind of access permitted a subject with subjectlabel on an
+object with objectlabel. If there is no rule no access is allowed.
+
+A process can see the smack label it is running with by
+reading /proc/self/attr/current. A privileged process can
+set the process smack by writing there.
+
+Look for additional programs on http://schaufler-ca.com
+
+From the Smack Whitepaper:
+
+The Simplified Mandatory Access Control Kernel
+
+Casey Schaufler
+casey@schaufler-ca.com
+
+Mandatory Access Control
+
+Computer systems employ a variety of schemes to constrain how information is
+shared among the people and services using the machine. Some of these schemes
+allow the program or user to decide what other programs or users are allowed
+access to pieces of data. These schemes are called discretionary access
+control mechanisms because the access control is specified at the discretion
+of the user. Other schemes do not leave the decision regarding what a user or
+program can access up to users or programs. These schemes are called mandatory
+access control mechanisms because you don't have a choice regarding the users
+or programs that have access to pieces of data.
+
+Bell & LaPadula
+
+From the middle of the 1980's until the turn of the century Mandatory Access
+Control (MAC) was very closely associated with the Bell & LaPadula security
+model, a mathematical description of the United States Department of Defense
+policy for marking paper documents. MAC in this form enjoyed a following
+within the Capital Beltway and Scandinavian supercomputer centers but was
+often cited as failing to address general needs.
+
+Domain Type Enforcement
+
+Around the turn of the century Domain Type Enforcement (DTE) became popular.
+This scheme organizes users, programs, and data into domains that are
+protected from each other. This scheme has been widely deployed as a component
+of popular Linux distributions. The administrative overhead required to
+maintain this scheme and the detailed understanding of the whole system
+necessary to provide a secure domain mapping leads to the scheme being
+disabled or used in limited ways in the majority of cases.
+
+Smack
+
+Smack is a Mandatory Access Control mechanism designed to provide useful MAC
+while avoiding the pitfalls of its predecessors. The limitations of Bell &
+LaPadula are addressed by providing a scheme whereby access can be controlled
+according to the requirements of the system and its purpose rather than those
+imposed by an arcane government policy. The complexity of Domain Type
+Enforcement is avoided by defining access controls in terms of the access
+modes already in use.
+
+Smack Terminology
+
+The jargon used to talk about Smack will be familiar to those who have dealt
+with other MAC systems and shouldn't be too difficult for the uninitiated to
+pick up. There are four terms that are used in a specific way and that are
+especially important:
+
+	Subject: A subject is an active entity on the computer system.
+	On Smack a subject is a task, which is in turn the basic unit
+	of execution.
+
+	Object: An object is a passive entity on the computer system.
+	On Smack files of all types, IPC, and tasks can be objects.
+
+	Access: Any attempt by a subject to put information into or get
+	information from an object is an access.
+
+	Label: Data that identifies the Mandatory Access Control
+	characteristics of a subject or an object.
+
+These definitions are consistent with the traditional use in the security
+community. There are also some terms from Linux that are likely to crop up:
+
+	Capability: A task that possesses a capability has permission to
+	violate an aspect of the system security policy, as identified by
+	the specific capability. A task that possesses one or more
+	capabilities is a privileged task, whereas a task with no
+	capabilities is an unprivileged task.
+
+	Privilege: A task that is allowed to violate the system security
+	policy is said to have privilege. As of this writing a task can
+	have privilege either by possessing capabilities or by having an
+	effective user of root.
+
+Smack Basics
+
+Smack is an extension to a Linux system. It enforces additional restrictions
+on what subjects can access which objects, based on the labels attached to
+each of the subject and the object.
+
+Labels
+
+Smack labels are ASCII character strings, one to twenty-three characters in
+length. Single character labels using special characters, that being anything
+other than a letter or digit, are reserved for use by the Smack development
+team. Smack labels are unstructured, case sensitive, and the only operation
+ever performed on them is comparison for equality. Smack labels cannot
+contain unprintable characters or the "/" (slash) character.
+
+There are some predefined labels:
+
+	_ Pronounced "floor", a single underscore character.
+	^ Pronounced "hat", a single circumflex character.
+	* Pronounced "star", a single asterisk character.
+	? Pronounced "huh", a single question mark character.
+
+Every task on a Smack system is assigned a label. System tasks, such as
+init(8) and system daemons, are run with the floor ("_") label. User tasks
+are assigned labels according to the specification found in the
+/etc/smack/user configuration file.
+
+Access Rules
+
+Smack uses the traditional access modes of Linux. These modes are read,
+execute, write, and occasionally append. There are a few cases where the
+access mode may not be obvious. These include:
+
+	Signals: A signal is a write operation from the subject task to
+	the object task.
+	Internet Domain IPC: Transmission of a packet is considered a
+	write operation from the source task to the destination task.
+
+Smack restricts access based on the label attached to a subject and the label
+attached to the object it is trying to access. The rules enforced are, in
+order:
+
+	1. Any access requested by a task labeled "*" is denied.
+	2. A read or execute access requested by a task labeled "^"
+	   is permitted.
+	3. A read or execute access requested on an object labeled "_"
+	   is permitted.
+	4. Any access requested on an object labeled "*" is permitted.
+	5. Any access requested by a task on an object with the same
+	   label is permitted.
+	6. Any access requested that is explicitly defined in the loaded
+	   rule set is permitted.
+	7. Any other access is denied.
+
+Smack Access Rules
+
+With the isolation provided by Smack access separation is simple. There are
+many interesting cases where limited access by subjects to objects with
+different labels is desired. One example is the familiar spy model of
+sensitivity, where a scientist working on a highly classified project would be
+able to read documents of lower classifications and anything she writes will
+be "born" highly classified. To accommodate such schemes Smack includes a
+mechanism for specifying rules allowing access between labels.
+
+Access Rule Format
+
+The format of an access rule is:
+
+	subject-label object-label access
+
+Where subject-label is the Smack label of the task, object-label is the Smack
+label of the thing being accessed, and access is a string specifying the sort
+of access allowed. The Smack labels are limited to 23 characters. The access
+specification is searched for letters that describe access modes:
+
+	a: indicates that append access should be granted.
+	r: indicates that read access should be granted.
+	w: indicates that write access should be granted.
+	x: indicates that execute access should be granted.
+
+Uppercase values for the specification letters are allowed as well.
+Access mode specifications can be in any order. Examples of acceptable rules
+are:
+
+	TopSecret Secret  rx
+	Secret    Unclass R
+	Manager   Game    x
+	User      HR      w
+	New       Old     rRrRr
+	Closed    Off     -
+
+Examples of unacceptable rules are:
+
+	Top Secret Secret     rx
+	Ace        Ace        r
+	Odd        spells     waxbeans
+
+Spaces are not allowed in labels. Since a subject always has access to files
+with the same label specifying a rule for that case is pointless. Only
+valid letters (rwxaRWXA) and the dash ('-') character are allowed in
+access specifications. The dash is a placeholder, so "a-r" is the same
+as "ar". A lone dash is used to specify that no access should be allowed.
+
+Applying Access Rules
+
+The developers of Linux rarely define new sorts of things, usually importing
+schemes and concepts from other systems. Most often, the other systems are
+variants of Unix. Unix has many endearing properties, but consistency of
+access control models is not one of them. Smack strives to treat accesses as
+uniformly as is sensible while keeping with the spirit of the underlying
+mechanism.
+
+File system objects including files, directories, named pipes, symbolic links,
+and devices require access permissions that closely match those used by mode
+bit access. To open a file for reading read access is required on the file. To
+search a directory requires execute access. Creating a file with write access
+requires both read and write access on the containing directory. Deleting a
+file requires read and write access to the file and to the containing
+directory. It is possible that a user may be able to see that a file exists
+but not any of its attributes by the circumstance of having read access to the
+containing directory but not to the differently labeled file. This is an
+artifact of the file name being data in the directory, not a part of the file.
+
+IPC objects, message queues, semaphore sets, and memory segments exist in flat
+namespaces and access requests are only required to match the object in
+question.
+
+Process objects reflect tasks on the system and the Smack label used to access
+them is the same Smack label that the task would use for its own access
+attempts. Sending a signal via the kill() system call is a write operation
+from the signaler to the recipient. Debugging a process requires both reading
+and writing. Creating a new task is an internal operation that results in two
+tasks with identical Smack labels and requires no access checks.
+
+Sockets are data structures attached to processes and sending a packet from
+one process to another requires that the sender have write access to the
+receiver. The receiver is not required to have read access to the sender.
+
+Setting Access Rules
+
+The configuration file /etc/smack/accesses contains the rules to be set at
+system startup. The contents are written to the special file /smack/load.
+Rules can be written to /smack/load at any time and take effect immediately.
+For any pair of subject and object labels there can be only one rule, with the
+most recently specified overriding any earlier specification.
+
+The program smackload is provided to ensure data is formatted
+properly when written to /smack/load. This program reads lines
+of the form
+
+    subjectlabel objectlabel mode.
+
+Task Attribute
+
+The Smack label of a process can be read from /proc/<pid>/attr/current. A
+process can read its own Smack label from /proc/self/attr/current. A
+privileged process can change its own Smack label by writing to
+/proc/self/attr/current but not the label of another process.
+
+File Attribute
+
+The Smack label of a filesystem object is stored as an extended attribute
+named SMACK64 on the file. This attribute is in the security namespace. It can
+only be changed by a process with privilege.
+
+Privilege
+
+A process with CAP_MAC_OVERRIDE is privileged.
+
+Smack Networking
+
+As mentioned before, Smack enforces access control on network protocol
+transmissions. Every packet sent by a Smack process is tagged with its Smack
+label. This is done by adding a CIPSO tag to the header of the IP packet. Each
+packet received is expected to have a CIPSO tag that identifies the label and
+if it lacks such a tag the network ambient label is assumed. Before the packet
+is delivered a check is made to determine that a subject with the label on the
+packet has write access to the receiving process and if that is not the case
+the packet is dropped.
+
+CIPSO Configuration
+
+It is normally unnecessary to specify the CIPSO configuration. The default
+values used by the system handle all internal cases. Smack will compose CIPSO
+label values to match the Smack labels being used without administrative
+intervention. Unlabeled packets that come into the system will be given the
+ambient label.
+
+Smack requires configuration in the case where packets from a system that is
+not smack that speaks CIPSO may be encountered. Usually this will be a Trusted
+Solaris system, but there are other, less widely deployed systems out there.
+CIPSO provides 3 important values, a Domain Of Interpretation (DOI), a level,
+and a category set with each packet. The DOI is intended to identify a group
+of systems that use compatible labeling schemes, and the DOI specified on the
+smack system must match that of the remote system or packets will be
+discarded. The DOI is 3 by default. The value can be read from /smack/doi and
+can be changed by writing to /smack/doi.
+
+The label and category set are mapped to a Smack label as defined in
+/etc/smack/cipso.
+
+A Smack/CIPSO mapping has the form:
+
+	smack level [category [category]*]
+
+Smack does not expect the level or category sets to be related in any
+particular way and does not assume or assign accesses based on them. Some
+examples of mappings:
+
+	TopSecret 7
+	TS:A,B    7 1 2
+	SecBDE    5 2 4 6
+	RAFTERS   7 12 26
+
+The ":" and "," characters are permitted in a Smack label but have no special
+meaning.
+
+The mapping of Smack labels to CIPSO values is defined by writing to
+/smack/cipso. Again, the format of data written to this special file
+is highly restrictive, so the program smackcipso is provided to
+ensure the writes are done properly. This program takes mappings
+on the standard input and sends them to /smack/cipso properly.
+
+In addition to explicit mappings Smack supports direct CIPSO mappings. One
+CIPSO level is used to indicate that the category set passed in the packet is
+in fact an encoding of the Smack label. The level used is 250 by default. The
+value can be read from /smack/direct and changed by writing to /smack/direct.
+
+Socket Attributes
+
+There are two attributes that are associated with sockets. These attributes
+can only be set by privileged tasks, but any task can read them for their own
+sockets.
+
+	SMACK64IPIN: The Smack label of the task object. A privileged
+	program that will enforce policy may set this to the star label.
+
+	SMACK64IPOUT: The Smack label transmitted with outgoing packets.
+	A privileged program may set this to match the label of another
+	task with which it hopes to communicate.
+
+Writing Applications for Smack
+
+There are three sorts of applications that will run on a Smack system. How an
+application interacts with Smack will determine what it will have to do to
+work properly under Smack.
+
+Smack Ignorant Applications
+
+By far the majority of applications have no reason whatever to care about the
+unique properties of Smack. Since invoking a program has no impact on the
+Smack label associated with the process the only concern likely to arise is
+whether the process has execute access to the program.
+
+Smack Relevant Applications
+
+Some programs can be improved by teaching them about Smack, but do not make
+any security decisions themselves. The utility ls(1) is one example of such a
+program.
+
+Smack Enforcing Applications
+
+These are special programs that not only know about Smack, but participate in
+the enforcement of system policy. In most cases these are the programs that
+set up user sessions. There are also network services that provide information
+to processes running with various labels.
+
+File System Interfaces
+
+Smack maintains labels on file system objects using extended attributes. The
+Smack label of a file, directory, or other file system object can be obtained
+using getxattr(2).
+
+	len = getxattr("/", "security.SMACK64", value, sizeof (value));
+
+will put the Smack label of the root directory into value. A privileged
+process can set the Smack label of a file system object with setxattr(2).
+
+	len = strlen("Rubble");
+	rc = setxattr("/foo", "security.SMACK64", "Rubble", len, 0);
+
+will set the Smack label of /foo to "Rubble" if the program has appropriate
+privilege.
+
+Socket Interfaces
+
+The socket attributes can be read using fgetxattr(2).
+
+A privileged process can set the Smack label of outgoing packets with
+fsetxattr(2).
+
+	len = strlen("Rubble");
+	rc = fsetxattr(fd, "security.SMACK64IPOUT", "Rubble", len, 0);
+
+will set the Smack label "Rubble" on packets going out from the socket if the
+program has appropriate privilege.
+
+	rc = fsetxattr(fd, "security.SMACK64IPIN", "*", strlen("*"), 0);
+
+will set the Smack label "*" as the object label against which incoming
+packets will be checked if the program has appropriate privilege.
+
+Administration
+
+Smack supports some mount options:
+
+	smackfsdef=label: specifies the label to give files that lack
+	the Smack label extended attribute.
+
+	smackfsroot=label: specifies the label to assign the root of the
+	file system if it lacks the Smack extended attribute.
+
+	smackfshat=label: specifies a label that must have read access to
+	all labels set on the filesystem. Not yet enforced.
+
+	smackfsfloor=label: specifies a label to which all labels set on the
+	filesystem must have read access. Not yet enforced.
+
+These mount options apply to all file system types.
+
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 0b1b0c0..e2799b5 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1315,13 +1315,28 @@
 Data which has been dirty in-memory for longer than this interval will be
 written out next time a pdflush daemon wakes up.
 
+highmem_is_dirtyable
+--------------------
+
+Only present if CONFIG_HIGHMEM is set.
+
+This defaults to 0 (false), meaning that the ratios set above are calculated
+as a percentage of lowmem only.  This protects against excessive scanning
+in page reclaim, swapping and general VM distress.
+
+Setting this to 1 can be useful on 32 bit machines where you want to make
+random changes within an MMAPed file that is larger than your available
+lowmem without causing large quantities of random IO.  It is safe if the
+behavior of all programs running on the machine is known and memory will
+not be otherwise stressed.
+
 legacy_va_layout
 ----------------
 
 If non-zero, this sysctl disables the new 32-bit mmap mmap layout - the kernel
 will use the legacy (2.4) layout for all processes.
 
-lower_zone_protection
+lowmem_reserve_ratio
 ---------------------
 
 For some specialised workloads on highmem machines it is dangerous for
@@ -1341,25 +1356,71 @@
 mechanism will also defend that region from allocations which could use
 highmem or lowmem).
 
-The `lower_zone_protection' tunable determines how aggressive the kernel is
-in defending these lower zones.  The default value is zero - no
-protection at all.
+The `lowmem_reserve_ratio' tunable determines how aggressive the kernel is
+in defending these lower zones.
 
 If you have a machine which uses highmem or ISA DMA and your
 applications are using mlock(), or if you are running with no swap then
-you probably should increase the lower_zone_protection setting.
+you probably should change the lowmem_reserve_ratio setting.
 
-The units of this tunable are fairly vague.  It is approximately equal
-to "megabytes," so setting lower_zone_protection=100 will protect around 100
-megabytes of the lowmem zone from user allocations.  It will also make
-those 100 megabytes unavailable for use by applications and by
-pagecache, so there is a cost.
+The lowmem_reserve_ratio is an array. You can see its values by reading this file.
+-
+% cat /proc/sys/vm/lowmem_reserve_ratio
+256     256     32
+-
+Note: the number of elements is one fewer than the number of zones, because the
+      highest zone's value is not needed for the following calculation.
 
-The effects of this tunable may be observed by monitoring
-/proc/meminfo:LowFree.  Write a single huge file and observe the point
-at which LowFree ceases to fall.
+But these values are not used directly. The kernel calculates the number of
+protection pages for each zone from them. These are shown as an array of
+protection pages in /proc/zoneinfo, as follows (this is an example from an
+x86-64 box). Each zone has an array of protection pages like this.
 
-A reasonable value for lower_zone_protection is 100.
+-
+Node 0, zone      DMA
+  pages free     1355
+        min      3
+        low      3
+        high     4
+	:
+	:
+    numa_other   0
+        protection: (0, 2004, 2004, 2004)
+	^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  pagesets
+    cpu: 0 pcp: 0
+        :
+-
+These protections are added to score to judge whether this zone should be used
+for page allocation or should be reclaimed.
+
+In this example, if normal pages (index=2) are requested from this DMA zone and
+pages_high is used as the watermark, the kernel judges that this zone should
+not be used, because pages_free (1355) is smaller than watermark +
+protection[2] (4 + 2004 = 2008). If this protection value were 0, this zone
+would be used for the normal page request. If the request is for the DMA zone
+(index=0), protection[0] (=0) is used.
+
+zone[i]'s protection[j] is calculated by the following expression.
+
+(i < j):
+  zone[i]->protection[j]
+  = (total sums of present_pages from zone[i+1] to zone[j] on the node)
+    / lowmem_reserve_ratio[i];
+(i = j):
+   (should not be protected, = 0)
+(i > j):
+   (not necessary, but shown as 0)
+
+The default values of lowmem_reserve_ratio[i] are
+    256 (if zone[i] means DMA or DMA32 zone)
+    32  (others).
+As the expression above shows, they are the reciprocals of the ratios.
+256 means 1/256: the number of protection pages becomes about 0.39% of the
+total present pages of the higher zones on the node.
+
+If you would like to protect more pages, smaller values are effective.
+The minimum value is 1 (1/1 -> 100%).
 
 page-cluster
 ------------
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index 6bc2ba2..8da724e 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -32,7 +32,7 @@
   - Input values are likewise readable (1, 0).  Some chips support readback
     of pins configured as "output", which is very useful in such "wire-OR"
     cases (to support bidirectional signaling).  GPIO controllers may have
-    input de-glitch logic, sometimes with software controls.
+    input de-glitch/debounce logic, sometimes with software controls.
 
   - Inputs can often be used as IRQ signals, often edge triggered but
     sometimes level triggered.  Such IRQs may be configurable as system
@@ -60,10 +60,13 @@
 functionality can be very portable.  Other features are platform-specific,
 and that can be critical for glue logic.
 
-Plus, this doesn't define an implementation framework, just an interface.
+Plus, this doesn't require any implementation framework, just an interface.
 One platform might implement it as simple inline functions accessing chip
 registers; another might implement it by delegating through abstractions
-used for several very different kinds of GPIO controller.
+used for several very different kinds of GPIO controller.  (There is some
+optional code supporting such an implementation strategy, described later
+in this document, but drivers acting as clients to the GPIO interface must
+not care how it's implemented.)
 
 That said, if the convention is supported on their platform, drivers should
 use it when possible.  Platforms should declare GENERIC_GPIO support in
@@ -121,6 +124,11 @@
 For output GPIOs, the value provided becomes the initial output value.
 This helps avoid signal glitching during system startup.
 
+For compatibility with legacy interfaces to GPIOs, setting the direction
+of a GPIO implicitly requests that GPIO (see below) if it has not been
+requested already.  That compatibility may be removed in the future;
+explicitly requesting GPIOs is strongly preferred.
+
 Setting the direction can fail if the GPIO number is invalid, or when
 that particular GPIO can't be used in that mode.  It's generally a bad
 idea to rely on boot firmware to have set the direction correctly, since
@@ -133,6 +141,7 @@
 -------------------------
 Most GPIO controllers can be accessed with memory read/write instructions.
 That doesn't need to sleep, and can safely be done from inside IRQ handlers.
+(That includes hardirq contexts on RT kernels.)
 
 Use these calls to access such GPIOs:
 
@@ -145,7 +154,7 @@
 The values are boolean, zero for low, nonzero for high.  When reading the
 value of an output pin, the value returned should be what's seen on the
 pin ... that won't always match the specified output value, because of
-issues including wire-OR and output latencies.
+issues including open-drain signaling and output latencies.
 
 The get/set calls have no error returns because "invalid GPIO" should have
 been reported earlier from gpio_direction_*().  However, note that not all
@@ -170,7 +179,8 @@
 This requires sleeping, which can't be done from inside IRQ handlers.
 
 Platforms that support this type of GPIO distinguish them from other GPIOs
-by returning nonzero from this call:
+by returning nonzero from this call (which requires a valid GPIO number,
+either explicitly or implicitly requested):
 
 	int gpio_cansleep(unsigned gpio);
 
@@ -209,8 +219,11 @@
 These calls serve two basic purposes.  One is marking the signals which
 are actually in use as GPIOs, for better diagnostics; systems may have
 several hundred potential GPIOs, but often only a dozen are used on any
-given board.  Another is to catch conflicts between drivers, reporting
-errors when drivers wrongly think they have exclusive use of that signal.
+given board.  Another is to catch conflicts, identifying errors when
+(a) two or more drivers wrongly think they have exclusive use of that
+signal, or (b) something wrongly believes it's safe to remove drivers
+needed to manage a signal that's in active use.  That is, requesting a
+GPIO can serve as a kind of lock.
 
 These two calls are optional because not not all current Linux platforms
 offer such functionality in their GPIO support; a valid implementation
@@ -223,6 +236,9 @@
 way; it just marks that GPIO as in use.  Separate code must handle any
 pin setup (e.g. controlling which pin the GPIO uses, pullup/pulldown).
 
+Also note that it's your responsibility to have stopped using a GPIO
+before you free it.
+
 
 GPIOs mapped to IRQs
 --------------------
@@ -238,7 +254,7 @@
 
 Those return either the corresponding number in the other namespace, or
 else a negative errno code if the mapping can't be done.  (For example,
-some GPIOs can't used as IRQs.)  It is an unchecked error to use a GPIO
+some GPIOs can't be used as IRQs.)  It is an unchecked error to use a GPIO
 number that wasn't set up as an input using gpio_direction_input(), or
 to use an IRQ number that didn't originally come from gpio_to_irq().
 
@@ -299,17 +315,110 @@
 pulldowns integrated on some platforms.  Not all platforms support them,
 or support them in the same way; and any given board might use external
 pullups (or pulldowns) so that the on-chip ones should not be used.
+(When a circuit needs 5 kOhm, on-chip 100 kOhm resistors won't do.)
 
 There are other system-specific mechanisms that are not specified here,
 like the aforementioned options for input de-glitching and wire-OR output.
 Hardware may support reading or writing GPIOs in gangs, but that's usually
 configuration dependent:  for GPIOs sharing the same bank.  (GPIOs are
 commonly grouped in banks of 16 or 32, with a given SOC having several such
-banks.)  Some systems can trigger IRQs from output GPIOs.  Code relying on
-such mechanisms will necessarily be nonportable.
+banks.)  Some systems can trigger IRQs from output GPIOs, or read values
+from pins not managed as GPIOs.  Code relying on such mechanisms will
+necessarily be nonportable.
 
-Dynamic definition of GPIOs is not currently supported; for example, as
+Dynamic definition of GPIOs is not currently standard; for example, as
 a side effect of configuring an add-on board with some GPIO expanders.
 
 These calls are purely for kernel space, but a userspace API could be built
-on top of it.
+on top of them.
+
+
+GPIO implementor's framework (OPTIONAL)
+=======================================
+As noted earlier, there is an optional implementation framework making it
+easier for platforms to support different kinds of GPIO controller using
+the same programming interface.
+
+As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file
+will be found there.  That will list all the controllers registered through
+this framework, and the state of the GPIOs currently in use.
+
+
+Controller Drivers: gpio_chip
+-----------------------------
+In this framework each GPIO controller is packaged as a "struct gpio_chip"
+with information common to each controller of that type:
+
+ - methods to establish GPIO direction
+ - methods used to access GPIO values
+ - flag saying whether calls to its methods may sleep
+ - optional debugfs dump method (showing extra state like pullup config)
+ - label for diagnostics
+
+There is also per-instance data, which may come from device.platform_data:
+the number of its first GPIO, and how many GPIOs it exposes.
+
+The code implementing a gpio_chip should support multiple instances of the
+controller, possibly using the driver model.  That code will configure each
+gpio_chip and issue gpiochip_add().  Removing a GPIO controller should be
+rare; use gpiochip_remove() when it is unavoidable.
+
+Most often a gpio_chip is part of an instance-specific structure with state
+not exposed by the GPIO interfaces, such as addressing, power management,
+and more.  Chips such as codecs will have complex non-GPIO state.
+
+Any debugfs dump method should normally ignore signals which haven't been
+requested as GPIOs.  They can use gpiochip_is_requested(), which returns
+either NULL or the label associated with that GPIO when it was requested.
+
+
+Platform Support
+----------------
+To support this framework, a platform's Kconfig will "select HAVE_GPIO_LIB"
+and arrange that its <asm/gpio.h> includes <asm-generic/gpio.h> and defines
+three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep().
+They may also want to provide a custom value for ARCH_NR_GPIOS.
+
+Trivial implementations of those functions can directly use framework
+code, which always dispatches through the gpio_chip:
+
+  #define gpio_get_value	__gpio_get_value
+  #define gpio_set_value	__gpio_set_value
+  #define gpio_cansleep		__gpio_cansleep
+
+Fancier implementations could instead define those as inline functions with
+logic optimizing access to specific SOC-based GPIOs.  For example, if the
+referenced GPIO is the constant "12", getting or setting its value could
+cost as little as two or three instructions, never sleeping.  When such an
+optimization is not possible those calls must delegate to the framework
+code, costing at least a few dozen instructions.  For bitbanged I/O, such
+instruction savings can be significant.
+
+For SOCs, platform-specific code defines and registers gpio_chip instances
+for each bank of on-chip GPIOs.  Those GPIOs should be numbered/labeled to
+match chip vendor documentation, and directly match board schematics.  They
+may well start at zero and go up to a platform-specific limit.  Such GPIOs
+are normally integrated into platform initialization to make them always be
+available, from arch_initcall() or earlier; they can often serve as IRQs.
+
+
+Board Support
+-------------
+For external GPIO controllers -- such as I2C or SPI expanders, ASICs, multi
+function devices, FPGAs or CPLDs -- most often board-specific code handles
+registering controller devices and ensures that their drivers know what GPIO
+numbers to use with gpiochip_add().  Their numbers often start right after
+platform-specific GPIOs.
+
+For example, board setup code could create structures identifying the range
+of GPIOs that chip will expose, and pass them to each GPIO expander chip
+using platform_data.  Then the chip driver's probe() routine could pass that
+data to gpiochip_add().
+
+Initialization order can be important.  For example, when a device relies on
+an I2C-based GPIO, its probe() routine should only be called after that GPIO
+becomes available.  That may mean the device should not be registered until
+calls for that GPIO can work.  One way to address such dependencies is for
+such gpio_chip controllers to provide setup() and teardown() callbacks to
+board specific code; those board specific callbacks would register devices
+once all the necessary resources are available.
diff --git a/Documentation/i2c/chips/pca9539 b/Documentation/i2c/chips/pca9539
index c4fce6a..1d81c53 100644
--- a/Documentation/i2c/chips/pca9539
+++ b/Documentation/i2c/chips/pca9539
@@ -1,6 +1,9 @@
 Kernel driver pca9539
 =====================
 
+NOTE: this driver is deprecated and will be dropped soon; use
+drivers/gpio/pca9539.c instead.
+
 Supported chips:
   * Philips PCA9539
     Prefix: 'pca9539'
diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt
index 4739c5c..96f155e 100644
--- a/Documentation/pcmcia/driver-changes.txt
+++ b/Documentation/pcmcia/driver-changes.txt
@@ -33,8 +33,8 @@
    and can be used (e.g. for SET_NETDEV_DEV) by using
    handle_to_dev(client_handle_t * handle).
 
-* Convert internal I/O port addresses to unsigned long (as of 2.6.11)
-   ioaddr_t should be replaced by kio_addr_t in PCMCIA card drivers.
+* Convert internal I/O port addresses to unsigned int (as of 2.6.11)
+   ioaddr_t should be replaced by unsigned int in PCMCIA card drivers.
 
 * irq_mask and irq_list parameters (as of 2.6.11)
    The irq_mask and irq_list parameters should no longer be used in
diff --git a/Documentation/pm_qos_interface.txt b/Documentation/pm_qos_interface.txt
new file mode 100644
index 0000000..49adb1a
--- /dev/null
+++ b/Documentation/pm_qos_interface.txt
@@ -0,0 +1,59 @@
+PM Quality of Service interface.
+
+This interface provides a kernel and user mode interface for registering
+performance expectations by drivers, subsystems and user space applications on
+one of the parameters.
+
+Currently we have {cpu_dma_latency, network_latency, network_throughput} as the
+initial set of pm_qos parameters.
+
+The infrastructure exposes multiple misc device nodes one per implemented
+parameter.  The set of parameters implemented is defined by pm_qos_power_init()
+and pm_qos_params.h.  This is done because having the available parameters
+being runtime configurable or changeable from a driver was seen as too easy to
+abuse.
+
+For each parameter a list of performance requirements is maintained along with
+an aggregated target value.  The aggregated target value is updated with
+changes to the requirement list or elements of the list.  Typically the
+aggregated target value is simply the max or min of the requirement values held
+in the parameter list elements.
+
+From kernel mode the use of this interface is simple:
+pm_qos_add_requirement(param_id, name, target_value):
+Will insert a named element in the list for that identified PM_QOS parameter
+with the target value.  Upon change to this list the new target is recomputed
+and any registered notifiers are called only if the target value is now
+different.
+
+pm_qos_update_requirement(param_id, name, new_target_value):
+Will search the list identified by the param_id for the named list element and
+then update its target value, calling the notification tree if the aggregated
+target is changed.  A requirement with that name must already be registered.
+
+pm_qos_remove_requirement(param_id, name):
+Will search the identified list for the named element and remove it; after
+removal it will update the aggregate target and call the notification tree if
+the target was changed as a result of removing the named requirement.
+
+
+From user mode:
+Only processes can register a pm_qos requirement.  To provide for automatic
+cleanup of the process, the interface requires the process to register its
+parameter requirements in the following way:
+
+To register the default pm_qos target for the specific parameter, the process
+must open one of /dev/[cpu_dma_latency, network_latency, network_throughput]
+
+As long as the device node is held open that process has a registered
+requirement on the parameter.  The name of the requirement is "process_<PID>"
+derived from the current->pid from within the open system call.
+
+To change the requested target value the process needs to write a s32 value to
+the open device node.  This translates to a pm_qos_update_requirement call.
+
+To remove the user mode request for a target value simply close the device
+node.
+
+
+
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 6f31f0a..24eac1b 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -22,6 +22,7 @@
 - dirty_background_ratio
 - dirty_expire_centisecs
 - dirty_writeback_centisecs
+- highmem_is_dirtyable   (only if CONFIG_HIGHMEM set)
 - max_map_count
 - min_free_kbytes
 - laptop_mode
@@ -40,9 +41,9 @@
 ==============================================================
 
 dirty_ratio, dirty_background_ratio, dirty_expire_centisecs,
-dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode,
-block_dump, swap_token_timeout, drop-caches,
-hugepages_treat_as_movable:
+dirty_writeback_centisecs, highmem_is_dirtyable,
+vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout,
+drop-caches, hugepages_treat_as_movable:
 
 See Documentation/filesystems/proc.txt
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 263ceae..4f3da8b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2690,6 +2690,16 @@
 L:	linux-scsi@vger.kernel.org
 S:	Maintained
 
+NETEFFECT IWARP RNIC DRIVER (IW_NES)
+P:	Faisal Latif
+M:	flatif@neteffect.com
+P:	Glenn Streiff
+M:	gstreiff@neteffect.com
+L:	general@lists.openfabrics.org
+W:	http://www.neteffect.com
+S:	Supported
+F:	drivers/infiniband/hw/nes/
+
 NETEM NETWORK EMULATOR
 P:	Stephen Hemminger
 M:	shemminger@linux-foundation.org
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c
index 468b76c..8ac0831 100644
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -165,7 +165,7 @@
 	ret = (void *)__get_free_pages(gfp, get_order(size));
 	if (ret) {
 		memset(ret, 0, size);
-		*dma_handle = virt_to_bus(ret);
+		*dma_handle = virt_to_phys(ret);
 	}
 	return ret;
 }
@@ -184,7 +184,7 @@
 
 		BUG_ON(!sg_page(sg));
 		va = sg_virt(sg);
-		sg_dma_address(sg) = (dma_addr_t)virt_to_bus(va);
+		sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va);
 		sg_dma_len(sg) = sg->length;
 	}
 
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index 2d00a08..26d3789 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -9,6 +9,7 @@
 #include <linux/bootmem.h>
 #include <linux/scatterlist.h>
 #include <linux/log2.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/io.h>
 #include <asm/hwrpb.h>
@@ -470,22 +471,29 @@
 #define SG_ENT_PHYS_ADDRESS(SG) __pa(SG_ENT_VIRT_ADDRESS(SG))
 
 static void
-sg_classify(struct scatterlist *sg, struct scatterlist *end, int virt_ok)
+sg_classify(struct device *dev, struct scatterlist *sg, struct scatterlist *end,
+	    int virt_ok)
 {
 	unsigned long next_paddr;
 	struct scatterlist *leader;
 	long leader_flag, leader_length;
+	unsigned int max_seg_size;
 
 	leader = sg;
 	leader_flag = 0;
 	leader_length = leader->length;
 	next_paddr = SG_ENT_PHYS_ADDRESS(leader) + leader_length;
 
+	/* we will not merge sg without device. */
+	max_seg_size = dev ? dma_get_max_seg_size(dev) : 0;
 	for (++sg; sg < end; ++sg) {
 		unsigned long addr, len;
 		addr = SG_ENT_PHYS_ADDRESS(sg);
 		len = sg->length;
 
+		if (leader_length + len > max_seg_size)
+			goto new_segment;
+
 		if (next_paddr == addr) {
 			sg->dma_address = -1;
 			leader_length += len;
@@ -494,6 +502,7 @@
 			leader_flag = 1;
 			leader_length += len;
 		} else {
+new_segment:
 			leader->dma_address = leader_flag;
 			leader->dma_length = leader_length;
 			leader = sg;
@@ -512,7 +521,7 @@
    in the blanks.  */
 
 static int
-sg_fill(struct scatterlist *leader, struct scatterlist *end,
+sg_fill(struct device *dev, struct scatterlist *leader, struct scatterlist *end,
 	struct scatterlist *out, struct pci_iommu_arena *arena,
 	dma_addr_t max_dma, int dac_allowed)
 {
@@ -562,8 +571,8 @@
 
 		/* Otherwise, break up the remaining virtually contiguous
 		   hunks into individual direct maps and retry.  */
-		sg_classify(leader, end, 0);
-		return sg_fill(leader, end, out, arena, max_dma, dac_allowed);
+		sg_classify(dev, leader, end, 0);
+		return sg_fill(dev, leader, end, out, arena, max_dma, dac_allowed);
 	}
 
 	out->dma_address = arena->dma_base + dma_ofs*PAGE_SIZE + paddr;
@@ -619,12 +628,15 @@
 	struct pci_iommu_arena *arena;
 	dma_addr_t max_dma;
 	int dac_allowed;
+	struct device *dev;
 
 	if (direction == PCI_DMA_NONE)
 		BUG();
 
 	dac_allowed = pdev ? pci_dac_dma_supported(pdev, pdev->dma_mask) : 0;
 
+	dev = pdev ? &pdev->dev : NULL;
+
 	/* Fast path single entry scatterlists.  */
 	if (nents == 1) {
 		sg->dma_length = sg->length;
@@ -638,7 +650,7 @@
 	end = sg + nents;
 
 	/* First, prepare information about the entries.  */
-	sg_classify(sg, end, alpha_mv.mv_pci_tbi != 0);
+	sg_classify(dev, sg, end, alpha_mv.mv_pci_tbi != 0);
 
 	/* Second, figure out where we're going to map things.  */
 	if (alpha_mv.mv_pci_tbi) {
@@ -658,7 +670,7 @@
 	for (out = sg; sg < end; ++sg) {
 		if ((int) sg->dma_address < 0)
 			continue;
-		if (sg_fill(sg, end, out, arena, max_dma, dac_allowed) < 0)
+		if (sg_fill(dev, sg, end, out, arena, max_dma, dac_allowed) < 0)
 			goto error;
 		out++;
 	}
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index bd5e68c..beff629 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -58,7 +58,6 @@
 #include <asm/system.h>
 #include <asm/hwrpb.h>
 #include <asm/dma.h>
-#include <asm/io.h>
 #include <asm/mmu_context.h>
 #include <asm/console.h>
 
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 79de99e3..ba914af 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -495,7 +495,7 @@
 	.quad sys_epoll_pwait
 	.quad sys_utimensat			/* 475 */
 	.quad sys_signalfd
-	.quad sys_timerfd
+	.quad sys_ni_syscall
 	.quad sys_eventfd
 
 	.size sys_call_table, . - sys_call_table
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4716370..e19e774 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -35,6 +35,11 @@
 	bool
 	default n
 
+config GENERIC_CLOCKEVENTS_BROADCAST
+	bool
+	depends on GENERIC_CLOCKEVENTS
+	default y if SMP && !LOCAL_TIMERS
+
 config MMU
 	bool
 	default y
@@ -187,6 +192,8 @@
 	bool "ARM Ltd. RealView family"
 	select ARM_AMBA
 	select ICST307
+	select GENERIC_TIME
+	select GENERIC_CLOCKEVENTS
 	help
 	  This enables support for ARM Ltd RealView boards.
 
@@ -378,6 +385,7 @@
 	depends on MMU
 	select ARCH_MTD_XIP
 	select GENERIC_GPIO
+	select HAVE_GPIO_LIB
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
 	select TICK_ONESHOT
@@ -623,7 +631,7 @@
 
 config SMP
 	bool "Symmetric Multi-Processing (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && REALVIEW_MPCORE
+	depends on EXPERIMENTAL && REALVIEW_EB_ARM11MP
 	help
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, like most personal computers, say N. If
@@ -656,7 +664,7 @@
 
 config LOCAL_TIMERS
 	bool "Use local timer interrupts"
-	depends on SMP && REALVIEW_MPCORE
+	depends on SMP && REALVIEW_EB_ARM11MP
 	default y
 	help
 	  Enable support for local timers on SMP platforms, rather then the
@@ -912,6 +920,13 @@
 	  initially work for you.  It may help to enable device hotplugging
 	  support.
 
+config ATAGS_PROC
+	bool "Export atags in procfs"
+	default n
+	help
+	  Should the atags used to boot the kernel be exported in an "atags"
+	  file in procfs? Useful with kexec.
+
 endmenu
 
 if (ARCH_SA1100 || ARCH_INTEGRATOR || ARCH_OMAP || ARCH_IMX || ARCH_PXA)
@@ -1108,6 +1123,8 @@
 
 source "drivers/spi/Kconfig"
 
+source "drivers/gpio/Kconfig"
+
 source "drivers/w1/Kconfig"
 
 source "drivers/power/Kconfig"
diff --git a/arch/arm/common/time-acorn.c b/arch/arm/common/time-acorn.c
index 34038ec..d544da4 100644
--- a/arch/arm/common/time-acorn.c
+++ b/arch/arm/common/time-acorn.c
@@ -69,9 +69,7 @@
 static irqreturn_t
 ioc_timer_interrupt(int irq, void *dev_id)
 {
-	write_seqlock(&xtime_lock);
 	timer_tick();
-	write_sequnlock(&xtime_lock);
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/arm/configs/ixp4xx_defconfig b/arch/arm/configs/ixp4xx_defconfig
index db850a5..efa0485 100644
--- a/arch/arm/configs/ixp4xx_defconfig
+++ b/arch/arm/configs/ixp4xx_defconfig
@@ -1,69 +1,96 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.15
-# Tue Jan  3 03:20:40 2006
+# Linux kernel version: 2.6.24
+# Sun Jan 27 07:33:38 2008
 #
 CONFIG_ARM=y
+CONFIG_SYS_SUPPORTS_APM_EMULATION=y
+CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
 CONFIG_MMU=y
-CONFIG_UID16=y
+# CONFIG_NO_IOPORT is not set
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_HARDIRQS_SW_RESEND=y
+CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_HWEIGHT=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
-
-#
-# Code maturity level options
-#
-CONFIG_EXPERIMENTAL=y
-CONFIG_CLEAN_COMPILE=y
-CONFIG_BROKEN_ON_SMP=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_ZONE_DMA=y
+CONFIG_VECTORS_BASE=0xffff0000
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
 #
 # General setup
 #
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
 CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
 # CONFIG_POSIX_MQUEUE is not set
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
-CONFIG_SYSCTL=y
+# CONFIG_TASKSTATS is not set
+# CONFIG_USER_NS is not set
+# CONFIG_PID_NS is not set
 # CONFIG_AUDIT is not set
-# CONFIG_HOTPLUG is not set
-CONFIG_KOBJECT_UEVENT=y
 # CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_CGROUPS is not set
+CONFIG_FAIR_GROUP_SCHED=y
+CONFIG_FAIR_USER_SCHED=y
+# CONFIG_FAIR_CGROUP_SCHED is not set
+CONFIG_SYSFS_DEPRECATED=y
+# CONFIG_RELAY is not set
+CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
 CONFIG_EMBEDDED=y
+CONFIG_UID16=y
+CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
+CONFIG_ELF_CORE=y
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
+CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_EVENTFD=y
 CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_SLUB_DEBUG=y
+# CONFIG_SLAB is not set
+CONFIG_SLUB=y
+# CONFIG_SLOB is not set
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
 # CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
-
-#
-# Loadable module support
-#
 CONFIG_MODULES=y
 # CONFIG_MODULE_UNLOAD is not set
-CONFIG_OBSOLETE_MODPARM=y
 CONFIG_MODVERSIONS=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_KMOD=y
-
-#
-# Block layer
-#
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
 
 #
 # IO Schedulers
@@ -81,28 +108,39 @@
 #
 # System Type
 #
+# CONFIG_ARCH_AAEC2000 is not set
+# CONFIG_ARCH_INTEGRATOR is not set
+# CONFIG_ARCH_REALVIEW is not set
+# CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_AT91 is not set
 # CONFIG_ARCH_CLPS7500 is not set
 # CONFIG_ARCH_CLPS711X is not set
 # CONFIG_ARCH_CO285 is not set
 # CONFIG_ARCH_EBSA110 is not set
+# CONFIG_ARCH_EP93XX is not set
 # CONFIG_ARCH_FOOTBRIDGE is not set
-# CONFIG_ARCH_INTEGRATOR is not set
-# CONFIG_ARCH_IOP3XX is not set
-CONFIG_ARCH_IXP4XX=y
+# CONFIG_ARCH_NETX is not set
+# CONFIG_ARCH_H720X is not set
+# CONFIG_ARCH_IMX is not set
+# CONFIG_ARCH_IOP13XX is not set
+# CONFIG_ARCH_IOP32X is not set
+# CONFIG_ARCH_IOP33X is not set
+# CONFIG_ARCH_IXP23XX is not set
 # CONFIG_ARCH_IXP2000 is not set
+CONFIG_ARCH_IXP4XX=y
 # CONFIG_ARCH_L7200 is not set
+# CONFIG_ARCH_KS8695 is not set
+# CONFIG_ARCH_NS9XXX is not set
+# CONFIG_ARCH_MXC is not set
+# CONFIG_ARCH_PNX4008 is not set
 # CONFIG_ARCH_PXA is not set
 # CONFIG_ARCH_RPC is not set
 # CONFIG_ARCH_SA1100 is not set
 # CONFIG_ARCH_S3C2410 is not set
 # CONFIG_ARCH_SHARK is not set
 # CONFIG_ARCH_LH7A40X is not set
+# CONFIG_ARCH_DAVINCI is not set
 # CONFIG_ARCH_OMAP is not set
-# CONFIG_ARCH_VERSATILE is not set
-# CONFIG_ARCH_REALVIEW is not set
-# CONFIG_ARCH_IMX is not set
-# CONFIG_ARCH_H720X is not set
-# CONFIG_ARCH_AAEC2000 is not set
 CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y
 
 #
@@ -112,8 +150,12 @@
 #
 # IXP4xx Platforms
 #
-CONFIG_ARCH_AVILA=y
+CONFIG_MACH_NSLU2=y
+CONFIG_MACH_AVILA=y
+CONFIG_MACH_LOFT=y
 CONFIG_ARCH_ADI_COYOTE=y
+CONFIG_MACH_GATEWAY7001=y
+CONFIG_MACH_WG302V2=y
 CONFIG_ARCH_IXDP425=y
 CONFIG_MACH_IXDPG425=y
 CONFIG_MACH_IXDP465=y
@@ -121,15 +163,27 @@
 CONFIG_ARCH_IXCDP1100=y
 CONFIG_ARCH_PRPMC1100=y
 CONFIG_MACH_NAS100D=y
+CONFIG_MACH_DSMG600=y
 CONFIG_ARCH_IXDP4XX=y
 CONFIG_CPU_IXP46X=y
 CONFIG_CPU_IXP43X=y
-# CONFIG_MACH_GTWX5715 is not set
+CONFIG_MACH_GTWX5715=y
 
 #
 # IXP4xx Options
 #
+CONFIG_DMABOUNCE=y
 # CONFIG_IXP4XX_INDIRECT_PCI is not set
+CONFIG_IXP4XX_QMGR=y
+CONFIG_IXP4XX_NPE=y
+
+#
+# Boot options
+#
+
+#
+# Power management
+#
 
 #
 # Processor Type
@@ -140,33 +194,40 @@
 CONFIG_CPU_ABRT_EV5T=y
 CONFIG_CPU_CACHE_VIVT=y
 CONFIG_CPU_TLB_V4WBI=y
+CONFIG_CPU_CP15=y
+CONFIG_CPU_CP15_MMU=y
 
 #
 # Processor Features
 #
 # CONFIG_ARM_THUMB is not set
 CONFIG_CPU_BIG_ENDIAN=y
+# CONFIG_CPU_DCACHE_DISABLE is not set
+# CONFIG_OUTER_CACHE is not set
+# CONFIG_IWMMXT is not set
 CONFIG_XSCALE_PMU=y
-CONFIG_DMABOUNCE=y
 
 #
 # Bus support
 #
-CONFIG_ISA_DMA_API=y
 CONFIG_PCI=y
-CONFIG_PCI_LEGACY_PROC=y
+CONFIG_PCI_SYSCALL=y
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+CONFIG_PCI_LEGACY=y
 # CONFIG_PCI_DEBUG is not set
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
 # CONFIG_PCCARD is not set
 
 #
 # Kernel Features
 #
+# CONFIG_TICK_ONESHOT is not set
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 # CONFIG_PREEMPT is not set
-# CONFIG_NO_IDLE_HZ is not set
+CONFIG_HZ=100
+CONFIG_AEABI=y
+CONFIG_OABI_COMPAT=y
 # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set
 CONFIG_SELECT_MEMORY_MODEL=y
 CONFIG_FLATMEM_MANUAL=y
@@ -175,7 +236,12 @@
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 # CONFIG_SPARSEMEM_STATIC is not set
+# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
 CONFIG_SPLIT_PTLOCK_CPUS=4096
+# CONFIG_RESOURCES_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=1
+CONFIG_BOUNCE=y
+CONFIG_VIRT_TO_BUS=y
 CONFIG_ALIGNMENT_TRAP=y
 
 #
@@ -185,6 +251,7 @@
 CONFIG_ZBOOT_ROM_BSS=0x0
 CONFIG_CMDLINE="console=ttyS0,115200 ip=bootp root=/dev/nfs"
 # CONFIG_XIP_KERNEL is not set
+# CONFIG_KEXEC is not set
 
 #
 # Floating point emulation
@@ -203,13 +270,12 @@
 CONFIG_BINFMT_ELF=y
 # CONFIG_BINFMT_AOUT is not set
 # CONFIG_BINFMT_MISC is not set
-# CONFIG_ARTHUR is not set
 
 #
 # Power management options
 #
 # CONFIG_PM is not set
-# CONFIG_APM is not set
+CONFIG_SUSPEND_UP_POSSIBLE=y
 
 #
 # Networking
@@ -219,11 +285,13 @@
 #
 # Networking options
 #
-CONFIG_PACKET=m
+CONFIG_PACKET=y
 CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
 CONFIG_XFRM=y
 # CONFIG_XFRM_USER is not set
+# CONFIG_XFRM_SUB_POLICY is not set
+# CONFIG_XFRM_MIGRATE is not set
 # CONFIG_NET_KEY is not set
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
@@ -232,9 +300,7 @@
 # CONFIG_IP_FIB_TRIE is not set
 CONFIG_IP_FIB_HASH=y
 CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_FWMARK=y
 CONFIG_IP_ROUTE_MULTIPATH=y
-# CONFIG_IP_ROUTE_MULTIPATH_CACHED is not set
 CONFIG_IP_ROUTE_VERBOSE=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
@@ -251,15 +317,18 @@
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
-CONFIG_INET_TUNNEL=m
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_INET_XFRM_MODE_TRANSPORT=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_INET_XFRM_MODE_BEET=y
+# CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=y
 CONFIG_INET_TCP_DIAG=y
 # CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_BIC=y
-
-#
-# IP: Virtual Server Configuration
-#
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
 CONFIG_IP_VS=m
 CONFIG_IP_VS_DEBUG=y
 CONFIG_IP_VS_TAB_BITS=12
@@ -290,6 +359,9 @@
 # IPVS application helper
 #
 # CONFIG_IPV6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
+# CONFIG_NETWORK_SECMARK is not set
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_DEBUG is not set
 CONFIG_BRIDGE_NETFILTER=y
@@ -298,70 +370,57 @@
 # Core Netfilter Configuration
 #
 # CONFIG_NETFILTER_NETLINK is not set
+# CONFIG_NF_CONNTRACK_ENABLED is not set
+# CONFIG_NF_CONNTRACK is not set
+CONFIG_NETFILTER_XTABLES=m
+# CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set
+# CONFIG_NETFILTER_XT_TARGET_DSCP is not set
+# CONFIG_NETFILTER_XT_TARGET_MARK is not set
+# CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set
+# CONFIG_NETFILTER_XT_TARGET_NFLOG is not set
+# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set
+# CONFIG_NETFILTER_XT_MATCH_COMMENT is not set
+# CONFIG_NETFILTER_XT_MATCH_DCCP is not set
+# CONFIG_NETFILTER_XT_MATCH_DSCP is not set
+# CONFIG_NETFILTER_XT_MATCH_ESP is not set
+# CONFIG_NETFILTER_XT_MATCH_LENGTH is not set
+# CONFIG_NETFILTER_XT_MATCH_LIMIT is not set
+# CONFIG_NETFILTER_XT_MATCH_MAC is not set
+# CONFIG_NETFILTER_XT_MATCH_MARK is not set
+# CONFIG_NETFILTER_XT_MATCH_POLICY is not set
+# CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set
+# CONFIG_NETFILTER_XT_MATCH_PHYSDEV is not set
+# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set
+# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set
+# CONFIG_NETFILTER_XT_MATCH_REALM is not set
+# CONFIG_NETFILTER_XT_MATCH_SCTP is not set
+# CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set
+# CONFIG_NETFILTER_XT_MATCH_STRING is not set
+# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set
+# CONFIG_NETFILTER_XT_MATCH_TIME is not set
+# CONFIG_NETFILTER_XT_MATCH_U32 is not set
+# CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set
 
 #
 # IP: Netfilter Configuration
 #
-CONFIG_IP_NF_CONNTRACK=m
-# CONFIG_IP_NF_CT_ACCT is not set
-# CONFIG_IP_NF_CONNTRACK_MARK is not set
-# CONFIG_IP_NF_CONNTRACK_EVENTS is not set
-# CONFIG_IP_NF_CT_PROTO_SCTP is not set
-CONFIG_IP_NF_FTP=m
-CONFIG_IP_NF_IRC=m
-# CONFIG_IP_NF_NETBIOS_NS is not set
-# CONFIG_IP_NF_TFTP is not set
-# CONFIG_IP_NF_AMANDA is not set
-# CONFIG_IP_NF_PPTP is not set
 CONFIG_IP_NF_QUEUE=m
 CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_LIMIT=m
 # CONFIG_IP_NF_MATCH_IPRANGE is not set
-CONFIG_IP_NF_MATCH_MAC=m
-# CONFIG_IP_NF_MATCH_PKTTYPE is not set
-CONFIG_IP_NF_MATCH_MARK=m
-CONFIG_IP_NF_MATCH_MULTIPORT=m
 CONFIG_IP_NF_MATCH_TOS=m
 # CONFIG_IP_NF_MATCH_RECENT is not set
 # CONFIG_IP_NF_MATCH_ECN is not set
-# CONFIG_IP_NF_MATCH_DSCP is not set
-CONFIG_IP_NF_MATCH_AH_ESP=m
-CONFIG_IP_NF_MATCH_LENGTH=m
+# CONFIG_IP_NF_MATCH_AH is not set
 CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_MATCH_TCPMSS=m
-# CONFIG_IP_NF_MATCH_HELPER is not set
-CONFIG_IP_NF_MATCH_STATE=m
-# CONFIG_IP_NF_MATCH_CONNTRACK is not set
 CONFIG_IP_NF_MATCH_OWNER=m
-# CONFIG_IP_NF_MATCH_PHYSDEV is not set
 # CONFIG_IP_NF_MATCH_ADDRTYPE is not set
-# CONFIG_IP_NF_MATCH_REALM is not set
-# CONFIG_IP_NF_MATCH_SCTP is not set
-# CONFIG_IP_NF_MATCH_DCCP is not set
-# CONFIG_IP_NF_MATCH_COMMENT is not set
-# CONFIG_IP_NF_MATCH_HASHLIMIT is not set
-# CONFIG_IP_NF_MATCH_STRING is not set
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_TARGET_LOG=m
 CONFIG_IP_NF_TARGET_ULOG=m
-CONFIG_IP_NF_TARGET_TCPMSS=m
-# CONFIG_IP_NF_TARGET_NFQUEUE is not set
-CONFIG_IP_NF_NAT=m
-CONFIG_IP_NF_NAT_NEEDED=y
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_TARGET_REDIRECT=m
-# CONFIG_IP_NF_TARGET_NETMAP is not set
-# CONFIG_IP_NF_TARGET_SAME is not set
-CONFIG_IP_NF_NAT_SNMP_BASIC=m
-CONFIG_IP_NF_NAT_IRC=m
-CONFIG_IP_NF_NAT_FTP=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_TOS=m
 # CONFIG_IP_NF_TARGET_ECN is not set
-# CONFIG_IP_NF_TARGET_DSCP is not set
-CONFIG_IP_NF_TARGET_MARK=m
-# CONFIG_IP_NF_TARGET_CLASSIFY is not set
 # CONFIG_IP_NF_TARGET_TTL is not set
 # CONFIG_IP_NF_RAW is not set
 CONFIG_IP_NF_ARPTABLES=m
@@ -372,16 +431,9 @@
 # Bridge: Netfilter Configuration
 #
 # CONFIG_BRIDGE_NF_EBTABLES is not set
-
-#
-# DCCP Configuration (EXPERIMENTAL)
-#
 # CONFIG_IP_DCCP is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
 # CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
 CONFIG_ATM=y
 CONFIG_ATM_CLIP=y
 # CONFIG_ATM_CLIP_NO_ICMP is not set
@@ -397,25 +449,17 @@
 CONFIG_IPX=m
 # CONFIG_IPX_INTERN is not set
 CONFIG_ATALK=m
-CONFIG_DEV_APPLETALK=y
+CONFIG_DEV_APPLETALK=m
 CONFIG_IPDDP=m
 CONFIG_IPDDP_ENCAP=y
 CONFIG_IPDDP_DECAP=y
 CONFIG_X25=m
 CONFIG_LAPB=m
-# CONFIG_NET_DIVERT is not set
 CONFIG_ECONET=m
 CONFIG_ECONET_AUNUDP=y
 CONFIG_ECONET_NATIVE=y
 CONFIG_WAN_ROUTER=m
-
-#
-# QoS and/or fair queueing
-#
 CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CLK_JIFFIES=y
-# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
-# CONFIG_NET_SCH_CLK_CPU is not set
 
 #
 # Queueing/Scheduling
@@ -425,6 +469,7 @@
 # CONFIG_NET_SCH_HFSC is not set
 # CONFIG_NET_SCH_ATM is not set
 CONFIG_NET_SCH_PRIO=m
+# CONFIG_NET_SCH_RR is not set
 CONFIG_NET_SCH_RED=m
 CONFIG_NET_SCH_SFQ=m
 CONFIG_NET_SCH_TEQL=m
@@ -449,10 +494,17 @@
 CONFIG_NET_CLS_RSVP=m
 CONFIG_NET_CLS_RSVP6=m
 # CONFIG_NET_EMATCH is not set
-# CONFIG_NET_CLS_ACT is not set
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=y
+# CONFIG_NET_ACT_GACT is not set
+# CONFIG_NET_ACT_MIRRED is not set
+# CONFIG_NET_ACT_IPT is not set
+# CONFIG_NET_ACT_NAT is not set
+# CONFIG_NET_ACT_PEDIT is not set
+# CONFIG_NET_ACT_SIMP is not set
 CONFIG_NET_CLS_POLICE=y
 # CONFIG_NET_CLS_IND is not set
-CONFIG_NET_ESTIMATOR=y
+CONFIG_NET_SCH_FIFO=y
 
 #
 # Network testing
@@ -461,7 +513,18 @@
 # CONFIG_HAMRADIO is not set
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+CONFIG_FIB_RULES=y
+
+#
+# Wireless
+#
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_EXT is not set
+# CONFIG_MAC80211 is not set
 # CONFIG_IEEE80211 is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
 
 #
 # Device Drivers
@@ -470,19 +533,14 @@
 #
 # Generic Driver Options
 #
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_STANDALONE=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
+CONFIG_FW_LOADER=y
 # CONFIG_DEBUG_DRIVER is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
 # CONFIG_CONNECTOR is not set
-
-#
-# Memory Technology Devices (MTD)
-#
 CONFIG_MTD=y
 # CONFIG_MTD_DEBUG is not set
 # CONFIG_MTD_CONCAT is not set
@@ -498,11 +556,14 @@
 # User Modules And Translation Layers
 #
 CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
 CONFIG_MTD_BLOCK=y
 # CONFIG_FTL is not set
 # CONFIG_NFTL is not set
 # CONFIG_INFTL is not set
 # CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
 
 #
 # RAM/ROM/Flash chip drivers
@@ -528,7 +589,6 @@
 # CONFIG_MTD_RAM is not set
 # CONFIG_MTD_ROM is not set
 # CONFIG_MTD_ABSENT is not set
-# CONFIG_MTD_XIP is not set
 
 #
 # Mapping drivers for chip access
@@ -538,6 +598,7 @@
 # CONFIG_MTD_ARM_INTEGRATOR is not set
 CONFIG_MTD_IXP4XX=y
 # CONFIG_MTD_PCI is not set
+# CONFIG_MTD_INTEL_VR_NOR is not set
 # CONFIG_MTD_PLATRAM is not set
 
 #
@@ -547,7 +608,6 @@
 # CONFIG_MTD_SLRAM is not set
 # CONFIG_MTD_PHRAM is not set
 # CONFIG_MTD_MTDRAM is not set
-# CONFIG_MTD_BLKMTD is not set
 # CONFIG_MTD_BLOCK2MTD is not set
 
 #
@@ -556,33 +616,24 @@
 # CONFIG_MTD_DOC2000 is not set
 # CONFIG_MTD_DOC2001 is not set
 # CONFIG_MTD_DOC2001PLUS is not set
-
-#
-# NAND Flash Device Drivers
-#
 CONFIG_MTD_NAND=m
 # CONFIG_MTD_NAND_VERIFY_WRITE is not set
+# CONFIG_MTD_NAND_ECC_SMC is not set
+# CONFIG_MTD_NAND_MUSEUM_IDS is not set
 CONFIG_MTD_NAND_IDS=m
 # CONFIG_MTD_NAND_DISKONCHIP is not set
+# CONFIG_MTD_NAND_CAFE is not set
 # CONFIG_MTD_NAND_NANDSIM is not set
-
-#
-# OneNAND Flash Device Drivers
-#
+# CONFIG_MTD_NAND_PLATFORM is not set
+# CONFIG_MTD_ALAUDA is not set
 # CONFIG_MTD_ONENAND is not set
 
 #
-# Parallel port support
+# UBI - Unsorted block images
 #
+# CONFIG_MTD_UBI is not set
 # CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
-
-#
-# Block devices
-#
+CONFIG_BLK_DEV=y
 # CONFIG_BLK_CPQ_DA is not set
 # CONFIG_BLK_CPQ_CISS_DA is not set
 # CONFIG_BLK_DEV_DAC960 is not set
@@ -592,17 +643,20 @@
 # CONFIG_BLK_DEV_CRYPTOLOOP is not set
 # CONFIG_BLK_DEV_NBD is not set
 # CONFIG_BLK_DEV_SX8 is not set
+# CONFIG_BLK_DEV_UB is not set
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_SIZE=8192
-CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
 # CONFIG_CDROM_PKTCDVD is not set
 # CONFIG_ATA_OVER_ETH is not set
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
+CONFIG_MISC_DEVICES=y
+# CONFIG_PHANTOM is not set
+# CONFIG_EEPROM_93CX6 is not set
+# CONFIG_SGI_IOC4 is not set
+# CONFIG_TIFM_CORE is not set
 CONFIG_IDE=y
+CONFIG_IDE_MAX_HWIFS=4
 CONFIG_BLK_DEV_IDE=y
 
 #
@@ -614,24 +668,28 @@
 # CONFIG_BLK_DEV_IDECD is not set
 # CONFIG_BLK_DEV_IDETAPE is not set
 # CONFIG_BLK_DEV_IDEFLOPPY is not set
+# CONFIG_BLK_DEV_IDESCSI is not set
 # CONFIG_IDE_TASK_IOCTL is not set
+CONFIG_IDE_PROC_FS=y
 
 #
 # IDE chipset support/bugfixes
 #
 CONFIG_IDE_GENERIC=y
+# CONFIG_BLK_DEV_PLATFORM is not set
+
+#
+# PCI IDE chipsets support
+#
 CONFIG_BLK_DEV_IDEPCI=y
 # CONFIG_IDEPCI_SHARE_IRQ is not set
+CONFIG_IDEPCI_PCIBUS_ORDER=y
 # CONFIG_BLK_DEV_OFFBOARD is not set
 # CONFIG_BLK_DEV_GENERIC is not set
 # CONFIG_BLK_DEV_OPTI621 is not set
-# CONFIG_BLK_DEV_SL82C105 is not set
 CONFIG_BLK_DEV_IDEDMA_PCI=y
-# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
-# CONFIG_IDEDMA_PCI_AUTO is not set
 # CONFIG_BLK_DEV_AEC62XX is not set
 # CONFIG_BLK_DEV_ALI15X3 is not set
-# CONFIG_BLK_DEV_AMD74XX is not set
 CONFIG_BLK_DEV_CMD64X=y
 # CONFIG_BLK_DEV_TRIFLEX is not set
 # CONFIG_BLK_DEV_CY82C693 is not set
@@ -639,93 +697,163 @@
 # CONFIG_BLK_DEV_CS5530 is not set
 # CONFIG_BLK_DEV_HPT34X is not set
 CONFIG_BLK_DEV_HPT366=y
+# CONFIG_BLK_DEV_JMICRON is not set
 # CONFIG_BLK_DEV_SC1200 is not set
 # CONFIG_BLK_DEV_PIIX is not set
+# CONFIG_BLK_DEV_IT8213 is not set
 # CONFIG_BLK_DEV_IT821X is not set
 # CONFIG_BLK_DEV_NS87415 is not set
 # CONFIG_BLK_DEV_PDC202XX_OLD is not set
 CONFIG_BLK_DEV_PDC202XX_NEW=y
-# CONFIG_PDC202XX_FORCE is not set
 # CONFIG_BLK_DEV_SVWKS is not set
 # CONFIG_BLK_DEV_SIIMAGE is not set
+# CONFIG_BLK_DEV_SL82C105 is not set
 # CONFIG_BLK_DEV_SLC90E66 is not set
 # CONFIG_BLK_DEV_TRM290 is not set
 # CONFIG_BLK_DEV_VIA82CXXX is not set
+# CONFIG_BLK_DEV_TC86C001 is not set
 # CONFIG_IDE_ARM is not set
 CONFIG_BLK_DEV_IDEDMA=y
-# CONFIG_IDEDMA_IVB is not set
-# CONFIG_IDEDMA_AUTO is not set
+CONFIG_IDE_ARCH_OBSOLETE_INIT=y
 # CONFIG_BLK_DEV_HD is not set
 
 #
 # SCSI device support
 #
 # CONFIG_RAID_ATTRS is not set
-# CONFIG_SCSI is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_TGT is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_SCSI_PROC_FS is not set
 
 #
-# Multi-device support (RAID and LVM)
+# SCSI support type (disk, tape, CD-ROM)
 #
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+# CONFIG_SCSI_SCAN_ASYNC is not set
+CONFIG_SCSI_WAIT_SCAN=m
+
+#
+# SCSI Transports
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+# CONFIG_ATA_NONSTANDARD is not set
+# CONFIG_SATA_AHCI is not set
+# CONFIG_SATA_SVW is not set
+# CONFIG_ATA_PIIX is not set
+# CONFIG_SATA_MV is not set
+# CONFIG_SATA_NV is not set
+# CONFIG_PDC_ADMA is not set
+# CONFIG_SATA_QSTOR is not set
+# CONFIG_SATA_PROMISE is not set
+# CONFIG_SATA_SX4 is not set
+# CONFIG_SATA_SIL is not set
+# CONFIG_SATA_SIL24 is not set
+# CONFIG_SATA_SIS is not set
+# CONFIG_SATA_ULI is not set
+# CONFIG_SATA_VIA is not set
+# CONFIG_SATA_VITESSE is not set
+# CONFIG_SATA_INIC162X is not set
+# CONFIG_PATA_ALI is not set
+# CONFIG_PATA_AMD is not set
+CONFIG_PATA_ARTOP=y
+# CONFIG_PATA_ATIIXP is not set
+# CONFIG_PATA_CMD640_PCI is not set
+# CONFIG_PATA_CMD64X is not set
+# CONFIG_PATA_CS5520 is not set
+# CONFIG_PATA_CS5530 is not set
+# CONFIG_PATA_CYPRESS is not set
+# CONFIG_PATA_EFAR is not set
+# CONFIG_ATA_GENERIC is not set
+# CONFIG_PATA_HPT366 is not set
+# CONFIG_PATA_HPT37X is not set
+# CONFIG_PATA_HPT3X2N is not set
+# CONFIG_PATA_HPT3X3 is not set
+# CONFIG_PATA_IT821X is not set
+# CONFIG_PATA_IT8213 is not set
+# CONFIG_PATA_JMICRON is not set
+# CONFIG_PATA_TRIFLEX is not set
+# CONFIG_PATA_MARVELL is not set
+# CONFIG_PATA_MPIIX is not set
+# CONFIG_PATA_OLDPIIX is not set
+# CONFIG_PATA_NETCELL is not set
+# CONFIG_PATA_NS87410 is not set
+# CONFIG_PATA_NS87415 is not set
+# CONFIG_PATA_OPTI is not set
+# CONFIG_PATA_OPTIDMA is not set
+# CONFIG_PATA_PDC_OLD is not set
+# CONFIG_PATA_RADISYS is not set
+# CONFIG_PATA_RZ1000 is not set
+# CONFIG_PATA_SC1200 is not set
+# CONFIG_PATA_SERVERWORKS is not set
+# CONFIG_PATA_PDC2027X is not set
+# CONFIG_PATA_SIL680 is not set
+# CONFIG_PATA_SIS is not set
+# CONFIG_PATA_VIA is not set
+# CONFIG_PATA_WINBOND is not set
+# CONFIG_PATA_PLATFORM is not set
+CONFIG_PATA_IXP4XX_CF=y
 # CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
 # CONFIG_FUSION is not set
 
 #
 # IEEE 1394 (FireWire) support
 #
+# CONFIG_FIREWIRE is not set
 # CONFIG_IEEE1394 is not set
-
-#
-# I2O device support
-#
 # CONFIG_I2O is not set
-
-#
-# Network device support
-#
 CONFIG_NETDEVICES=y
+# CONFIG_NETDEVICES_MULTIQUEUE is not set
+# CONFIG_IFB is not set
 CONFIG_DUMMY=y
 # CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
 # CONFIG_EQUALIZER is not set
 # CONFIG_TUN is not set
-
-#
-# ARCnet devices
-#
+# CONFIG_VETH is not set
 # CONFIG_ARCNET is not set
-
-#
-# PHY device support
-#
 # CONFIG_PHYLIB is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
+CONFIG_IXP4XX_ETH=y
+# CONFIG_AX88796 is not set
 # CONFIG_HAPPYMEAL is not set
 # CONFIG_SUNGEM is not set
 # CONFIG_CASSINI is not set
 # CONFIG_NET_VENDOR_3COM is not set
 # CONFIG_SMC91X is not set
 # CONFIG_DM9000 is not set
-
-#
-# Tulip family network device support
-#
 # CONFIG_NET_TULIP is not set
 # CONFIG_HP100 is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
 CONFIG_NET_PCI=y
 # CONFIG_PCNET32 is not set
 # CONFIG_AMD8111_ETH is not set
 # CONFIG_ADAPTEC_STARFIRE is not set
 # CONFIG_B44 is not set
 # CONFIG_FORCEDETH is not set
-# CONFIG_DGRS is not set
 CONFIG_EEPRO100=y
 # CONFIG_E100 is not set
 # CONFIG_FEALNX is not set
@@ -738,93 +866,76 @@
 # CONFIG_SUNDANCE is not set
 # CONFIG_TLAN is not set
 # CONFIG_VIA_RHINE is not set
-
-#
-# Ethernet (1000 Mbit)
-#
+# CONFIG_SC92031 is not set
+CONFIG_NETDEV_1000=y
 # CONFIG_ACENIC is not set
 # CONFIG_DL2K is not set
 # CONFIG_E1000 is not set
+# CONFIG_E1000E is not set
+# CONFIG_IP1000 is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
 # CONFIG_R8169 is not set
 # CONFIG_SIS190 is not set
 # CONFIG_SKGE is not set
+# CONFIG_SKY2 is not set
 # CONFIG_SK98LIN is not set
 # CONFIG_VIA_VELOCITY is not set
 # CONFIG_TIGON3 is not set
 # CONFIG_BNX2 is not set
-
-#
-# Ethernet (10000 Mbit)
-#
+# CONFIG_QLA3XXX is not set
+# CONFIG_ATL1 is not set
+CONFIG_NETDEV_10000=y
 # CONFIG_CHELSIO_T1 is not set
+# CONFIG_CHELSIO_T3 is not set
+# CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
-
-#
-# Token Ring devices
-#
+# CONFIG_MYRI10GE is not set
+# CONFIG_NETXEN_NIC is not set
+# CONFIG_NIU is not set
+# CONFIG_MLX4_CORE is not set
+# CONFIG_TEHUTI is not set
 # CONFIG_TR is not set
 
 #
-# Wireless LAN (non-hamradio)
+# Wireless LAN
 #
-CONFIG_NET_RADIO=y
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
 
 #
-# Obsolete Wireless cards support (pre-802.11)
+# USB Network Adapters
 #
-# CONFIG_STRIP is not set
-
-#
-# Wireless 802.11b ISA/PCI cards support
-#
-# CONFIG_AIRO is not set
-CONFIG_HERMES=y
-# CONFIG_PLX_HERMES is not set
-# CONFIG_TMD_HERMES is not set
-# CONFIG_NORTEL_HERMES is not set
-CONFIG_PCI_HERMES=y
-# CONFIG_ATMEL is not set
-
-#
-# Prism GT/Duette 802.11(a/b/g) PCI/Cardbus support
-#
-# CONFIG_PRISM54 is not set
-# CONFIG_HOSTAP is not set
-CONFIG_NET_WIRELESS=y
-
-#
-# Wan interfaces
-#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
 CONFIG_WAN=y
-# CONFIG_DSCC4 is not set
 # CONFIG_LANMEDIA is not set
-# CONFIG_SYNCLINK_SYNCPPP is not set
 CONFIG_HDLC=m
-CONFIG_HDLC_RAW=y
+CONFIG_HDLC_RAW=m
 # CONFIG_HDLC_RAW_ETH is not set
-CONFIG_HDLC_CISCO=y
-CONFIG_HDLC_FR=y
-CONFIG_HDLC_PPP=y
-CONFIG_HDLC_X25=y
+CONFIG_HDLC_CISCO=m
+CONFIG_HDLC_FR=m
+CONFIG_HDLC_PPP=m
+CONFIG_HDLC_X25=m
 # CONFIG_PCI200SYN is not set
 # CONFIG_WANXL is not set
 # CONFIG_PC300 is not set
+# CONFIG_PC300TOO is not set
 # CONFIG_FARSYNC is not set
+# CONFIG_DSCC4 is not set
+# CONFIG_IXP4XX_HSS is not set
 CONFIG_DLCI=m
-CONFIG_DLCI_COUNT=24
 CONFIG_DLCI_MAX=8
-CONFIG_WAN_ROUTER_DRIVERS=y
+CONFIG_WAN_ROUTER_DRIVERS=m
 # CONFIG_CYCLADES_SYNC is not set
 # CONFIG_LAPBETHER is not set
 # CONFIG_X25_ASY is not set
-
-#
-# ATM drivers
-#
+CONFIG_ATM_DRIVERS=y
 # CONFIG_ATM_DUMMY is not set
 CONFIG_ATM_TCP=m
 # CONFIG_ATM_LANAI is not set
@@ -842,20 +953,19 @@
 # CONFIG_HIPPI is not set
 # CONFIG_PPP is not set
 # CONFIG_SLIP is not set
+# CONFIG_NET_FC is not set
 # CONFIG_SHAPER is not set
 # CONFIG_NETCONSOLE is not set
 # CONFIG_NETPOLL is not set
 # CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
 # CONFIG_ISDN is not set
 
 #
 # Input device support
 #
 CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
 
 #
 # Userland interfaces
@@ -865,7 +975,6 @@
 CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
 # CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
 # CONFIG_INPUT_EVDEV is not set
 # CONFIG_INPUT_EVBUG is not set
 
@@ -875,8 +984,16 @@
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
 # CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_IXP4XX_BEEPER=y
+# CONFIG_INPUT_ATI_REMOTE is not set
+# CONFIG_INPUT_ATI_REMOTE2 is not set
+# CONFIG_INPUT_KEYSPAN_REMOTE is not set
+# CONFIG_INPUT_POWERMATE is not set
+# CONFIG_INPUT_YEALINK is not set
+# CONFIG_INPUT_UINPUT is not set
 
 #
 # Hardware I/O ports
@@ -895,7 +1012,9 @@
 #
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_PCI=y
 CONFIG_SERIAL_8250_NR_UARTS=2
+CONFIG_SERIAL_8250_RUNTIME_UARTS=2
 # CONFIG_SERIAL_8250_EXTENDED is not set
 
 #
@@ -907,51 +1026,17 @@
 CONFIG_UNIX98_PTYS=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
-
-#
-# IPMI
-#
 # CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-CONFIG_WATCHDOG=y
-# CONFIG_WATCHDOG_NOWAYOUT is not set
-
-#
-# Watchdog Device Drivers
-#
-# CONFIG_SOFT_WATCHDOG is not set
-CONFIG_IXP4XX_WATCHDOG=y
-
-#
-# PCI-based Watchdog Cards
-#
-# CONFIG_PCIPCWATCHDOG is not set
-# CONFIG_WDTPCI is not set
+CONFIG_HW_RANDOM=m
+CONFIG_HW_RANDOM_IXP4XX=m
 # CONFIG_NVRAM is not set
-# CONFIG_RTC is not set
-# CONFIG_DTLK is not set
 # CONFIG_R3964 is not set
 # CONFIG_APPLICOM is not set
-
-#
-# Ftape, the floppy tape device driver
-#
-# CONFIG_DRM is not set
 # CONFIG_RAW_DRIVER is not set
-
-#
-# TPM devices
-#
 # CONFIG_TCG_TPM is not set
-# CONFIG_TELCLOCK is not set
-
-#
-# I2C support
-#
+CONFIG_DEVPORT=y
 CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
 CONFIG_I2C_CHARDEV=y
 
 #
@@ -969,57 +1054,68 @@
 # CONFIG_I2C_ALI15X3 is not set
 # CONFIG_I2C_AMD756 is not set
 # CONFIG_I2C_AMD8111 is not set
+# CONFIG_I2C_GPIO is not set
 # CONFIG_I2C_I801 is not set
 # CONFIG_I2C_I810 is not set
 # CONFIG_I2C_PIIX4 is not set
 # CONFIG_I2C_IOP3XX is not set
 CONFIG_I2C_IXP4XX=y
 # CONFIG_I2C_NFORCE2 is not set
+# CONFIG_I2C_OCORES is not set
 # CONFIG_I2C_PARPORT_LIGHT is not set
 # CONFIG_I2C_PROSAVAGE is not set
 # CONFIG_I2C_SAVAGE4 is not set
-# CONFIG_SCx200_ACB is not set
+# CONFIG_I2C_SIMTEC is not set
 # CONFIG_I2C_SIS5595 is not set
 # CONFIG_I2C_SIS630 is not set
 # CONFIG_I2C_SIS96X is not set
+# CONFIG_I2C_TAOS_EVM is not set
 # CONFIG_I2C_STUB is not set
+# CONFIG_I2C_TINY_USB is not set
 # CONFIG_I2C_VIA is not set
 # CONFIG_I2C_VIAPRO is not set
 # CONFIG_I2C_VOODOO3 is not set
-# CONFIG_I2C_PCA_ISA is not set
 
 #
 # Miscellaneous I2C Chip support
 #
 # CONFIG_SENSORS_DS1337 is not set
 # CONFIG_SENSORS_DS1374 is not set
+# CONFIG_DS1682 is not set
 CONFIG_SENSORS_EEPROM=y
 # CONFIG_SENSORS_PCF8574 is not set
 # CONFIG_SENSORS_PCA9539 is not set
 # CONFIG_SENSORS_PCF8591 is not set
-# CONFIG_SENSORS_RTC8564 is not set
 # CONFIG_SENSORS_MAX6875 is not set
-# CONFIG_RTC_X1205_I2C is not set
+# CONFIG_SENSORS_TSL2550 is not set
 # CONFIG_I2C_DEBUG_CORE is not set
 # CONFIG_I2C_DEBUG_ALGO is not set
 # CONFIG_I2C_DEBUG_BUS is not set
 # CONFIG_I2C_DEBUG_CHIP is not set
 
 #
-# Hardware Monitoring support
+# SPI support
 #
+# CONFIG_SPI is not set
+# CONFIG_SPI_MASTER is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
 CONFIG_HWMON=y
 # CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_AD7418 is not set
 # CONFIG_SENSORS_ADM1021 is not set
 # CONFIG_SENSORS_ADM1025 is not set
 # CONFIG_SENSORS_ADM1026 is not set
+# CONFIG_SENSORS_ADM1029 is not set
 # CONFIG_SENSORS_ADM1031 is not set
 # CONFIG_SENSORS_ADM9240 is not set
-# CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ADT7470 is not set
 # CONFIG_SENSORS_ATXP1 is not set
 # CONFIG_SENSORS_DS1621 is not set
-# CONFIG_SENSORS_FSCHER is not set
-# CONFIG_SENSORS_FSCPOS is not set
+# CONFIG_SENSORS_I5K_AMB is not set
+# CONFIG_SENSORS_F71805F is not set
+# CONFIG_SENSORS_F71882FG is not set
+# CONFIG_SENSORS_F75375S is not set
 # CONFIG_SENSORS_GL518SM is not set
 # CONFIG_SENSORS_GL520SM is not set
 # CONFIG_SENSORS_IT87 is not set
@@ -1033,67 +1129,268 @@
 # CONFIG_SENSORS_LM87 is not set
 # CONFIG_SENSORS_LM90 is not set
 # CONFIG_SENSORS_LM92 is not set
+# CONFIG_SENSORS_LM93 is not set
 # CONFIG_SENSORS_MAX1619 is not set
+# CONFIG_SENSORS_MAX6650 is not set
 # CONFIG_SENSORS_PC87360 is not set
+# CONFIG_SENSORS_PC87427 is not set
 # CONFIG_SENSORS_SIS5595 is not set
+# CONFIG_SENSORS_DME1737 is not set
 # CONFIG_SENSORS_SMSC47M1 is not set
+# CONFIG_SENSORS_SMSC47M192 is not set
 # CONFIG_SENSORS_SMSC47B397 is not set
+# CONFIG_SENSORS_THMC50 is not set
 # CONFIG_SENSORS_VIA686A is not set
+# CONFIG_SENSORS_VT1211 is not set
+# CONFIG_SENSORS_VT8231 is not set
 # CONFIG_SENSORS_W83781D is not set
+# CONFIG_SENSORS_W83791D is not set
 # CONFIG_SENSORS_W83792D is not set
+# CONFIG_SENSORS_W83793 is not set
 # CONFIG_SENSORS_W83L785TS is not set
 # CONFIG_SENSORS_W83627HF is not set
 # CONFIG_SENSORS_W83627EHF is not set
 # CONFIG_HWMON_DEBUG_CHIP is not set
+CONFIG_WATCHDOG=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
 
 #
-# Misc devices
+# Watchdog Device Drivers
 #
+# CONFIG_SOFT_WATCHDOG is not set
+CONFIG_IXP4XX_WATCHDOG=y
 
 #
-# Multimedia Capabilities Port drivers
+# PCI-based Watchdog Cards
 #
+# CONFIG_PCIPCWATCHDOG is not set
+# CONFIG_WDTPCI is not set
+
+#
+# USB-based Watchdog Cards
+#
+# CONFIG_USBPCWATCHDOG is not set
+
+#
+# Sonics Silicon Backplane
+#
+CONFIG_SSB_POSSIBLE=y
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_SM501 is not set
 
 #
 # Multimedia devices
 #
 # CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
+# CONFIG_DVB_CORE is not set
+CONFIG_DAB=y
+# CONFIG_USB_DABUSB is not set
 
 #
 # Graphics support
 #
+# CONFIG_DRM is not set
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
 # CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
 
 #
 # Sound
 #
 # CONFIG_SOUND is not set
+CONFIG_HID_SUPPORT=y
+CONFIG_HID=y
+# CONFIG_HID_DEBUG is not set
+# CONFIG_HIDRAW is not set
 
 #
-# USB support
+# USB Input Devices
 #
+CONFIG_USB_HID=y
+# CONFIG_USB_HIDINPUT_POWERBOOK is not set
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
+CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
-# CONFIG_USB is not set
+CONFIG_USB_ARCH_HAS_EHCI=y
+CONFIG_USB=y
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_DEVICE_CLASS is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_OTG is not set
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_SPLIT_ISO is not set
+# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+# CONFIG_USB_ISP116X_HCD is not set
+CONFIG_USB_OHCI_HCD=y
+# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set
+# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
+CONFIG_USB_UHCI_HCD=y
+# CONFIG_USB_SL811_HCD is not set
+# CONFIG_USB_R8A66597_HCD is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
 
 #
 # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
 #
 
 #
+# may also be needed; see USB_STORAGE Help for more information
+#
+CONFIG_USB_STORAGE=y
+# CONFIG_USB_STORAGE_DEBUG is not set
+# CONFIG_USB_STORAGE_DATAFAB is not set
+# CONFIG_USB_STORAGE_FREECOM is not set
+# CONFIG_USB_STORAGE_ISD200 is not set
+# CONFIG_USB_STORAGE_DPCM is not set
+# CONFIG_USB_STORAGE_USBAT is not set
+# CONFIG_USB_STORAGE_SDDR09 is not set
+# CONFIG_USB_STORAGE_SDDR55 is not set
+# CONFIG_USB_STORAGE_JUMPSHOT is not set
+# CONFIG_USB_STORAGE_ALAUDA is not set
+# CONFIG_USB_STORAGE_KARMA is not set
+# CONFIG_USB_LIBUSUAL is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+# CONFIG_USB_MON is not set
+
+#
+# USB port drivers
+#
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_ADUTUX is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_BERRY_CHARGE is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYPRESS_CY7C63 is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGET is not set
+# CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_FTDI_ELAN is not set
+# CONFIG_USB_APPLEDISPLAY is not set
+# CONFIG_USB_SISUSBVGA is not set
+# CONFIG_USB_LD is not set
+# CONFIG_USB_TRANCEVIBRATOR is not set
+# CONFIG_USB_IOWARRIOR is not set
+# CONFIG_USB_TEST is not set
+
+#
+# USB DSL modem support
+#
+# CONFIG_USB_ATM is not set
+
+#
 # USB Gadget Support
 #
 # CONFIG_USB_GADGET is not set
+# CONFIG_MMC is not set
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
 
 #
-# MMC/SD Card support
+# LED drivers
 #
-# CONFIG_MMC is not set
+# CONFIG_LEDS_IXP4XX is not set
+CONFIG_LEDS_GPIO=y
+
+#
+# LED Triggers
+#
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=y
+CONFIG_LEDS_TRIGGER_IDE_DISK=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_RTC_LIB=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
+# CONFIG_RTC_DEBUG is not set
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_SYSFS=y
+CONFIG_RTC_INTF_PROC=y
+CONFIG_RTC_INTF_DEV=y
+# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
+# CONFIG_RTC_DRV_TEST is not set
+
+#
+# I2C RTC drivers
+#
+# CONFIG_RTC_DRV_DS1307 is not set
+# CONFIG_RTC_DRV_DS1374 is not set
+# CONFIG_RTC_DRV_DS1672 is not set
+# CONFIG_RTC_DRV_MAX6900 is not set
+# CONFIG_RTC_DRV_RS5C372 is not set
+# CONFIG_RTC_DRV_ISL1208 is not set
+CONFIG_RTC_DRV_X1205=y
+CONFIG_RTC_DRV_PCF8563=y
+# CONFIG_RTC_DRV_PCF8583 is not set
+# CONFIG_RTC_DRV_M41T80 is not set
+
+#
+# SPI RTC drivers
+#
+
+#
+# Platform RTC drivers
+#
+# CONFIG_RTC_DRV_CMOS is not set
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_STK17TA8 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+
+#
+# on-CPU RTC drivers
+#
 
 #
 # File systems
@@ -1107,16 +1404,19 @@
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 # CONFIG_EXT3_FS_SECURITY is not set
+# CONFIG_EXT4DEV_FS is not set
 CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
 CONFIG_FS_MBCACHE=y
 # CONFIG_REISERFS_FS is not set
 # CONFIG_JFS_FS is not set
 CONFIG_FS_POSIX_ACL=y
 # CONFIG_XFS_FS is not set
+# CONFIG_GFS2_FS is not set
+# CONFIG_OCFS2_FS is not set
 # CONFIG_MINIX_FS is not set
 # CONFIG_ROMFS_FS is not set
 CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
 # CONFIG_QUOTA is not set
 CONFIG_DNOTIFY=y
 # CONFIG_AUTOFS_FS is not set
@@ -1140,11 +1440,12 @@
 # Pseudo filesystems
 #
 CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
 CONFIG_SYSFS=y
 CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
 # CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
+# CONFIG_CONFIGFS_FS is not set
 
 #
 # Miscellaneous filesystems
@@ -1156,13 +1457,15 @@
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-# CONFIG_JFFS_FS is not set
 CONFIG_JFFS2_FS=y
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
+# CONFIG_JFFS2_FS_WBUF_VERIFY is not set
 # CONFIG_JFFS2_SUMMARY is not set
+# CONFIG_JFFS2_FS_XATTR is not set
 # CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
 CONFIG_JFFS2_ZLIB=y
+# CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
 # CONFIG_CRAMFS is not set
@@ -1171,10 +1474,7 @@
 # CONFIG_QNX4FS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
+CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 # CONFIG_NFS_V3_ACL is not set
@@ -1186,6 +1486,7 @@
 CONFIG_LOCKD_V4=y
 CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=y
+# CONFIG_SUNRPC_BIND34 is not set
 # CONFIG_RPCSEC_GSS_KRB5 is not set
 # CONFIG_RPCSEC_GSS_SPKM3 is not set
 # CONFIG_SMB_FS is not set
@@ -1193,7 +1494,6 @@
 # CONFIG_NCP_FS is not set
 # CONFIG_CODA_FS is not set
 # CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
 
 #
 # Partition Types
@@ -1213,37 +1513,53 @@
 # CONFIG_SGI_PARTITION is not set
 # CONFIG_ULTRIX_PARTITION is not set
 # CONFIG_SUN_PARTITION is not set
+# CONFIG_KARMA_PARTITION is not set
 # CONFIG_EFI_PARTITION is not set
-
-#
-# Native Language Support
-#
+# CONFIG_SYSV68_PARTITION is not set
 # CONFIG_NLS is not set
-
-#
-# Profiling support
-#
+# CONFIG_DLM is not set
+CONFIG_INSTRUMENTATION=y
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 
 #
 # Kernel hacking
 #
 # CONFIG_PRINTK_TIME is not set
-CONFIG_DEBUG_KERNEL=y
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
 CONFIG_MAGIC_SYSRQ=y
-CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
 CONFIG_DETECT_SOFTLOCKUP=y
+CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_SLUB_DEBUG_ON is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
 # CONFIG_DEBUG_KOBJECT is not set
 CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_INFO is not set
-# CONFIG_DEBUG_FS is not set
 # CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
 CONFIG_FRAME_POINTER=y
+CONFIG_FORCED_INLINING=y
+# CONFIG_BOOT_PRINTK_DELAY is not set
 # CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_SAMPLES is not set
 # CONFIG_DEBUG_USER is not set
 CONFIG_DEBUG_ERRORS=y
 CONFIG_DEBUG_LL=y
@@ -1254,22 +1570,22 @@
 #
 # CONFIG_KEYS is not set
 # CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
 
 #
-# Hardware crypto devices
-#
-
-#
 # Library routines
 #
+CONFIG_BITREVERSE=y
 # CONFIG_CRC_CCITT is not set
 # CONFIG_CRC16 is not set
+# CONFIG_CRC_ITU_T is not set
 CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=y
+CONFIG_PLIST=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index faa7619..00d44c6 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -20,6 +20,7 @@
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o kprobes-decode.o
+obj-$(CONFIG_ATAGS_PROC)	+= atags.o
 obj-$(CONFIG_OABI_COMPAT)	+= sys_oabi-compat.o
 
 obj-$(CONFIG_CRUNCH)		+= crunch.o crunch-bits.o
diff --git a/arch/arm/kernel/atags.c b/arch/arm/kernel/atags.c
new file mode 100644
index 0000000..e2e934c
--- /dev/null
+++ b/arch/arm/kernel/atags.c
@@ -0,0 +1,129 @@
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/proc_fs.h>
+#include <asm/setup.h>
+#include <asm/types.h>
+#include <asm/page.h>
+
+/* Snapshot of the boot ATAG list that read_buffer() serves from /proc. */
+struct buffer {
+	size_t size;	/* number of valid bytes in data */
+	char *data;	/* kmalloc()ed copy of the tag list */
+};
+static struct buffer tags_buffer;
+
+/*
+ * Legacy read_proc handler backing /proc/atags.
+ *
+ * Copies up to @count bytes of the buffer passed in @data, starting at file
+ * offset @off, to the beginning of @page and returns the number of bytes
+ * copied.  Once @off reaches the end of the data, *@eof is set and 0 is
+ * returned.
+ *
+ * The data for @off is placed at page[0], not at page[off], so *@start has
+ * to point at it.  Leaving *@start NULL selects the procfs convention where
+ * the buffer holds the file from offset 0, and any read that does not begin
+ * at offset 0 then comes back empty.
+ */
+static int
+read_buffer(char* page, char** start, off_t off, int count,
+	int* eof, void* data)
+{
+	struct buffer *buffer = (struct buffer *)data;
+
+	if (off >= buffer->size) {
+		*eof = 1;
+		return 0;
+	}
+
+	count = min((int) (buffer->size - off), count);
+
+	memcpy(page, &buffer->data[off], count);
+	*start = page;
+
+	return count;
+}
+
+/* Creates the "atags" entry (mode 0400) in the procfs root; 0 or -ENOMEM. */
+static int
+create_proc_entries(void)
+{
+	struct proc_dir_entry* tags_entry;
+
+	tags_entry = create_proc_read_entry("atags", 0400, &proc_root, read_buffer, &tags_buffer);
+	if (!tags_entry)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Boot-time scratch copy of the tag list.  Both objects are __initdata, so
+ * init_atags_procfs() moves the data into a kmalloc()ed buffer.
+ */
+static char __initdata atags_copy_buf[KEXEC_BOOT_PARAMS_SIZE];
+static char __initdata *atags_copy;
+
+/*
+ * Saves the first KEXEC_BOOT_PARAMS_SIZE bytes found at @tags.  The copy is
+ * unconditional and fixed-size; anything beyond that is not kept.
+ */
+void __init save_atags(const struct tag *tags)
+{
+	atags_copy = atags_copy_buf;
+	memcpy(atags_copy, tags, KEXEC_BOOT_PARAMS_SIZE);
+}
+
+/*
+ * arch_initcall: sizes the saved tag list, copies it out of init memory and
+ * publishes it through procfs.
+ *
+ * Returns 0 on success, -EIO if save_atags() never ran, -EINVAL if no
+ * terminating tag lies inside the saved copy, or -ENOMEM.
+ */
+static int __init init_atags_procfs(void)
+{
+	struct tag *tag;
+	char *last_hdr;
+	int error;
+
+	if (!atags_copy) {
+		printk(KERN_WARNING "Exporting ATAGs: No saved tags found\n");
+		return -EIO;
+	}
+
+	/*
+	 * Walk to the terminating tag (hdr.size == 0).  save_atags() kept only
+	 * a fixed number of bytes, so stop before reading a header that does
+	 * not lie entirely inside that copy.
+	 */
+	last_hdr = atags_copy + KEXEC_BOOT_PARAMS_SIZE - sizeof(tag->hdr);
+	for (tag = (struct tag *) atags_copy;
+	     (char *) tag <= last_hdr && tag->hdr.size;
+	     tag = tag_next(tag))
+		;
+
+	if ((char *) tag > last_hdr) {
+		printk(KERN_WARNING "Exporting ATAGs: unterminated tag list\n");
+		return -EINVAL;
+	}
+
+	/* Export everything up to and including the terminator's header. */
+	tags_buffer.size = ((char *) tag - atags_copy) + sizeof(tag->hdr);
+	tags_buffer.data = kmalloc(tags_buffer.size, GFP_KERNEL);
+	if (tags_buffer.data == NULL)
+		return -ENOMEM;
+	memcpy(tags_buffer.data, atags_copy, tags_buffer.size);
+
+	error = create_proc_entries();
+	if (error) {
+		printk(KERN_ERR "Exporting ATAGs: not enough memory\n");
+		kfree(tags_buffer.data);
+		tags_buffer.size = 0;
+		tags_buffer.data = NULL;
+	}
+
+	return error;
+}
+
+arch_initcall(init_atags_procfs);
diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h
new file mode 100644
index 0000000..e5f028d
--- /dev/null
+++ b/arch/arm/kernel/atags.h
@@ -0,0 +1,10 @@
+/*
+ * save_atags() keeps a boot-time copy of the ATAG list so that it can be
+ * exported later; without CONFIG_ATAGS_PROC it is a no-op.  The tags are
+ * only read, hence the const, which also matches the definition in atags.c.
+ */
+#ifdef CONFIG_ATAGS_PROC
+extern void save_atags(const struct tag *tags);
+#else
+static inline void save_atags(const struct tag *tags) { }
+#endif
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index cecf658..283e14f 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -359,7 +359,7 @@
 		CALL(sys_kexec_load)
 		CALL(sys_utimensat)
 		CALL(sys_signalfd)
-/* 350 */	CALL(sys_timerfd)
+/* 350 */	CALL(sys_ni_syscall)
 		CALL(sys_eventfd)
 		CALL(sys_fallocate)
 #ifndef syscalls_counted
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 863c664..db8f54a 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -21,6 +21,7 @@
 extern unsigned long kexec_start_address;
 extern unsigned long kexec_indirection_page;
 extern unsigned long kexec_mach_type;
+extern unsigned long kexec_boot_atags;
 
 /*
  * Provide a dummy crash_notes definition while crash dump arrives to arm.
@@ -62,6 +63,7 @@
 	kexec_start_address = image->start;
 	kexec_indirection_page = page_list;
 	kexec_mach_type = machine_arch_type;
+	kexec_boot_atags = image->start - KEXEC_ARM_ZIMAGE_OFFSET + KEXEC_ARM_ATAGS_OFFSET;
 
 	/* copy our kernel relocation code to the control code page */
 	memcpy(reboot_code_buffer,
diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S
index 062c111..61930eb 100644
--- a/arch/arm/kernel/relocate_kernel.S
+++ b/arch/arm/kernel/relocate_kernel.S
@@ -7,23 +7,6 @@
 	.globl relocate_new_kernel
 relocate_new_kernel:
 
-	/* Move boot params back to where the kernel expects them */
-
-	ldr	r0,kexec_boot_params_address
-	teq	r0,#0
-	beq	8f
-
-	ldr	r1,kexec_boot_params_copy
-	mov	r6,#KEXEC_BOOT_PARAMS_SIZE/4
-7:
-	ldr	r5,[r1],#4
-	str	r5,[r0],#4
-	subs	r6,r6,#1
-	bne	7b
-
-8:
-	/* Boot params moved, now go on with the kernel */
-
 	ldr	r0,kexec_indirection_page
 	ldr	r1,kexec_start_address
 
@@ -67,7 +50,7 @@
 	mov lr,r1
 	mov r0,#0
 	ldr r1,kexec_mach_type
-	ldr r2,kexec_boot_params_address
+	ldr r2,kexec_boot_atags
 	mov pc,lr
 
 	.globl kexec_start_address
@@ -82,14 +65,9 @@
 kexec_mach_type:
 	.long	0x0
 
-	/* phy addr where new kernel will expect to find boot params */
-	.globl kexec_boot_params_address
-kexec_boot_params_address:
-	.long	0x0
-
-	/* phy addr where old kernel put a copy of orig boot params */
-	.globl kexec_boot_params_copy
-kexec_boot_params_copy:
+	/* phy addr of the atags for the new kernel */
+	.globl kexec_boot_atags
+kexec_boot_atags:
 	.long	0x0
 
 relocate_new_kernel_end:
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index bf56eb3..d3941a7 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -24,7 +24,6 @@
 #include <linux/interrupt.h>
 #include <linux/smp.h>
 #include <linux/fs.h>
-#include <linux/kexec.h>
 
 #include <asm/cpu.h>
 #include <asm/elf.h>
@@ -39,6 +38,7 @@
 #include <asm/mach/time.h>
 
 #include "compat.h"
+#include "atags.h"
 
 #ifndef MEM_SIZE
 #define MEM_SIZE	(16*1024*1024)
@@ -62,6 +62,7 @@
 extern void _stext, _text, _etext, __data_start, _edata, _end;
 
 unsigned int processor_id;
+EXPORT_SYMBOL(processor_id);
 unsigned int __machine_arch_type;
 EXPORT_SYMBOL(__machine_arch_type);
 
@@ -784,23 +785,6 @@
 }
 arch_initcall(customize_machine);
 
-#ifdef CONFIG_KEXEC
-
-/* Physical addr of where the boot params should be for this machine */
-extern unsigned long kexec_boot_params_address;
-
-/* Physical addr of the buffer into which the boot params are copied */
-extern unsigned long kexec_boot_params_copy;
-
-/* Pointer to the boot params buffer, for manipulation and display */
-unsigned long kexec_boot_params;
-EXPORT_SYMBOL(kexec_boot_params);
-
-/* The buffer itself - make sure it is sized correctly */
-static unsigned long kexec_boot_params_buf[(KEXEC_BOOT_PARAMS_SIZE + 3) / 4];
-
-#endif
-
 void __init setup_arch(char **cmdline_p)
 {
 	struct tag *tags = (struct tag *)&init_tags;
@@ -819,18 +803,6 @@
 	else if (mdesc->boot_params)
 		tags = phys_to_virt(mdesc->boot_params);
 
-#ifdef CONFIG_KEXEC
-	kexec_boot_params_copy = virt_to_phys(kexec_boot_params_buf);
-	kexec_boot_params = (unsigned long)kexec_boot_params_buf;
-	if (__atags_pointer) {
-		kexec_boot_params_address = __atags_pointer;
-		memcpy((void *)kexec_boot_params, tags, KEXEC_BOOT_PARAMS_SIZE);
-	} else if (mdesc->boot_params) {
-		kexec_boot_params_address = mdesc->boot_params;
-		memcpy((void *)kexec_boot_params, tags, KEXEC_BOOT_PARAMS_SIZE);
-	}
-#endif
-
 	/*
 	 * If we have the old style parameters, convert them to
 	 * a tag list.
@@ -846,6 +818,7 @@
 	if (tags->hdr.tag == ATAG_CORE) {
 		if (meminfo.nr_banks != 0)
 			squash_mem_tags(tags);
+		save_atags(tags);
 		parse_tags(tags);
 	}
 
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index eafbb2b..eefae1d 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -150,7 +150,7 @@
 	secondary_data.pgdir = 0;
 
 	*pmd_offset(pgd, PHYS_OFFSET) = __pmd(0);
-	pgd_free(pgd);
+	pgd_free(&init_mm, pgd);
 
 	if (ret) {
 		printk(KERN_CRIT "CPU%u: processor failed to boot\n", cpu);
@@ -290,6 +290,11 @@
 	local_irq_enable();
 	local_fiq_enable();
 
+	/*
+	 * Setup local timer for this CPU.
+	 */
+	local_timer_setup(cpu);
+
 	calibrate_delay();
 
 	smp_store_cpu_info(cpu);
@@ -300,11 +305,6 @@
 	cpu_set(cpu, cpu_online_map);
 
 	/*
-	 * Setup local timer for this CPU.
-	 */
-	local_timer_setup(cpu);
-
-	/*
 	 * OK, it's off to the idle thread for us
 	 */
 	cpu_idle();
@@ -454,6 +454,27 @@
 }
 EXPORT_SYMBOL_GPL(smp_call_function);
 
+int smp_call_function_single(int cpu, void (*func)(void *info), void *info,
+			     int retry, int wait)
+{
+	/* disable preemption so we can't be rescheduled onto another CPU */
+	int current_cpu = get_cpu();
+	int ret = 0;
+
+	if (cpu == current_cpu) {
+		local_irq_disable();
+		func(info);
+		local_irq_enable();
+	} else
+		ret = smp_call_function_on_cpu(func, info, retry, wait,
+					       cpumask_of_cpu(cpu));
+
+	put_cpu();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(smp_call_function_single);
+
 void show_ipi_list(struct seq_file *p)
 {
 	unsigned int cpu;
@@ -481,8 +502,7 @@
 static void ipi_timer(void)
 {
 	irq_enter();
-	profile_tick(CPU_PROFILING);
-	update_process_times(user_mode(get_irq_regs()));
+	local_timer_interrupt();
 	irq_exit();
 }
 
@@ -621,6 +641,11 @@
 	send_ipi_message(mask, IPI_TIMER);
 }
 
+void smp_timer_broadcast(cpumask_t mask)
+{
+	send_ipi_message(mask, IPI_TIMER);
+}
+
 void smp_send_stop(void)
 {
 	cpumask_t mask = cpu_online_map;
diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig
index 5b0422c..074dcd5 100644
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -253,6 +253,36 @@
 	  system clock (of at least several MHz), rounding is less of a
 	  problem so it can be safer to use a decimal values like 100.
 
+choice
+	prompt "Select a UART for early kernel messages"
+
+config AT91_EARLY_DBGU
+	bool "DBGU"
+
+config AT91_EARLY_USART0
+	bool "USART0"
+
+config AT91_EARLY_USART1
+	bool "USART1"
+
+config AT91_EARLY_USART2
+	bool "USART2"
+	depends on ! ARCH_AT91X40
+
+config AT91_EARLY_USART3
+	bool "USART3"
+	depends on (ARCH_AT91RM9200 || ARCH_AT91SAM9RL || ARCH_AT91SAM9260)
+
+config AT91_EARLY_USART4
+	bool "USART4"
+	depends on ARCH_AT91SAM9260
+
+config AT91_EARLY_USART5
+	bool "USART5"
+	depends on ARCH_AT91SAM9260
+
+endchoice
+
 endmenu
 
 endif
diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c
index 5c090c9..e38d237 100644
--- a/arch/arm/mach-at91/at91sam926x_time.c
+++ b/arch/arm/mach-at91/at91sam926x_time.c
@@ -49,8 +49,6 @@
 	volatile long nr_ticks;
 
 	if (at91_sys_read(AT91_PIT_SR) & AT91_PIT_PITS) {	/* This is a shared interrupt */
-		write_seqlock(&xtime_lock);
-
 		/* Get number to ticks performed before interrupt and clear PIT interrupt */
 		nr_ticks = PIT_PICNT(at91_sys_read(AT91_PIT_PIVR));
 		do {
@@ -58,7 +56,6 @@
 			nr_ticks--;
 		} while (nr_ticks);
 
-		write_sequnlock(&xtime_lock);
 		return IRQ_HANDLED;
 	} else
 		return IRQ_NONE;		/* not handled */
diff --git a/arch/arm/mach-at91/generic.h b/arch/arm/mach-at91/generic.h
index b5daf7f..7b9ce7a 100644
--- a/arch/arm/mach-at91/generic.h
+++ b/arch/arm/mach-at91/generic.h
@@ -47,6 +47,9 @@
 #define AT91RM9200_BGA		4	/* AT91RM9200 BGA package has 4 banks */
 
 struct at91_gpio_bank {
+	unsigned chipbase;		/* bank's first GPIO number */
+	void __iomem *regbase;		/* base of register bank */
+	struct at91_gpio_bank *next;	/* bank sharing same IRQ/clock/... */
 	unsigned short id;		/* peripheral ID */
 	unsigned long offset;		/* offset from system peripheral base */
 	struct clk *clock;		/* associated clock */
diff --git a/arch/arm/mach-at91/gpio.c b/arch/arm/mach-at91/gpio.c
index 6aeddd6..f629c2b 100644
--- a/arch/arm/mach-at91/gpio.c
+++ b/arch/arm/mach-at91/gpio.c
@@ -33,12 +33,10 @@
 
 static inline void __iomem *pin_to_controller(unsigned pin)
 {
-	void __iomem *sys_base = (void __iomem *) AT91_VA_BASE_SYS;
-
 	pin -= PIN_BASE;
 	pin /= 32;
 	if (likely(pin < gpio_banks))
-		return sys_base + gpio[pin].offset;
+		return gpio[pin].regbase;
 
 	return NULL;
 }
@@ -294,11 +292,11 @@
 	int i;
 
 	for (i = 0; i < gpio_banks; i++) {
-		u32 pio = gpio[i].offset;
+		void __iomem	*pio = gpio[i].regbase;
 
-		backups[i] = at91_sys_read(pio + PIO_IMR);
-		at91_sys_write(pio + PIO_IDR, backups[i]);
-		at91_sys_write(pio + PIO_IER, wakeups[i]);
+		backups[i] = __raw_readl(pio + PIO_IMR);
+		__raw_writel(backups[i], pio + PIO_IDR);
+		__raw_writel(wakeups[i], pio + PIO_IER);
 
 		if (!wakeups[i])
 			clk_disable(gpio[i].clock);
@@ -315,13 +313,13 @@
 	int i;
 
 	for (i = 0; i < gpio_banks; i++) {
-		u32 pio = gpio[i].offset;
+		void __iomem	*pio = gpio[i].regbase;
 
 		if (!wakeups[i])
 			clk_enable(gpio[i].clock);
 
-		at91_sys_write(pio + PIO_IDR, wakeups[i]);
-		at91_sys_write(pio + PIO_IER, backups[i]);
+		__raw_writel(wakeups[i], pio + PIO_IDR);
+		__raw_writel(backups[i], pio + PIO_IER);
 	}
 }
 
@@ -361,7 +359,13 @@
 
 static int gpio_irq_type(unsigned pin, unsigned type)
 {
-	return (type == IRQT_BOTHEDGE) ? 0 : -EINVAL;
+	switch (type) {
+	case IRQ_TYPE_NONE:
+	case IRQ_TYPE_EDGE_BOTH:
+		return 0;
+	default:
+		return -EINVAL;
+	}
 }
 
 static struct irq_chip gpio_irqchip = {
@@ -376,20 +380,30 @@
 {
 	unsigned	pin;
 	struct irq_desc	*gpio;
+	struct at91_gpio_bank *bank;
 	void __iomem	*pio;
 	u32		isr;
 
-	pio = get_irq_chip_data(irq);
+	bank = get_irq_chip_data(irq);
+	pio = bank->regbase;
 
 	/* temporarily mask (level sensitive) parent IRQ */
 	desc->chip->ack(irq);
 	for (;;) {
-		/* reading ISR acks the pending (edge triggered) GPIO interrupt */
+		/* Reading ISR acks pending (edge triggered) GPIO interrupts.
+		 * When none are pending, we're finished unless we need
+		 * to process multiple banks (like ID_PIOCDE on sam9263).
+		 */
 		isr = __raw_readl(pio + PIO_ISR) & __raw_readl(pio + PIO_IMR);
-		if (!isr)
-			break;
+		if (!isr) {
+			if (!bank->next)
+				break;
+			bank = bank->next;
+			pio = bank->regbase;
+			continue;
+		}
 
-		pin = (unsigned) get_irq_data(irq);
+		pin = bank->chipbase;
 		gpio = &irq_desc[pin];
 
 		while (isr) {
@@ -481,24 +495,21 @@
  */
 void __init at91_gpio_irq_setup(void)
 {
-	unsigned	pioc, pin;
+	unsigned		pioc, pin;
+	struct at91_gpio_bank	*this, *prev;
 
-	for (pioc = 0, pin = PIN_BASE;
-			pioc < gpio_banks;
-			pioc++) {
-		void __iomem	*controller;
-		unsigned	id = gpio[pioc].id;
+	for (pioc = 0, pin = PIN_BASE, this = gpio, prev = NULL;
+			pioc++ < gpio_banks;
+			prev = this, this++) {
+		unsigned	id = this->id;
 		unsigned	i;
 
-		clk_enable(gpio[pioc].clock);	/* enable PIO controller's clock */
+		/* enable PIO controller's clock */
+		clk_enable(this->clock);
 
-		controller = (void __iomem *) AT91_VA_BASE_SYS + gpio[pioc].offset;
-		__raw_writel(~0, controller + PIO_IDR);
+		__raw_writel(~0, this->regbase + PIO_IDR);
 
-		set_irq_data(id, (void *) pin);
-		set_irq_chip_data(id, controller);
-
-		for (i = 0; i < 32; i++, pin++) {
+		for (i = 0, pin = this->chipbase; i < 32; i++, pin++) {
 			/*
 			 * Can use the "simple" and not "edge" handler since it's
 			 * shorter, and the AIC handles interrupts sanely.
@@ -508,6 +519,14 @@
 			set_irq_flags(pin, IRQF_VALID);
 		}
 
+		/* The toplevel handler handles one bank of GPIOs, except
+		 * AT91SAM9263_ID_PIOCDE handles three... PIOC is first in
+		 * the list, so we only set up that handler.
+		 */
+		if (prev && prev->next == this)
+			continue;
+
+		set_irq_chip_data(id, this);
 		set_irq_chained_handler(id, gpio_irq_handler);
 	}
 	pr_info("AT91: %d gpio irqs in %d banks\n", pin - PIN_BASE, gpio_banks);
@@ -518,8 +537,20 @@
  */
 void __init at91_gpio_init(struct at91_gpio_bank *data, int nr_banks)
 {
+	unsigned		i;
+	struct at91_gpio_bank	*last;
+
 	BUG_ON(nr_banks > MAX_GPIO_BANKS);
 
 	gpio = data;
 	gpio_banks = nr_banks;
+
+	for (i = 0, last = NULL; i < nr_banks; i++, last = data, data++) {
+		data->chipbase = PIN_BASE + i * 32;
+		data->regbase = data->offset + (void __iomem *)AT91_VA_BASE_SYS;
+
+		/* AT91SAM9263_ID_PIOCDE groups PIOC, PIOD, PIOE */
+		if (last && last->id == data->id)
+			last->next = data;
+	}
 }
diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig
index 61b2dfc..e774447 100644
--- a/arch/arm/mach-ixp4xx/Kconfig
+++ b/arch/arm/mach-ixp4xx/Kconfig
@@ -189,6 +189,20 @@
 	  need to use the indirect method instead. If you don't know
 	  what you need, leave this option unselected.
 
+config IXP4XX_QMGR
+	tristate "IXP4xx Queue Manager support"
+	help
+	  This driver supports IXP4xx built-in hardware queue manager
+	  and is automatically selected by Ethernet and HSS drivers.
+
+config IXP4XX_NPE
+	tristate "IXP4xx Network Processor Engine support"
+	select HOTPLUG
+	select FW_LOADER
+	help
+	  This driver supports IXP4xx built-in network coprocessors
+	  and is automatically selected by Ethernet and HSS drivers.
+
 endmenu
 
 endif
diff --git a/arch/arm/mach-ixp4xx/Makefile b/arch/arm/mach-ixp4xx/Makefile
index 77e00ad..c195688 100644
--- a/arch/arm/mach-ixp4xx/Makefile
+++ b/arch/arm/mach-ixp4xx/Makefile
@@ -23,10 +23,12 @@
 obj-$(CONFIG_MACH_IXDPG425)	+= coyote-setup.o
 obj-$(CONFIG_ARCH_ADI_COYOTE)	+= coyote-setup.o
 obj-$(CONFIG_MACH_GTWX5715)	+= gtwx5715-setup.o
-obj-$(CONFIG_MACH_NSLU2)	+= nslu2-setup.o nslu2-power.o
-obj-$(CONFIG_MACH_NAS100D)	+= nas100d-setup.o nas100d-power.o
-obj-$(CONFIG_MACH_DSMG600)      += dsmg600-setup.o dsmg600-power.o
+obj-$(CONFIG_MACH_NSLU2)	+= nslu2-setup.o
+obj-$(CONFIG_MACH_NAS100D)	+= nas100d-setup.o
+obj-$(CONFIG_MACH_DSMG600)      += dsmg600-setup.o
 obj-$(CONFIG_MACH_GATEWAY7001)	+= gateway7001-setup.o
 obj-$(CONFIG_MACH_WG302V2)	+= wg302v2-setup.o
 
 obj-$(CONFIG_PCI)		+= $(obj-pci-$(CONFIG_PCI)) common-pci.o
+obj-$(CONFIG_IXP4XX_QMGR)	+= ixp4xx_qmgr.o
+obj-$(CONFIG_IXP4XX_NPE)	+= ixp4xx_npe.o
diff --git a/arch/arm/mach-ixp4xx/dsmg600-power.c b/arch/arm/mach-ixp4xx/dsmg600-power.c
deleted file mode 100644
index 3471787..0000000
--- a/arch/arm/mach-ixp4xx/dsmg600-power.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * arch/arm/mach-ixp4xx/dsmg600-power.c
- *
- * DSM-G600 Power/Reset driver
- * Author: Michael Westerhof <mwester@dls.net>
- *
- * Based on nslu2-power.c
- *  Copyright (C) 2005 Tower Technologies
- *  Author: Alessandro Zummo <a.zummo@towertech.it>
- *
- * which was based on nslu2-io.c
- *  Copyright (C) 2004 Karen Spearel
- *
- * Maintainers: http://www.nslu2-linux.org/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/module.h>
-#include <linux/reboot.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/jiffies.h>
-#include <linux/timer.h>
-
-#include <asm/mach-types.h>
-
-extern void ctrl_alt_del(void);
-
-/* This is used to make sure the power-button pusher is serious.  The button
- * must be held until the value of this counter reaches zero.
- */
-static volatile int power_button_countdown;
-
-/* Must hold the button down for at least this many counts to be processed */
-#define PBUTTON_HOLDDOWN_COUNT 4 /* 2 secs */
-
-static void dsmg600_power_handler(unsigned long data);
-static DEFINE_TIMER(dsmg600_power_timer, dsmg600_power_handler, 0, 0);
-
-static void dsmg600_power_handler(unsigned long data)
-{
-	/* This routine is called twice per second to check the
-	 * state of the power button.
-	 */
-
-	if (*IXP4XX_GPIO_GPINR & DSMG600_PB_BM) {
-
-		/* IO Pin is 1 (button pushed) */
-		if (power_button_countdown == 0) {
-			/* Signal init to do the ctrlaltdel action, this will bypass
-			 * init if it hasn't started and do a kernel_restart.
-			 */
-			ctrl_alt_del();
-
-			/* Change the state of the power LED to "blink" */
-			gpio_line_set(DSMG600_LED_PWR_GPIO, IXP4XX_GPIO_LOW);
-		}
-		power_button_countdown--;
-
-	} else {
-		power_button_countdown = PBUTTON_HOLDDOWN_COUNT;
-	}
-
-	mod_timer(&dsmg600_power_timer, jiffies + msecs_to_jiffies(500));
-}
-
-static irqreturn_t dsmg600_reset_handler(int irq, void *dev_id)
-{
-	/* This is the paper-clip reset, it shuts the machine down directly. */
-	machine_power_off();
-
-	return IRQ_HANDLED;
-}
-
-static int __init dsmg600_power_init(void)
-{
-	if (!(machine_is_dsmg600()))
-		return 0;
-
-	if (request_irq(DSMG600_RB_IRQ, &dsmg600_reset_handler,
-		IRQF_DISABLED | IRQF_TRIGGER_LOW, "DSM-G600 reset button",
-		NULL) < 0) {
-
-		printk(KERN_DEBUG "Reset Button IRQ %d not available\n",
-			DSMG600_RB_IRQ);
-
-		return -EIO;
-	}
-
-	/* The power button on the D-Link DSM-G600 is on GPIO 15, but
-	 * it cannot handle interrupts on that GPIO line.  So we'll
-	 * have to poll it with a kernel timer.
-	 */
-
-	/* Make sure that the power button GPIO is set up as an input */
-	gpio_line_config(DSMG600_PB_GPIO, IXP4XX_GPIO_IN);
-
-	/* Set the initial value for the power button IRQ handler */
-	power_button_countdown = PBUTTON_HOLDDOWN_COUNT;
-
-	mod_timer(&dsmg600_power_timer, jiffies + msecs_to_jiffies(500));
-
-	return 0;
-}
-
-static void __exit dsmg600_power_exit(void)
-{
-	if (!(machine_is_dsmg600()))
-		return;
-
-	del_timer_sync(&dsmg600_power_timer);
-
-	free_irq(DSMG600_RB_IRQ, NULL);
-}
-
-module_init(dsmg600_power_init);
-module_exit(dsmg600_power_exit);
-
-MODULE_AUTHOR("Michael Westerhof <mwester@dls.net>");
-MODULE_DESCRIPTION("DSM-G600 Power/Reset driver");
-MODULE_LICENSE("GPL");
diff --git a/arch/arm/mach-ixp4xx/dsmg600-setup.c b/arch/arm/mach-ixp4xx/dsmg600-setup.c
index c473d40..6886596 100644
--- a/arch/arm/mach-ixp4xx/dsmg600-setup.c
+++ b/arch/arm/mach-ixp4xx/dsmg600-setup.c
@@ -1,25 +1,37 @@
 /*
  * DSM-G600 board-setup
  *
+ * Copyright (C) 2008 Rod Whitby <rod@whitby.id.au>
  * Copyright (C) 2006 Tower Technologies
- * Author: Alessandro Zummo <a.zummo@towertech.it>
  *
- * based ixdp425-setup.c:
+ * based on ixdp425-setup.c:
  *      Copyright (C) 2003-2004 MontaVista Software, Inc.
+ * based on nslu2-power.c:
+ *	Copyright (C) 2005 Tower Technologies
+ * based on nslu2-io.c:
+ *	Copyright (C) 2004 Karen Spearel
  *
  * Author: Alessandro Zummo <a.zummo@towertech.it>
+ * Author: Michael Westerhof <mwester@dls.net>
+ * Author: Rod Whitby <rod@whitby.id.au>
  * Maintainers: http://www.nslu2-linux.org/
  */
 
-#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
 #include <linux/serial.h>
 #include <linux/serial_8250.h>
+#include <linux/leds.h>
+#include <linux/reboot.h>
+#include <linux/i2c.h>
 #include <linux/i2c-gpio.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/time.h>
+#include <asm/gpio.h>
 
 static struct flash_platform_data dsmg600_flash_data = {
 	.map_name		= "cfi_probe",
@@ -51,29 +63,34 @@
 	},
 };
 
-#ifdef CONFIG_LEDS_CLASS
-static struct resource dsmg600_led_resources[] = {
+static struct i2c_board_info __initdata dsmg600_i2c_board_info [] = {
 	{
-		.name           = "power",
-		.start          = DSMG600_LED_PWR_GPIO,
-		.end            = DSMG600_LED_PWR_GPIO,
-		.flags          = IXP4XX_GPIO_HIGH,
-	},
-	{
-		.name           = "wlan",
-		.start		= DSMG600_LED_WLAN_GPIO,
-		.end            = DSMG600_LED_WLAN_GPIO,
-		.flags          = IXP4XX_GPIO_LOW,
+		I2C_BOARD_INFO("rtc-pcf8563", 0x51),
 	},
 };
 
-static struct platform_device dsmg600_leds = {
-        .name                   = "IXP4XX-GPIO-LED",
-        .id                     = -1,
-        .num_resources          = ARRAY_SIZE(dsmg600_led_resources),
-        .resource               = dsmg600_led_resources,
+static struct gpio_led dsmg600_led_pins[] = {
+	{
+		.name		= "power",
+		.gpio		= DSMG600_LED_PWR_GPIO,
+	},
+	{
+		.name		= "wlan",
+		.gpio		= DSMG600_LED_WLAN_GPIO,
+		.active_low	= true,
+	},
 };
-#endif
+
+static struct gpio_led_platform_data dsmg600_led_data = {
+	.num_leds		= ARRAY_SIZE(dsmg600_led_pins),
+	.leds			= dsmg600_led_pins,
+};
+
+static struct platform_device dsmg600_leds = {
+	.name			= "leds-gpio",
+	.id			= -1,
+	.dev.platform_data	= &dsmg600_led_data,
+};
 
 static struct resource dsmg600_uart_resources[] = {
 	{
@@ -121,6 +138,7 @@
 static struct platform_device *dsmg600_devices[] __initdata = {
 	&dsmg600_i2c_gpio,
 	&dsmg600_flash,
+	&dsmg600_leds,
 };
 
 static void dsmg600_power_off(void)
@@ -132,6 +150,57 @@
 	gpio_line_set(DSMG600_PO_GPIO, IXP4XX_GPIO_HIGH);
 }
 
+/* This is used to make sure the power-button pusher is serious.  The button
+ * must be held until the value of this counter reaches zero.
+ */
+static int power_button_countdown;
+
+/* Must hold the button down for at least this many counts to be processed */
+#define PBUTTON_HOLDDOWN_COUNT 4 /* 2 secs */
+
+static void dsmg600_power_handler(unsigned long data);
+static DEFINE_TIMER(dsmg600_power_timer, dsmg600_power_handler, 0, 0);
+
+static void dsmg600_power_handler(unsigned long data)
+{
+	/* This routine is called twice per second to check the
+	 * state of the power button.
+	 */
+
+	if (gpio_get_value(DSMG600_PB_GPIO)) {
+
+		/* IO Pin is 1 (button pushed) */
+		if (power_button_countdown > 0)
+			power_button_countdown--;
+
+	} else {
+
+		/* Done on button release, to allow for auto-power-on mods. */
+		if (power_button_countdown == 0) {
+			/* Signal init to do the ctrlaltdel action,
+			 * this will bypass init if it hasn't started
+			 * and do a kernel_restart.
+			 */
+			ctrl_alt_del();
+
+			/* Change the state of the power LED to "blink" */
+			gpio_line_set(DSMG600_LED_PWR_GPIO, IXP4XX_GPIO_LOW);
+		} else {
+			power_button_countdown = PBUTTON_HOLDDOWN_COUNT;
+		}
+	}
+
+	mod_timer(&dsmg600_power_timer, jiffies + msecs_to_jiffies(500));
+}
+
+static irqreturn_t dsmg600_reset_handler(int irq, void *dev_id)
+{
+	/* This is the paper-clip reset, it shuts the machine down directly. */
+	machine_power_off();
+
+	return IRQ_HANDLED;
+}
+
 static void __init dsmg600_timer_init(void)
 {
     /* The xtal on this machine is non-standard. */
@@ -156,7 +225,8 @@
 	dsmg600_flash_resource.end =
 		IXP4XX_EXP_BUS_BASE(0) + ixp4xx_exp_bus_size - 1;
 
-	pm_power_off = dsmg600_power_off;
+	i2c_register_board_info(0, dsmg600_i2c_board_info,
+				ARRAY_SIZE(dsmg600_i2c_board_info));
 
 	/* The UART is required on the DSM-G600 (Redboot cannot use the
 	 * NIC) -- do it here so that it does *not* get removed if
@@ -166,10 +236,28 @@
 
 	platform_add_devices(dsmg600_devices, ARRAY_SIZE(dsmg600_devices));
 
-#ifdef CONFIG_LEDS_CLASS
-        /* We don't care whether or not this works. */
-        (void)platform_device_register(&dsmg600_leds);
-#endif
+	pm_power_off = dsmg600_power_off;
+
+	if (request_irq(gpio_to_irq(DSMG600_RB_GPIO), &dsmg600_reset_handler,
+		IRQF_DISABLED | IRQF_TRIGGER_LOW,
+		"DSM-G600 reset button", NULL) < 0) {
+
+		printk(KERN_DEBUG "Reset Button IRQ %d not available\n",
+			gpio_to_irq(DSMG600_RB_GPIO));
+	}
+
+	/* The power button on the D-Link DSM-G600 is on GPIO 15, but
+	 * it cannot handle interrupts on that GPIO line.  So we'll
+	 * have to poll it with a kernel timer.
+	 */
+
+	/* Make sure that the power button GPIO is set up as an input */
+	gpio_line_config(DSMG600_PB_GPIO, IXP4XX_GPIO_IN);
+
+	/* Set the initial value for the power button timer handler */
+	power_button_countdown = PBUTTON_HOLDDOWN_COUNT;
+
+	mod_timer(&dsmg600_power_timer, jiffies + msecs_to_jiffies(500));
 }
 
 MACHINE_START(DSMG600, "D-Link DSM-G600 RevA")
diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c
index e89070d..44584af 100644
--- a/arch/arm/mach-ixp4xx/ixdp425-setup.c
+++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c
@@ -177,6 +177,31 @@
 	.resource		= ixdp425_uart_resources
 };
 
+/* Built-in 10/100 Ethernet MAC interfaces */
+static struct eth_plat_info ixdp425_plat_eth[] = {
+	{
+		.phy		= 0,
+		.rxq		= 3,
+		.txreadyq	= 20,
+	}, {
+		.phy		= 1,
+		.rxq		= 4,
+		.txreadyq	= 21,
+	}
+};
+
+static struct platform_device ixdp425_eth[] = {
+	{
+		.name			= "ixp4xx_eth",
+		.id			= IXP4XX_ETH_NPEB,
+		.dev.platform_data	= ixdp425_plat_eth,
+	}, {
+		.name			= "ixp4xx_eth",
+		.id			= IXP4XX_ETH_NPEC,
+		.dev.platform_data	= ixdp425_plat_eth + 1,
+	}
+};
+
 static struct platform_device *ixdp425_devices[] __initdata = {
 	&ixdp425_i2c_gpio,
 	&ixdp425_flash,
@@ -184,7 +209,9 @@
     defined(CONFIG_MTD_NAND_PLATFORM_MODULE)
 	&ixdp425_flash_nand,
 #endif
-	&ixdp425_uart
+	&ixdp425_uart,
+	&ixdp425_eth[0],
+	&ixdp425_eth[1],
 };
 
 static void __init ixdp425_init(void)
diff --git a/arch/arm/mach-ixp4xx/ixp4xx_npe.c b/arch/arm/mach-ixp4xx/ixp4xx_npe.c
new file mode 100644
index 0000000..83c137e
--- /dev/null
+++ b/arch/arm/mach-ixp4xx/ixp4xx_npe.c
@@ -0,0 +1,741 @@
+/*
+ * Intel IXP4xx Network Processor Engine driver for Linux
+ *
+ * Copyright (C) 2007 Krzysztof Halasa <khc@pm.waw.pl>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * The code is based on publicly available information:
+ * - Intel IXP4xx Developer's Manual and other e-papers
+ * - Intel IXP400 Access Library Software (BSD license)
+ * - previous works by Christian Hohnstaedt <chohnstaedt@innominate.com>
+ *   Thanks, Christian.
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/firmware.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <asm/arch/npe.h>
+
+#define DEBUG_MSG			0
+#define DEBUG_FW			0
+
+#define NPE_COUNT			3
+#define MAX_RETRIES			1000	/* microseconds */
+#define NPE_42X_DATA_SIZE		0x800	/* in dwords */
+#define NPE_46X_DATA_SIZE		0x1000
+#define NPE_A_42X_INSTR_SIZE		0x1000
+#define NPE_B_AND_C_42X_INSTR_SIZE	0x800
+#define NPE_46X_INSTR_SIZE		0x1000
+#define REGS_SIZE			0x1000
+
+#define NPE_PHYS_REG			32
+
+#define FW_MAGIC			0xFEEDF00D
+#define FW_BLOCK_TYPE_INSTR		0x0
+#define FW_BLOCK_TYPE_DATA		0x1
+#define FW_BLOCK_TYPE_EOF		0xF
+
+/* NPE exec status (read) and command (write) */
+#define CMD_NPE_STEP			0x01
+#define CMD_NPE_START			0x02
+#define CMD_NPE_STOP			0x03
+#define CMD_NPE_CLR_PIPE		0x04
+#define CMD_CLR_PROFILE_CNT		0x0C
+#define CMD_RD_INS_MEM			0x10 /* instruction memory */
+#define CMD_WR_INS_MEM			0x11
+#define CMD_RD_DATA_MEM			0x12 /* data memory */
+#define CMD_WR_DATA_MEM			0x13
+#define CMD_RD_ECS_REG			0x14 /* exec access register */
+#define CMD_WR_ECS_REG			0x15
+
+#define STAT_RUN			0x80000000
+#define STAT_STOP			0x40000000
+#define STAT_CLEAR			0x20000000
+#define STAT_ECS_K			0x00800000 /* pipeline clean */
+
+#define NPE_STEVT			0x1B
+#define NPE_STARTPC			0x1C
+#define NPE_REGMAP			0x1E
+#define NPE_CINDEX			0x1F
+
+#define INSTR_WR_REG_SHORT		0x0000C000
+#define INSTR_WR_REG_BYTE		0x00004000
+#define INSTR_RD_FIFO			0x0F888220
+#define INSTR_RESET_MBOX		0x0FAC8210
+
+#define ECS_BG_CTXT_REG_0		0x00 /* Background Executing Context */
+#define ECS_BG_CTXT_REG_1		0x01 /*		Stack level */
+#define ECS_BG_CTXT_REG_2		0x02
+#define ECS_PRI_1_CTXT_REG_0		0x04 /* Priority 1 Executing Context */
+#define ECS_PRI_1_CTXT_REG_1		0x05 /*		Stack level */
+#define ECS_PRI_1_CTXT_REG_2		0x06
+#define ECS_PRI_2_CTXT_REG_0		0x08 /* Priority 2 Executing Context */
+#define ECS_PRI_2_CTXT_REG_1		0x09 /*		Stack level */
+#define ECS_PRI_2_CTXT_REG_2		0x0A
+#define ECS_DBG_CTXT_REG_0		0x0C /* Debug Executing Context */
+#define ECS_DBG_CTXT_REG_1		0x0D /*		Stack level */
+#define ECS_DBG_CTXT_REG_2		0x0E
+#define ECS_INSTRUCT_REG		0x11 /* NPE Instruction Register */
+
+#define ECS_REG_0_ACTIVE		0x80000000 /* all levels */
+#define ECS_REG_0_NEXTPC_MASK		0x1FFF0000 /* BG/PRI1/PRI2 levels */
+#define ECS_REG_0_LDUR_BITS		8
+#define ECS_REG_0_LDUR_MASK		0x00000700 /* all levels */
+#define ECS_REG_1_CCTXT_BITS		16
+#define ECS_REG_1_CCTXT_MASK		0x000F0000 /* all levels */
+#define ECS_REG_1_SELCTXT_BITS		0
+#define ECS_REG_1_SELCTXT_MASK		0x0000000F /* all levels */
+#define ECS_DBG_REG_2_IF		0x00100000 /* debug level */
+#define ECS_DBG_REG_2_IE		0x00080000 /* debug level */
+
+/* NPE watchpoint_fifo register bit */
+#define WFIFO_VALID			0x80000000
+
+/* NPE messaging_status register bit definitions */
+#define MSGSTAT_OFNE	0x00010000 /* OutFifoNotEmpty */
+#define MSGSTAT_IFNF	0x00020000 /* InFifoNotFull */
+#define MSGSTAT_OFNF	0x00040000 /* OutFifoNotFull */
+#define MSGSTAT_IFNE	0x00080000 /* InFifoNotEmpty */
+#define MSGSTAT_MBINT	0x00100000 /* Mailbox interrupt */
+#define MSGSTAT_IFINT	0x00200000 /* InFifo interrupt */
+#define MSGSTAT_OFINT	0x00400000 /* OutFifo interrupt */
+#define MSGSTAT_WFINT	0x00800000 /* WatchFifo interrupt */
+
+/* NPE messaging_control register bit definitions */
+#define MSGCTL_OUT_FIFO			0x00010000 /* enable output FIFO */
+#define MSGCTL_IN_FIFO			0x00020000 /* enable input FIFO */
+#define MSGCTL_OUT_FIFO_WRITE		0x01000000 /* enable FIFO + WRITE */
+#define MSGCTL_IN_FIFO_WRITE		0x02000000
+
+/* NPE mailbox_status value for reset */
+#define RESET_MBOX_STAT			0x0000F0F0
+
+const char *npe_names[] = { "NPE-A", "NPE-B", "NPE-C" };
+
+#define print_npe(pri, npe, fmt, ...)					\
+	printk(pri "%s: " fmt, npe_name(npe), ## __VA_ARGS__)
+
+#if DEBUG_MSG
+#define debug_msg(npe, fmt, ...)					\
+	print_npe(KERN_DEBUG, npe, fmt, ## __VA_ARGS__)
+#else
+#define debug_msg(npe, fmt, ...)
+#endif
+
+static struct {
+	u32 reg, val;
+} ecs_reset[] = {
+	{ ECS_BG_CTXT_REG_0,	0xA0000000 },
+	{ ECS_BG_CTXT_REG_1,	0x01000000 },
+	{ ECS_BG_CTXT_REG_2,	0x00008000 },
+	{ ECS_PRI_1_CTXT_REG_0,	0x20000080 },
+	{ ECS_PRI_1_CTXT_REG_1,	0x01000000 },
+	{ ECS_PRI_1_CTXT_REG_2,	0x00008000 },
+	{ ECS_PRI_2_CTXT_REG_0,	0x20000080 },
+	{ ECS_PRI_2_CTXT_REG_1,	0x01000000 },
+	{ ECS_PRI_2_CTXT_REG_2,	0x00008000 },
+	{ ECS_DBG_CTXT_REG_0,	0x20000000 },
+	{ ECS_DBG_CTXT_REG_1,	0x00000000 },
+	{ ECS_DBG_CTXT_REG_2,	0x001E0000 },
+	{ ECS_INSTRUCT_REG,	0x1003C00F },
+};
+
+static struct npe npe_tab[NPE_COUNT] = {
+	{
+		.id	= 0,
+		.regs	= (struct npe_regs __iomem *)IXP4XX_NPEA_BASE_VIRT,
+		.regs_phys = IXP4XX_NPEA_BASE_PHYS,
+	}, {
+		.id	= 1,
+		.regs	= (struct npe_regs __iomem *)IXP4XX_NPEB_BASE_VIRT,
+		.regs_phys = IXP4XX_NPEB_BASE_PHYS,
+	}, {
+		.id	= 2,
+		.regs	= (struct npe_regs __iomem *)IXP4XX_NPEC_BASE_VIRT,
+		.regs_phys = IXP4XX_NPEC_BASE_PHYS,
+	}
+};
+
+int npe_running(struct npe *npe)
+{
+	return (__raw_readl(&npe->regs->exec_status_cmd) & STAT_RUN) != 0;
+}
+
+static void npe_cmd_write(struct npe *npe, u32 addr, int cmd, u32 data)
+{
+	__raw_writel(data, &npe->regs->exec_data);
+	__raw_writel(addr, &npe->regs->exec_addr);
+	__raw_writel(cmd, &npe->regs->exec_status_cmd);
+}
+
+static u32 npe_cmd_read(struct npe *npe, u32 addr, int cmd)
+{
+	__raw_writel(addr, &npe->regs->exec_addr);
+	__raw_writel(cmd, &npe->regs->exec_status_cmd);
+	/* Introduce extra read cycles after issuing the read command to the
+	   NPE so that we read the register after the NPE has updated it.
+	   This overcomes a race condition between the XScale and the NPE. */
+	__raw_readl(&npe->regs->exec_data);
+	__raw_readl(&npe->regs->exec_data);
+	return __raw_readl(&npe->regs->exec_data);
+}
+
+static void npe_clear_active(struct npe *npe, u32 reg)
+{
+	u32 val = npe_cmd_read(npe, reg, CMD_RD_ECS_REG);
+	npe_cmd_write(npe, reg, CMD_WR_ECS_REG, val & ~ECS_REG_0_ACTIVE);
+}
+
+static void npe_start(struct npe *npe)
+{
+	/* ensure only Background Context Stack Level is active */
+	npe_clear_active(npe, ECS_PRI_1_CTXT_REG_0);
+	npe_clear_active(npe, ECS_PRI_2_CTXT_REG_0);
+	npe_clear_active(npe, ECS_DBG_CTXT_REG_0);
+
+	__raw_writel(CMD_NPE_CLR_PIPE, &npe->regs->exec_status_cmd);
+	__raw_writel(CMD_NPE_START, &npe->regs->exec_status_cmd);
+}
+
+static void npe_stop(struct npe *npe)
+{
+	__raw_writel(CMD_NPE_STOP, &npe->regs->exec_status_cmd);
+	__raw_writel(CMD_NPE_CLR_PIPE, &npe->regs->exec_status_cmd); /*FIXME?*/
+}
+
+static int __must_check npe_debug_instr(struct npe *npe, u32 instr, u32 ctx,
+					u32 ldur)
+{
+	u32 wc;		/* watch_count sampled just before the step */
+	int i;
+
+	/* set the Active bit, and the LDUR, in the debug level */
+	npe_cmd_write(npe, ECS_DBG_CTXT_REG_0, CMD_WR_ECS_REG,
+		      ECS_REG_0_ACTIVE | (ldur << ECS_REG_0_LDUR_BITS));
+
+	/* set CCTXT at ECS DEBUG L3 to specify in which context to execute
+	   the instruction, and set SELCTXT at ECS DEBUG Level to specify
+	   which context store to access.
+	   Debug ECS Level Reg 1 has form 0x000n000n, where n = context number
+	*/
+	npe_cmd_write(npe, ECS_DBG_CTXT_REG_1, CMD_WR_ECS_REG,
+		      (ctx << ECS_REG_1_CCTXT_BITS) |
+		      (ctx << ECS_REG_1_SELCTXT_BITS));
+
+	/* clear the pipeline */
+	__raw_writel(CMD_NPE_CLR_PIPE, &npe->regs->exec_status_cmd);
+
+	/* load NPE instruction into the instruction register */
+	npe_cmd_write(npe, ECS_INSTRUCT_REG, CMD_WR_ECS_REG, instr);
+
+	/* we need this value later to wait for completion of NPE execution
+	   step */
+	wc = __raw_readl(&npe->regs->watch_count);
+
+	/* issue a Step One command via the Execution Control register */
+	__raw_writel(CMD_NPE_STEP, &npe->regs->exec_status_cmd);
+
+	/* Watch Count register increments when NPE completes an instruction */
+	for (i = 0; i < MAX_RETRIES; i++) {
+		if (wc != __raw_readl(&npe->regs->watch_count))
+			return 0; /* counter moved: step completed */
+		udelay(1); /* at most MAX_RETRIES polls, 1 us apart */
+	}
+
+	print_npe(KERN_ERR, npe, "reset: npe_debug_instr(): timeout\n");
+	return -ETIMEDOUT;
+}
+
+static int __must_check npe_logical_reg_write8(struct npe *npe, u32 addr,
+					       u8 val, u32 ctx)
+{
+	/* here we build the NPE assembler instruction: mov8 d0, #0 */
+	u32 instr = INSTR_WR_REG_BYTE |	/* OpCode */
+		addr << 9 |		/* base Operand */
+		(val & 0x1F) << 4 |	/* lower 5 bits to immediate data */
+		(val & ~0x1F) << (18 - 5);/* higher 3 bits to CoProc instr. */
+	return npe_debug_instr(npe, instr, ctx, 1); /* execute it */
+}
+
+static int __must_check npe_logical_reg_write16(struct npe *npe, u32 addr,
+						u16 val, u32 ctx)
+{
+	/* here we build the NPE assembler instruction: mov16 d0, #0 */
+	u32 instr = INSTR_WR_REG_SHORT | /* OpCode */
+		addr << 9 |		/* base Operand */
+		(val & 0x1F) << 4 |	/* lower 5 bits to immediate data */
+		(val & ~0x1F) << (18 - 5);/* higher 11 bits to CoProc instr. */
+	return npe_debug_instr(npe, instr, ctx, 1); /* execute it */
+}
+
+static int __must_check npe_logical_reg_write32(struct npe *npe, u32 addr,
+						u32 val, u32 ctx)
+{
+	/* write in 16 bit steps first the high and then the low value */
+	if (npe_logical_reg_write16(npe, addr, val >> 16, ctx))
+		return -ETIMEDOUT;
+	return npe_logical_reg_write16(npe, addr + 2, val & 0xFFFF, ctx);
+}
+
+static int npe_reset(struct npe *npe)
+{
+	u32 val, ctl, exec_count, ctx_reg2;
+	int i;
+
+	ctl = (__raw_readl(&npe->regs->messaging_control) | 0x3F000000) &
+		0x3F3FFFFF;
+
+	/* disable parity interrupt */
+	__raw_writel(ctl & 0x3F00FFFF, &npe->regs->messaging_control);
+
+	/* pre exec - debug instruction */
+	/* turn off the halt bit by clearing Execution Count register. */
+	exec_count = __raw_readl(&npe->regs->exec_count);
+	__raw_writel(0, &npe->regs->exec_count);
+	/* ensure that IF and IE are on (temporarily), so that we don't end up
+	   stepping forever */
+	ctx_reg2 = npe_cmd_read(npe, ECS_DBG_CTXT_REG_2, CMD_RD_ECS_REG);
+	npe_cmd_write(npe, ECS_DBG_CTXT_REG_2, CMD_WR_ECS_REG, ctx_reg2 |
+		      ECS_DBG_REG_2_IF | ECS_DBG_REG_2_IE);
+
+	/* clear the FIFOs */
+	while (__raw_readl(&npe->regs->watchpoint_fifo) & WFIFO_VALID)
+		; /* NOTE(review): this drain loop has no retry limit */
+	while (__raw_readl(&npe->regs->messaging_status) & MSGSTAT_OFNE)
+		/* read from the outFIFO until empty */
+		print_npe(KERN_DEBUG, npe, "npe_reset: read FIFO = 0x%X\n",
+			  __raw_readl(&npe->regs->in_out_fifo));
+
+	while (__raw_readl(&npe->regs->messaging_status) & MSGSTAT_IFNE)
+		/* step execution of the NPE instruction to read inFIFO using
+		   the Debug Executing Context stack */
+		if (npe_debug_instr(npe, INSTR_RD_FIFO, 0, 0))
+			return -ETIMEDOUT;
+
+	/* reset the mailbox reg from the XScale side */
+	__raw_writel(RESET_MBOX_STAT, &npe->regs->mailbox_status);
+	/* from NPE side */
+	if (npe_debug_instr(npe, INSTR_RESET_MBOX, 0, 0))
+		return -ETIMEDOUT;
+
+	/* Reset the physical registers in the NPE register file */
+	for (val = 0; val < NPE_PHYS_REG; val++) {
+		if (npe_logical_reg_write16(npe, NPE_REGMAP, val >> 1, 0))
+			return -ETIMEDOUT;
+		/* address is either 0 or 4 */
+		if (npe_logical_reg_write32(npe, (val & 1) * 4, 0, 0))
+			return -ETIMEDOUT;
+	}
+
+	/* Reset the context store = each context's Context Store registers */
+
+	/* Context 0 has no STARTPC. Instead, this value is used to set NextPC
+	   for Background ECS, to set where NPE starts executing code */
+	val = npe_cmd_read(npe, ECS_BG_CTXT_REG_0, CMD_RD_ECS_REG);
+	val &= ~ECS_REG_0_NEXTPC_MASK;
+	val |= (0 /* NextPC */ << 16) & ECS_REG_0_NEXTPC_MASK;
+	npe_cmd_write(npe, ECS_BG_CTXT_REG_0, CMD_WR_ECS_REG, val);
+
+	for (i = 0; i < 16; i++) {
+		if (i) {	/* Context 0 has no STEVT nor STARTPC */
+			/* STEVT = off, 0x80 */
+			if (npe_logical_reg_write8(npe, NPE_STEVT, 0x80, i))
+				return -ETIMEDOUT;
+			if (npe_logical_reg_write16(npe, NPE_STARTPC, 0, i))
+				return -ETIMEDOUT;
+		}
+		/* REGMAP = d0->p0, d8->p2, d16->p4 */
+		if (npe_logical_reg_write16(npe, NPE_REGMAP, 0x820, i))
+			return -ETIMEDOUT;
+		if (npe_logical_reg_write8(npe, NPE_CINDEX, 0, i))
+			return -ETIMEDOUT;
+	}
+
+	/* post exec */
+	/* clear active bit in debug level */
+	npe_cmd_write(npe, ECS_DBG_CTXT_REG_0, CMD_WR_ECS_REG, 0);
+	/* clear the pipeline */
+	__raw_writel(CMD_NPE_CLR_PIPE, &npe->regs->exec_status_cmd);
+	/* restore previous values */
+	__raw_writel(exec_count, &npe->regs->exec_count);
+	npe_cmd_write(npe, ECS_DBG_CTXT_REG_2, CMD_WR_ECS_REG, ctx_reg2);
+
+	/* write reset values to Execution Context Stack registers */
+	for (val = 0; val < ARRAY_SIZE(ecs_reset); val++)
+		npe_cmd_write(npe, ecs_reset[val].reg, CMD_WR_ECS_REG,
+			      ecs_reset[val].val);
+
+	/* clear the profile counter */
+	__raw_writel(CMD_CLR_PROFILE_CNT, &npe->regs->exec_status_cmd);
+
+	__raw_writel(0, &npe->regs->exec_count);
+	__raw_writel(0, &npe->regs->action_points[0]);
+	__raw_writel(0, &npe->regs->action_points[1]);
+	__raw_writel(0, &npe->regs->action_points[2]);
+	__raw_writel(0, &npe->regs->action_points[3]);
+	__raw_writel(0, &npe->regs->watch_count);
+
+	val = ixp4xx_read_feature_bits();
+	/* reset the NPE */
+	ixp4xx_write_feature_bits(val &
+				  ~(IXP4XX_FEATURE_RESET_NPEA << npe->id));
+	for (i = 0; i < MAX_RETRIES; i++) {
+		if (!(ixp4xx_read_feature_bits() &
+		      (IXP4XX_FEATURE_RESET_NPEA << npe->id)))
+			break;	/* reset completed */
+		udelay(1);
+	}
+	if (i == MAX_RETRIES)
+		return -ETIMEDOUT;
+
+	/* deassert reset */
+	ixp4xx_write_feature_bits(val |
+				  (IXP4XX_FEATURE_RESET_NPEA << npe->id));
+	for (i = 0; i < MAX_RETRIES; i++) {
+		if (ixp4xx_read_feature_bits() &
+		    (IXP4XX_FEATURE_RESET_NPEA << npe->id))
+			break;	/* NPE is back alive */
+		udelay(1);
+	}
+	if (i == MAX_RETRIES)
+		return -ETIMEDOUT;
+
+	npe_stop(npe);
+
+	/* restore NPE configuration bus Control Register - parity settings */
+	__raw_writel(ctl, &npe->regs->messaging_control);
+	return 0;
+}
+
+/* Push a two-word message into the NPE input FIFO and wait for it to drain */
+int npe_send_message(struct npe *npe, const void *msg, const char *what)
+{
+	const u32 *send = msg;
+	int cycles = 0;	/* 1 us polls spent waiting for the FIFO to empty */
+
+	debug_msg(npe, "Trying to send message %s [%08X:%08X]\n",
+		  what, send[0], send[1]);
+
+	if (__raw_readl(&npe->regs->messaging_status) & MSGSTAT_IFNE) {
+		debug_msg(npe, "NPE input FIFO not empty\n");
+		return -EIO;
+	}
+
+	__raw_writel(send[0], &npe->regs->in_out_fifo);
+
+	if (!(__raw_readl(&npe->regs->messaging_status) & MSGSTAT_IFNF)) {
+		debug_msg(npe, "NPE input FIFO full\n");
+		return -EIO;
+	}
+
+	__raw_writel(send[1], &npe->regs->in_out_fifo);
+
+	while ((cycles < MAX_RETRIES) &&
+	       (__raw_readl(&npe->regs->messaging_status) & MSGSTAT_IFNE)) {
+		udelay(1);
+		cycles++;
+	}
+
+	if (cycles == MAX_RETRIES) {
+		debug_msg(npe, "Timeout sending message\n");
+		return -ETIMEDOUT;
+	}
+
+	debug_msg(npe, "Sending a message took %i cycles\n", cycles);
+	return 0;
+}
+
+int npe_recv_message(struct npe *npe, void *msg, const char *what)
+{
+	u32 *recv = msg;
+	int cycles = 0, cnt = 0; /* idle 1 us polls, words read */
+
+	debug_msg(npe, "Trying to receive message %s\n", what);
+
+	while (cycles < MAX_RETRIES) {
+		if (__raw_readl(&npe->regs->messaging_status) & MSGSTAT_OFNE) {
+			recv[cnt++] = __raw_readl(&npe->regs->in_out_fifo);
+			if (cnt == 2)
+				break; /* both words received */
+		} else {
+			udelay(1);
+			cycles++;
+		}
+	}
+
+	switch(cnt) {
+	case 1:
+		debug_msg(npe, "Received [%08X]\n", recv[0]);
+		break;
+	case 2:
+		debug_msg(npe, "Received [%08X:%08X]\n", recv[0], recv[1]);
+		break;
+	}
+
+	if (cycles == MAX_RETRIES) {
+		debug_msg(npe, "Timeout waiting for message\n");
+		return -ETIMEDOUT; /* recv[] may hold a partial message */
+	}
+
+	debug_msg(npe, "Receiving a message took %i cycles\n", cycles);
+	return 0;
+}
+
+int npe_send_recv_message(struct npe *npe, void *msg, const char *what)
+{
+	int result;
+	u32 *send = msg, recv[2]; /* the reply is read into recv[] */
+
+	if ((result = npe_send_message(npe, msg, what)) != 0)
+		return result;
+	if ((result = npe_recv_message(npe, recv, what)) != 0)
+		return result;
+
+	if ((recv[0] != send[0]) || (recv[1] != send[1])) {
+		debug_msg(npe, "Message %s: unexpected message received\n",
+			  what);
+		return -EIO; /* reply did not echo the request */
+	}
+	return 0;
+}
+
+/* Fetch, validate and download an NPE firmware image, then start the NPE */
+int npe_load_firmware(struct npe *npe, const char *name, struct device *dev)
+{
+	const struct firmware *fw_entry;
+
+	struct dl_block {
+		u32 type;
+		u32 offset;
+	} *blk;
+
+	struct dl_image {
+		u32 magic;
+		u32 id;
+		u32 size;
+		union {
+			u32 data[0];
+			struct dl_block blocks[0];
+		};
+	} *image;
+
+	struct dl_codeblock {
+		u32 npe_addr;
+		u32 size;
+		u32 data[0];
+	} *cb;
+
+	int i, j, err, data_size, instr_size, blocks, table_end;
+	u32 cmd;
+
+	if ((err = request_firmware(&fw_entry, name, dev)) != 0)
+		return err;
+
+	err = -EINVAL; /* default for every validation failure */
+	if (fw_entry->size < sizeof(struct dl_image)) {
+		print_npe(KERN_ERR, npe, "incomplete firmware file\n");
+		goto err;
+	}
+	image = (struct dl_image*)fw_entry->data; /* swapped in place below */
+
+#if DEBUG_FW
+	print_npe(KERN_DEBUG, npe, "firmware: %08X %08X %08X (0x%X bytes)\n",
+		  image->magic, image->id, image->size, image->size * 4);
+#endif
+
+	if (image->magic == swab32(FW_MAGIC)) { /* swapped file */
+		image->id = swab32(image->id);
+		image->size = swab32(image->size);
+	} else if (image->magic != FW_MAGIC) {
+		print_npe(KERN_ERR, npe, "bad firmware file magic: 0x%X\n",
+			  image->magic);
+		goto err;
+	}
+	if ((image->size * 4 + sizeof(struct dl_image)) != fw_entry->size) {
+		print_npe(KERN_ERR, npe,
+			  "inconsistent size of firmware file\n");
+		goto err; /* NOTE(review): size * 4 may wrap in 32 bits */
+	}
+	if (((image->id >> 24) & 0xF /* NPE ID */) != npe->id) {
+		print_npe(KERN_ERR, npe, "firmware file NPE ID mismatch\n");
+		goto err;
+	}
+	if (image->magic == swab32(FW_MAGIC))
+		for (i = 0; i < image->size; i++)
+			image->data[i] = swab32(image->data[i]);
+
+	if (!cpu_is_ixp46x() && ((image->id >> 28) & 0xF /* device ID */)) {
+		print_npe(KERN_INFO, npe, "IXP46x firmware ignored on "
+			  "IXP42x\n");
+		goto err;
+	}
+
+	if (npe_running(npe)) {
+		print_npe(KERN_INFO, npe, "unable to load firmware, NPE is "
+			  "already running\n");
+		err = -EBUSY;
+		goto err;
+	}
+#if 0
+	npe_stop(npe);
+	npe_reset(npe);
+#endif
+
+	print_npe(KERN_INFO, npe, "firmware functionality 0x%X, "
+		  "revision 0x%X:%X\n", (image->id >> 16) & 0xFF,
+		  (image->id >> 8) & 0xFF, image->id & 0xFF);
+
+	if (!cpu_is_ixp46x()) {
+		if (!npe->id)
+			instr_size = NPE_A_42X_INSTR_SIZE;
+		else
+			instr_size = NPE_B_AND_C_42X_INSTR_SIZE;
+		data_size = NPE_42X_DATA_SIZE;
+	} else {
+		instr_size = NPE_46X_INSTR_SIZE;
+		data_size = NPE_46X_DATA_SIZE;
+	}
+
+	for (blocks = 0; blocks * sizeof(struct dl_block) / 4 < image->size;
+	     blocks++)
+		if (image->blocks[blocks].type == FW_BLOCK_TYPE_EOF)
+			break;
+	if (blocks * sizeof(struct dl_block) / 4 >= image->size) {
+		print_npe(KERN_INFO, npe, "firmware EOF block marker not "
+			  "found\n");
+		goto err;
+	}
+
+#if DEBUG_FW
+	print_npe(KERN_DEBUG, npe, "%i firmware blocks found\n", blocks);
+#endif
+
+	table_end = blocks * sizeof(struct dl_block) / 4 + 1 /* EOF marker */;
+	for (i = 0, blk = image->blocks; i < blocks; i++, blk++) {
+		if (blk->offset > image->size - sizeof(struct dl_codeblock) / 4
+		    || blk->offset < table_end) {
+			print_npe(KERN_INFO, npe, "invalid offset 0x%X of "
+				  "firmware block #%i\n", blk->offset, i);
+			goto err;
+		}
+
+		cb = (struct dl_codeblock*)&image->data[blk->offset];
+		if (blk->type == FW_BLOCK_TYPE_INSTR) {
+			if (cb->npe_addr + cb->size > instr_size)
+				goto too_big;
+			cmd = CMD_WR_INS_MEM;
+		} else if (blk->type == FW_BLOCK_TYPE_DATA) {
+			if (cb->npe_addr + cb->size > data_size)
+				goto too_big;
+			cmd = CMD_WR_DATA_MEM;
+		} else {
+			print_npe(KERN_INFO, npe, "invalid firmware block #%i "
+				  "type 0x%X\n", i, blk->type);
+			goto err;
+		}
+		if (blk->offset + sizeof(*cb) / 4 + cb->size > image->size) {
+			print_npe(KERN_INFO, npe, "firmware block #%i doesn't "
+				  "fit in firmware image: type %c, start 0x%X,"
+				  " length 0x%X\n", i,
+				  blk->type == FW_BLOCK_TYPE_INSTR ? 'I' : 'D',
+				  cb->npe_addr, cb->size);
+			goto err;
+		}
+
+		for (j = 0; j < cb->size; j++)
+			npe_cmd_write(npe, cb->npe_addr + j, cmd, cb->data[j]);
+	}
+
+	npe_start(npe);
+	if (!npe_running(npe))
+		print_npe(KERN_ERR, npe, "unable to start\n");
+	release_firmware(fw_entry);
+	return 0; /* NOTE: 0 even if the NPE failed to start */
+
+too_big:
+	print_npe(KERN_INFO, npe, "firmware block #%i doesn't fit in NPE "
+		  "memory: type %c, start 0x%X, length 0x%X\n", i,
+		  blk->type == FW_BLOCK_TYPE_INSTR ? 'I' : 'D',
+		  cb->npe_addr, cb->size);
+err:
+	release_firmware(fw_entry);
+	return err;
+}
+
+/* Get NPE 'id' and pin this module; NULL if id is invalid or NPE unusable */
+struct npe *npe_request(int id)
+{
+	if (id >= 0 && id < NPE_COUNT)	/* reject negative ids too */
+		if (npe_tab[id].valid)
+			if (try_module_get(THIS_MODULE))
+				return &npe_tab[id];
+	return NULL;
+}
+
+void npe_release(struct npe *npe)
+{
+	module_put(THIS_MODULE);	/* pairs with npe_request() */
+}
+
+/* Claim and reset every NPE the feature bits report; fail if none is usable */
+static int __init npe_init_module(void)
+{
+
+	int i, found = 0;
+
+	for (i = 0; i < NPE_COUNT; i++) {
+		struct npe *npe = &npe_tab[i];
+		if (!(ixp4xx_read_feature_bits() &
+		      (IXP4XX_FEATURE_RESET_NPEA << i)))
+			continue; /* NPE already disabled or not present */
+		if (!(npe->mem_res = request_mem_region(npe->regs_phys,
+							REGS_SIZE,
+							npe_name(npe)))) {
+			print_npe(KERN_ERR, npe,
+				  "failed to request memory region\n");
+			continue;
+		}
+
+		if (npe_reset(npe))
+			continue; /* NOTE(review): mem_res stays claimed */
+		npe->valid = 1;
+		found++;
+	}
+
+	if (!found)
+		return -ENODEV;	/* no usable NPE; ENOSYS is for syscalls */
+	return 0;
+}
+
+static void __exit npe_cleanup_module(void)
+{
+	int i;
+
+	for (i = 0; i < NPE_COUNT; i++)
+		if (npe_tab[i].mem_res) {
+			npe_reset(&npe_tab[i]); /* ends with npe_stop() */
+			release_resource(npe_tab[i].mem_res);
+		}
+}
+
+module_init(npe_init_module);
+module_exit(npe_cleanup_module);
+
+MODULE_AUTHOR("Krzysztof Halasa");
+MODULE_LICENSE("GPL v2");
+
+EXPORT_SYMBOL(npe_names);
+EXPORT_SYMBOL(npe_running);
+EXPORT_SYMBOL(npe_request);
+EXPORT_SYMBOL(npe_release);
+EXPORT_SYMBOL(npe_load_firmware);
+EXPORT_SYMBOL(npe_send_message);
+EXPORT_SYMBOL(npe_recv_message);
+EXPORT_SYMBOL(npe_send_recv_message);
diff --git a/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c b/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c
new file mode 100644
index 0000000..e833013
--- /dev/null
+++ b/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c
@@ -0,0 +1,274 @@
+/*
+ * Intel IXP4xx Queue Manager driver for Linux
+ *
+ * Copyright (C) 2007 Krzysztof Halasa <khc@pm.waw.pl>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/arch/qmgr.h>
+
+#define DEBUG		0
+
+struct qmgr_regs __iomem *qmgr_regs;
+static struct resource *mem_res;
+static spinlock_t qmgr_lock;
+static u32 used_sram_bitmap[4]; /* 128 16-dword pages */
+static void (*irq_handlers[HALF_QUEUES])(void *pdev);
+static void *irq_pdevs[HALF_QUEUES];
+
+void qmgr_set_irq(unsigned int queue, int src,
+		  void (*handler)(void *pdev), void *pdev)
+{
+	u32 __iomem *reg = &qmgr_regs->irqsrc[queue / 8]; /* 8 queues / u32 */
+	int bit = (queue % 8) * 4; /* 3 bits + 1 reserved bit per queue */
+	unsigned long flags;
+
+	src &= 7;	/* IRQ source selector is 3 bits wide */
+	spin_lock_irqsave(&qmgr_lock, flags);
+	__raw_writel((__raw_readl(reg) & ~(7 << bit)) | (src << bit), reg);
+	irq_handlers[queue] = handler; /* NOTE(review): queue unchecked */
+	irq_pdevs[queue] = pdev;
+	spin_unlock_irqrestore(&qmgr_lock, flags);
+}
+
+/* IRQ handler for queues 0..HALF_QUEUES-1: ACK, then run per-queue handlers */
+static irqreturn_t qmgr_irq1(int irq, void *pdev)
+{
+	int i;
+	u32 val = __raw_readl(&qmgr_regs->irqstat[0]);
+	__raw_writel(val, &qmgr_regs->irqstat[0]); /* ACK */
+
+	for (i = 0; i < HALF_QUEUES; i++)
+		if (val & (1 << i))
+			irq_handlers[i](irq_pdevs[i]);
+
+	return val ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/* Set the enable bit of 'queue' in irqen[0], serialised by qmgr_lock */
+void qmgr_enable_irq(unsigned int queue)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&qmgr_lock, flags);
+	__raw_writel(__raw_readl(&qmgr_regs->irqen[0]) | (1 << queue),
+		     &qmgr_regs->irqen[0]);
+	spin_unlock_irqrestore(&qmgr_lock, flags);
+}
+
+void qmgr_disable_irq(unsigned int queue)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&qmgr_lock, flags); /* protects the irqen[0] RMW */
+	__raw_writel(__raw_readl(&qmgr_regs->irqen[0]) & ~(1 << queue),
+		     &qmgr_regs->irqen[0]);
+	spin_unlock_irqrestore(&qmgr_lock, flags);
+}
+
+static inline void shift_mask(u32 *mask)
+{
+	mask[3] = mask[3] << 1 | mask[2] >> 31;
+	mask[2] = mask[2] << 1 | mask[1] >> 31;
+	mask[1] = mask[1] << 1 | mask[0] >> 31;
+	mask[0] <<= 1;	/* mask[0] is the least significant word */
+}
+
+int qmgr_request_queue(unsigned int queue, unsigned int len /* dwords */,
+		       unsigned int nearly_empty_watermark,
+		       unsigned int nearly_full_watermark)
+{
+	u32 cfg, addr = 0, mask[4]; /* in 16-dwords */
+	int err;
+
+	if (queue >= HALF_QUEUES)
+		return -ERANGE;
+
+	if ((nearly_empty_watermark | nearly_full_watermark) & ~7)
+		return -EINVAL; /* watermarks are 3-bit fields */
+
+	switch (len) {
+	case  16:
+		cfg = 0 << 24;
+		mask[0] = 0x1;
+		break;
+	case  32:
+		cfg = 1 << 24;
+		mask[0] = 0x3;
+		break;
+	case  64:
+		cfg = 2 << 24;
+		mask[0] = 0xF;
+		break;
+	case 128:
+		cfg = 3 << 24;
+		mask[0] = 0xFF;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	cfg |= nearly_empty_watermark << 26;
+	cfg |= nearly_full_watermark << 29;
+	len /= 16;		/* in 16-dwords: 1, 2, 4 or 8 */
+	mask[1] = mask[2] = mask[3] = 0;
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	spin_lock_irq(&qmgr_lock);
+	if (__raw_readl(&qmgr_regs->sram[queue])) {
+		err = -EBUSY; /* config word non-zero: queue in use */
+		goto err;
+	}
+
+	while (1) {
+		if (!(used_sram_bitmap[0] & mask[0]) &&
+		    !(used_sram_bitmap[1] & mask[1]) &&
+		    !(used_sram_bitmap[2] & mask[2]) &&
+		    !(used_sram_bitmap[3] & mask[3]))
+			break; /* found free space */
+
+		addr++; /* slide the window up by one 16-dword page */
+		shift_mask(mask);
+		if (addr + len > ARRAY_SIZE(qmgr_regs->sram)) {
+			printk(KERN_ERR "qmgr: no free SRAM space for"
+			       " queue %i\n", queue);
+			err = -ENOMEM;
+			goto err; /* NOTE(review): bound in pages too? */
+		}
+	}
+
+	used_sram_bitmap[0] |= mask[0];
+	used_sram_bitmap[1] |= mask[1];
+	used_sram_bitmap[2] |= mask[2];
+	used_sram_bitmap[3] |= mask[3];
+	__raw_writel(cfg | (addr << 14), &qmgr_regs->sram[queue]);
+	spin_unlock_irq(&qmgr_lock);
+
+#if DEBUG
+	printk(KERN_DEBUG "qmgr: requested queue %i, addr = 0x%02X\n",
+	       queue, addr);
+#endif
+	return 0;
+
+err:
+	spin_unlock_irq(&qmgr_lock);
+	module_put(THIS_MODULE);
+	return err;
+}
+
+void qmgr_release_queue(unsigned int queue)
+{
+	u32 cfg, addr, mask[4] = { 0 };	/* shift_mask() reads all 4 words */
+
+	BUG_ON(queue >= HALF_QUEUES); /* not in valid range */
+
+	spin_lock_irq(&qmgr_lock);
+	cfg = __raw_readl(&qmgr_regs->sram[queue]);
+	addr = (cfg >> 14) & 0xFF; /* start page, in 16-dword units */
+
+	BUG_ON(!addr);		/* not requested */
+
+	switch ((cfg >> 24) & 3) { /* size code -> one bit per page */
+	case 0: mask[0] = 0x1; break;
+	case 1: mask[0] = 0x3; break;
+	case 2: mask[0] = 0xF; break;
+	case 3: mask[0] = 0xFF; break;
+	}
+
+	while (addr--)
+		shift_mask(mask); /* move the mask up to the start page */
+
+	__raw_writel(0, &qmgr_regs->sram[queue]);
+
+	used_sram_bitmap[0] &= ~mask[0];
+	used_sram_bitmap[1] &= ~mask[1];
+	used_sram_bitmap[2] &= ~mask[2];
+	used_sram_bitmap[3] &= ~mask[3];
+	irq_handlers[queue] = NULL; /* catch IRQ bugs */
+	spin_unlock_irq(&qmgr_lock);
+
+	module_put(THIS_MODULE);
+#if DEBUG
+	printk(KERN_DEBUG "qmgr: released queue %i\n", queue);
+#endif
+}
+
+static int qmgr_init(void)
+{
+	int i, err;
+	mem_res = request_mem_region(IXP4XX_QMGR_BASE_PHYS,
+				     IXP4XX_QMGR_REGION_SIZE,
+				     "IXP4xx Queue Manager");
+	if (mem_res == NULL)
+		return -EBUSY; /* region already claimed */
+
+	qmgr_regs = ioremap(IXP4XX_QMGR_BASE_PHYS, IXP4XX_QMGR_REGION_SIZE);
+	if (qmgr_regs == NULL) {
+		err = -ENOMEM;
+		goto error_map;
+	}
+
+	/* reset qmgr registers */
+	for (i = 0; i < 4; i++) {
+		__raw_writel(0x33333333, &qmgr_regs->stat1[i]);
+		__raw_writel(0, &qmgr_regs->irqsrc[i]);
+	}
+	for (i = 0; i < 2; i++) {
+		__raw_writel(0, &qmgr_regs->stat2[i]);
+		__raw_writel(0xFFFFFFFF, &qmgr_regs->irqstat[i]); /* clear */
+		__raw_writel(0, &qmgr_regs->irqen[i]);
+	}
+
+	for (i = 0; i < QUEUES; i++)
+		__raw_writel(0, &qmgr_regs->sram[i]); /* mark unused */
+
+	err = request_irq(IRQ_IXP4XX_QM1, qmgr_irq1, 0,
+			  "IXP4xx Queue Manager", NULL);
+	if (err) {
+		printk(KERN_ERR "qmgr: failed to request IRQ%i\n",
+		       IRQ_IXP4XX_QM1);
+		goto error_irq;
+	}
+
+	used_sram_bitmap[0] = 0xF; /* 4 first pages reserved for config */
+	spin_lock_init(&qmgr_lock);
+
+	printk(KERN_INFO "IXP4xx Queue Manager initialized.\n");
+	return 0;
+
+error_irq:
+	iounmap(qmgr_regs);
+error_map:
+	release_mem_region(IXP4XX_QMGR_BASE_PHYS, IXP4XX_QMGR_REGION_SIZE);
+	return err;
+}
+
+static void qmgr_remove(void)
+{
+	free_irq(IRQ_IXP4XX_QM1, NULL); /* IRQ first, then unmap, then region */
+	synchronize_irq(IRQ_IXP4XX_QM1);
+	iounmap(qmgr_regs);
+	release_mem_region(IXP4XX_QMGR_BASE_PHYS, IXP4XX_QMGR_REGION_SIZE);
+}
+
+module_init(qmgr_init);
+module_exit(qmgr_remove);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Krzysztof Halasa");
+
+EXPORT_SYMBOL(qmgr_regs);
+EXPORT_SYMBOL(qmgr_set_irq);
+EXPORT_SYMBOL(qmgr_enable_irq);
+EXPORT_SYMBOL(qmgr_disable_irq);
+EXPORT_SYMBOL(qmgr_request_queue);
+EXPORT_SYMBOL(qmgr_release_queue);
diff --git a/arch/arm/mach-ixp4xx/nas100d-power.c b/arch/arm/mach-ixp4xx/nas100d-power.c
deleted file mode 100644
index 29aa98d..0000000
--- a/arch/arm/mach-ixp4xx/nas100d-power.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * arch/arm/mach-ixp4xx/nas100d-power.c
- *
- * NAS 100d Power/Reset driver
- *
- * Copyright (C) 2005 Tower Technologies
- *
- * based on nas100d-io.c
- *  Copyright (C) 2004 Karen Spearel
- *
- * Author: Alessandro Zummo <a.zummo@towertech.it>
- * Maintainers: http://www.nslu2-linux.org/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/module.h>
-#include <linux/reboot.h>
-
-#include <asm/mach-types.h>
-
-static irqreturn_t nas100d_reset_handler(int irq, void *dev_id)
-{
-	/* Signal init to do the ctrlaltdel action, this will bypass init if
-	 * it hasn't started and do a kernel_restart.
-	 */
-	ctrl_alt_del();
-
-	return IRQ_HANDLED;
-}
-
-static int __init nas100d_power_init(void)
-{
-	if (!(machine_is_nas100d()))
-		return 0;
-
-	set_irq_type(NAS100D_RB_IRQ, IRQT_LOW);
-
-	if (request_irq(NAS100D_RB_IRQ, &nas100d_reset_handler,
-		IRQF_DISABLED, "NAS100D reset button", NULL) < 0) {
-
-		printk(KERN_DEBUG "Reset Button IRQ %d not available\n",
-			NAS100D_RB_IRQ);
-
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static void __exit nas100d_power_exit(void)
-{
-	if (!(machine_is_nas100d()))
-		return;
-
-	free_irq(NAS100D_RB_IRQ, NULL);
-}
-
-module_init(nas100d_power_init);
-module_exit(nas100d_power_exit);
-
-MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>");
-MODULE_DESCRIPTION("NAS100D Power/Reset driver");
-MODULE_LICENSE("GPL");
diff --git a/arch/arm/mach-ixp4xx/nas100d-setup.c b/arch/arm/mach-ixp4xx/nas100d-setup.c
index 54d884f..4cecae8 100644
--- a/arch/arm/mach-ixp4xx/nas100d-setup.c
+++ b/arch/arm/mach-ixp4xx/nas100d-setup.c
@@ -3,8 +3,14 @@
  *
  * NAS 100d board-setup
  *
- * based ixdp425-setup.c:
+ * Copyright (C) 2008 Rod Whitby <rod@whitby.id.au>
+ *
+ * based on ixdp425-setup.c:
  *      Copyright (C) 2003-2004 MontaVista Software, Inc.
+ * based on nas100d-power.c:
+ *	Copyright (C) 2005 Tower Technologies
+ * based on nas100d-io.c
+ *	Copyright (C) 2004 Karen Spearel
  *
  * Author: Alessandro Zummo <a.zummo@towertech.it>
  * Author: Rod Whitby <rod@whitby.id.au>
@@ -12,15 +18,22 @@
  *
  */
 
-#include <linux/kernel.h>
+#include <linux/if_ether.h>
+#include <linux/irq.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
 #include <linux/serial.h>
 #include <linux/serial_8250.h>
 #include <linux/leds.h>
+#include <linux/reboot.h>
+#include <linux/i2c.h>
 #include <linux/i2c-gpio.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
+#include <asm/io.h>
+#include <asm/gpio.h>
 
 static struct flash_platform_data nas100d_flash_data = {
 	.map_name		= "cfi_probe",
@@ -39,35 +52,40 @@
 	.resource		= &nas100d_flash_resource,
 };
 
-#ifdef CONFIG_LEDS_IXP4XX
-static struct resource nas100d_led_resources[] = {
+static struct i2c_board_info __initdata nas100d_i2c_board_info [] = {
 	{
-		.name		= "wlan",   /* green led */
-		.start		= 0,
-		.end		= 0,
-		.flags		= IXP4XX_GPIO_LOW,
-	},
-	{
-		.name		= "ready",  /* blue power led (off is flashing!) */
-		.start		= 15,
-		.end		= 15,
-		.flags		= IXP4XX_GPIO_LOW,
-	},
-	{
-		.name		= "disk",   /* yellow led */
-		.start		= 3,
-		.end		= 3,
-		.flags		= IXP4XX_GPIO_LOW,
+		I2C_BOARD_INFO("rtc-pcf8563", 0x51),
 	},
 };
 
-static struct platform_device nas100d_leds = {
-	.name			= "IXP4XX-GPIO-LED",
-	.id			= -1,
-	.num_resources		= ARRAY_SIZE(nas100d_led_resources),
-	.resource		= nas100d_led_resources,
+static struct gpio_led nas100d_led_pins[] = {
+	{
+		.name		= "wlan",   /* green led */
+		.gpio		= NAS100D_LED_WLAN_GPIO,
+		.active_low	= true,
+	},
+	{
+		.name		= "power",  /* blue power led (off=flashing) */
+		.gpio		= NAS100D_LED_PWR_GPIO,
+		.active_low	= true,
+	},
+	{
+		.name		= "disk",   /* yellow led */
+		.gpio		= NAS100D_LED_DISK_GPIO,
+		.active_low	= true,
+	},
 };
-#endif
+
+static struct gpio_led_platform_data nas100d_led_data = {
+	.num_leds		= ARRAY_SIZE(nas100d_led_pins),
+	.leds			= nas100d_led_pins,
+};
+
+static struct platform_device nas100d_leds = {
+	.name			= "leds-gpio",
+	.id			= -1,
+	.dev.platform_data	= &nas100d_led_data,
+};
 
 static struct i2c_gpio_platform_data nas100d_i2c_gpio_data = {
 	.sda_pin		= NAS100D_SDA_PIN,
@@ -125,12 +143,28 @@
 	.resource		= nas100d_uart_resources,
 };
 
+/* Built-in 10/100 Ethernet MAC interfaces */
+static struct eth_plat_info nas100d_plat_eth[] = {
+	{
+		.phy		= 0,
+		.rxq		= 3,
+		.txreadyq	= 20,
+	}
+};
+
+static struct platform_device nas100d_eth[] = {
+	{
+		.name			= "ixp4xx_eth",
+		.id			= IXP4XX_ETH_NPEB,
+		.dev.platform_data	= nas100d_plat_eth,
+	}
+};
+
 static struct platform_device *nas100d_devices[] __initdata = {
 	&nas100d_i2c_gpio,
 	&nas100d_flash,
-#ifdef CONFIG_LEDS_IXP4XX
 	&nas100d_leds,
-#endif
+	&nas100d_eth[0],
 };
 
 static void nas100d_power_off(void)
@@ -144,8 +178,63 @@
 	gpio_line_set(NAS100D_PO_GPIO, IXP4XX_GPIO_HIGH);
 }
 
+/* This is used to make sure the power-button pusher is serious.  The button
+ * must be held until the value of this counter reaches zero.
+ */
+static int power_button_countdown;
+
+/* Must hold the button down for at least this many counts to be processed */
+#define PBUTTON_HOLDDOWN_COUNT 4 /* 2 secs */
+
+static void nas100d_power_handler(unsigned long data);
+static DEFINE_TIMER(nas100d_power_timer, nas100d_power_handler, 0, 0);
+
+static void nas100d_power_handler(unsigned long data)
+{
+	/* This routine is called twice per second to check the
+	 * state of the power button.
+	 */
+
+	if (gpio_get_value(NAS100D_PB_GPIO)) {
+
+		/* IO Pin is 1 (button pushed) */
+		if (power_button_countdown > 0)
+			power_button_countdown--;
+
+	} else {
+
+		/* Done on button release, to allow for auto-power-on mods. */
+		if (power_button_countdown == 0) {
+			/* Signal init to do the ctrlaltdel action,
+			 * this will bypass init if it hasn't started
+			 * and do a kernel_restart.
+			 */
+			ctrl_alt_del(); /* NOTE(review): repeats every tick */
+
+			/* Change the state of the power LED to "blink" */
+			gpio_line_set(NAS100D_LED_PWR_GPIO, IXP4XX_GPIO_LOW);
+		} else {
+			power_button_countdown = PBUTTON_HOLDDOWN_COUNT;
+		}
+	}
+
+	mod_timer(&nas100d_power_timer, jiffies + msecs_to_jiffies(500));
+}
+
+static irqreturn_t nas100d_reset_handler(int irq, void *dev_id)
+{
+	/* This is the paper-clip reset, it shuts the machine down directly. */
+	machine_power_off();
+
+	return IRQ_HANDLED;
+}
+
 static void __init nas100d_init(void)
 {
+	DECLARE_MAC_BUF(mac_buf);
+	uint8_t __iomem *f;
+	int i;
+
 	ixp4xx_sys_init();
 
 	/* gpio 14 and 15 are _not_ clocks */
@@ -155,7 +244,8 @@
 	nas100d_flash_resource.end =
 		IXP4XX_EXP_BUS_BASE(0) + ixp4xx_exp_bus_size - 1;
 
-	pm_power_off = nas100d_power_off;
+	i2c_register_board_info(0, nas100d_i2c_board_info,
+				ARRAY_SIZE(nas100d_i2c_board_info));
 
 	/*
 	 * This is only useful on a modified machine, but it is valuable
@@ -165,6 +255,48 @@
 	(void)platform_device_register(&nas100d_uart);
 
 	platform_add_devices(nas100d_devices, ARRAY_SIZE(nas100d_devices));
+
+	pm_power_off = nas100d_power_off;
+
+	if (request_irq(gpio_to_irq(NAS100D_RB_GPIO), &nas100d_reset_handler,
+		IRQF_DISABLED | IRQF_TRIGGER_LOW,
+		"NAS100D reset button", NULL) < 0) {
+
+		printk(KERN_DEBUG "Reset Button IRQ %d not available\n",
+			gpio_to_irq(NAS100D_RB_GPIO));
+	}
+
+	/* The power button on the Iomega NAS100d is on GPIO 14, but
+	 * it cannot handle interrupts on that GPIO line.  So we'll
+	 * have to poll it with a kernel timer.
+	 */
+
+	/* Make sure that the power button GPIO is set up as an input */
+	gpio_line_config(NAS100D_PB_GPIO, IXP4XX_GPIO_IN);
+
+	/* Set the initial value for the power button IRQ handler */
+	power_button_countdown = PBUTTON_HOLDDOWN_COUNT;
+
+	mod_timer(&nas100d_power_timer, jiffies + msecs_to_jiffies(500));
+
+	/*
+	 * Map in a portion of the flash and read the MAC address.
+	 * Since it is stored in BE in the flash itself, we need to
+	 * byteswap it if we're in LE mode.
+	 */
+	f = ioremap(IXP4XX_EXP_BUS_BASE(0), 0x1000000);
+	if (f) {
+		for (i = 0; i < 6; i++)
+#ifdef __ARMEB__
+			nas100d_plat_eth[0].hwaddr[i] = readb(f + 0xFC0FD8 + i);
+#else
+			nas100d_plat_eth[0].hwaddr[i] = readb(f + 0xFC0FD8 + (i^3));
+#endif
+		iounmap(f);
+	}
+	printk(KERN_INFO "NAS100D: Using MAC address %s for port 0\n",
+	       print_mac(mac_buf, nas100d_plat_eth[0].hwaddr));
+
 }
 
 MACHINE_START(NAS100D, "Iomega NAS 100d")
diff --git a/arch/arm/mach-ixp4xx/nslu2-power.c b/arch/arm/mach-ixp4xx/nslu2-power.c
deleted file mode 100644
index 6f10dc20..0000000
--- a/arch/arm/mach-ixp4xx/nslu2-power.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * arch/arm/mach-ixp4xx/nslu2-power.c
- *
- * NSLU2 Power/Reset driver
- *
- * Copyright (C) 2005 Tower Technologies
- *
- * based on nslu2-io.c
- *  Copyright (C) 2004 Karen Spearel
- *
- * Author: Alessandro Zummo <a.zummo@towertech.it>
- * Maintainers: http://www.nslu2-linux.org/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/module.h>
-#include <linux/reboot.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-
-#include <asm/mach-types.h>
-
-static irqreturn_t nslu2_power_handler(int irq, void *dev_id)
-{
-	/* Signal init to do the ctrlaltdel action, this will bypass init if
-	 * it hasn't started and do a kernel_restart.
-	 */
-	ctrl_alt_del();
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t nslu2_reset_handler(int irq, void *dev_id)
-{
-	/* This is the paper-clip reset, it shuts the machine down directly.
-	 */
-	machine_power_off();
-
-	return IRQ_HANDLED;
-}
-
-static int __init nslu2_power_init(void)
-{
-	if (!(machine_is_nslu2()))
-		return 0;
-
-	*IXP4XX_GPIO_GPISR = 0x20400000;	/* read the 2 irqs to clr */
-
-	set_irq_type(NSLU2_RB_IRQ, IRQT_LOW);
-	set_irq_type(NSLU2_PB_IRQ, IRQT_HIGH);
-
-	if (request_irq(NSLU2_RB_IRQ, &nslu2_reset_handler,
-		IRQF_DISABLED, "NSLU2 reset button", NULL) < 0) {
-
-		printk(KERN_DEBUG "Reset Button IRQ %d not available\n",
-			NSLU2_RB_IRQ);
-
-		return -EIO;
-	}
-
-	if (request_irq(NSLU2_PB_IRQ, &nslu2_power_handler,
-		IRQF_DISABLED, "NSLU2 power button", NULL) < 0) {
-
-		printk(KERN_DEBUG "Power Button IRQ %d not available\n",
-			NSLU2_PB_IRQ);
-
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static void __exit nslu2_power_exit(void)
-{
-	if (!(machine_is_nslu2()))
-		return;
-
-	free_irq(NSLU2_RB_IRQ, NULL);
-	free_irq(NSLU2_PB_IRQ, NULL);
-}
-
-module_init(nslu2_power_init);
-module_exit(nslu2_power_exit);
-
-MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>");
-MODULE_DESCRIPTION("NSLU2 Power/Reset driver");
-MODULE_LICENSE("GPL");
diff --git a/arch/arm/mach-ixp4xx/nslu2-setup.c b/arch/arm/mach-ixp4xx/nslu2-setup.c
index 77277d2..acaebcb 100644
--- a/arch/arm/mach-ixp4xx/nslu2-setup.c
+++ b/arch/arm/mach-ixp4xx/nslu2-setup.c
@@ -3,27 +3,35 @@
  *
  * NSLU2 board-setup
  *
- * based ixdp425-setup.c:
+ * Copyright (C) 2008 Rod Whitby <rod@whitby.id.au>
+ *
+ * based on ixdp425-setup.c:
  *      Copyright (C) 2003-2004 MontaVista Software, Inc.
+ * based on nslu2-power.c:
+ *	Copyright (C) 2005 Tower Technologies
  *
  * Author: Mark Rakes <mrakes at mac.com>
  * Author: Rod Whitby <rod@whitby.id.au>
+ * Author: Alessandro Zummo <a.zummo@towertech.it>
  * Maintainers: http://www.nslu2-linux.org/
  *
- * Fixed missing init_time in MACHINE_START kas11 10/22/04
- * Changed to conform to new style __init ixdp425 kas11 10/22/04
  */
 
-#include <linux/kernel.h>
+#include <linux/if_ether.h>
+#include <linux/irq.h>
 #include <linux/serial.h>
 #include <linux/serial_8250.h>
 #include <linux/leds.h>
+#include <linux/reboot.h>
+#include <linux/i2c.h>
 #include <linux/i2c-gpio.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/time.h>
+#include <asm/io.h>
+#include <asm/gpio.h>
 
 static struct flash_platform_data nslu2_flash_data = {
 	.map_name		= "cfi_probe",
@@ -47,41 +55,43 @@
 	.scl_pin		= NSLU2_SCL_PIN,
 };
 
-#ifdef CONFIG_LEDS_IXP4XX
-static struct resource nslu2_led_resources[] = {
+static struct i2c_board_info __initdata nslu2_i2c_board_info [] = {
 	{
-		.name		= "ready",  /* green led */
-		.start		= NSLU2_LED_GRN_GPIO,
-		.end		= NSLU2_LED_GRN_GPIO,
-		.flags		= IXP4XX_GPIO_HIGH,
-	},
-	{
-		.name		= "status", /* red led */
-		.start		= NSLU2_LED_RED_GPIO,
-		.end		= NSLU2_LED_RED_GPIO,
-		.flags		= IXP4XX_GPIO_HIGH,
-	},
-	{
-		.name		= "disk-1",
-		.start		= NSLU2_LED_DISK1_GPIO,
-		.end		= NSLU2_LED_DISK1_GPIO,
-		.flags		= IXP4XX_GPIO_LOW,
-	},
-	{
-		.name		= "disk-2",
-		.start		= NSLU2_LED_DISK2_GPIO,
-		.end		= NSLU2_LED_DISK2_GPIO,
-		.flags		= IXP4XX_GPIO_LOW,
+		I2C_BOARD_INFO("rtc-x1205", 0x6f),
 	},
 };
 
-static struct platform_device nslu2_leds = {
-	.name			= "IXP4XX-GPIO-LED",
-	.id			= -1,
-	.num_resources		= ARRAY_SIZE(nslu2_led_resources),
-	.resource		= nslu2_led_resources,
+static struct gpio_led nslu2_led_pins[] = {
+	{
+		.name		= "ready",  /* green led */
+		.gpio		= NSLU2_LED_GRN_GPIO,
+	},
+	{
+		.name		= "status", /* red led */
+		.gpio		= NSLU2_LED_RED_GPIO,
+	},
+	{
+		.name		= "disk-1",
+		.gpio		= NSLU2_LED_DISK1_GPIO,
+		.active_low	= true,
+	},
+	{
+		.name		= "disk-2",
+		.gpio		= NSLU2_LED_DISK2_GPIO,
+		.active_low	= true,
+	},
 };
-#endif
+
+static struct gpio_led_platform_data nslu2_led_data = {
+	.num_leds		= ARRAY_SIZE(nslu2_led_pins),
+	.leds			= nslu2_led_pins,
+};
+
+static struct platform_device nslu2_leds = {
+	.name			= "leds-gpio",
+	.id			= -1,
+	.dev.platform_data	= &nslu2_led_data,
+};
 
 static struct platform_device nslu2_i2c_gpio = {
 	.name			= "i2c-gpio",
@@ -140,13 +150,29 @@
 	.resource		= nslu2_uart_resources,
 };
 
+/* Built-in 10/100 Ethernet MAC interfaces */
+static struct eth_plat_info nslu2_plat_eth[] = {
+	{
+		.phy		= 1,
+		.rxq		= 3,
+		.txreadyq	= 20,
+	}
+};
+
+static struct platform_device nslu2_eth[] = {
+	{
+		.name			= "ixp4xx_eth",
+		.id			= IXP4XX_ETH_NPEB,
+		.dev.platform_data	= nslu2_plat_eth,
+	}
+};
+
 static struct platform_device *nslu2_devices[] __initdata = {
 	&nslu2_i2c_gpio,
 	&nslu2_flash,
 	&nslu2_beeper,
-#ifdef CONFIG_LEDS_IXP4XX
 	&nslu2_leds,
-#endif
+	&nslu2_eth[0],
 };
 
 static void nslu2_power_off(void)
@@ -160,6 +186,25 @@
 	gpio_line_set(NSLU2_PO_GPIO, IXP4XX_GPIO_HIGH);
 }
 
+static irqreturn_t nslu2_power_handler(int irq, void *dev_id)
+{
+	/* Signal init to do the ctrlaltdel action, this will bypass init if
+	 * it hasn't started and do a kernel_restart.
+	 */
+	ctrl_alt_del();
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t nslu2_reset_handler(int irq, void *dev_id)
+{
+	/* This is the paper-clip reset, it shuts the machine down directly.
+	 */
+	machine_power_off();
+
+	return IRQ_HANDLED;
+}
+
 static void __init nslu2_timer_init(void)
 {
     /* The xtal on this machine is non-standard. */
@@ -175,13 +220,18 @@
 
 static void __init nslu2_init(void)
 {
+	DECLARE_MAC_BUF(mac_buf);
+	uint8_t __iomem *f;
+	int i;
+
 	ixp4xx_sys_init();
 
 	nslu2_flash_resource.start = IXP4XX_EXP_BUS_BASE(0);
 	nslu2_flash_resource.end =
 		IXP4XX_EXP_BUS_BASE(0) + ixp4xx_exp_bus_size - 1;
 
-	pm_power_off = nslu2_power_off;
+	i2c_register_board_info(0, nslu2_i2c_board_info,
+				ARRAY_SIZE(nslu2_i2c_board_info));
 
 	/*
 	 * This is only useful on a modified machine, but it is valuable
@@ -191,6 +241,43 @@
 	(void)platform_device_register(&nslu2_uart);
 
 	platform_add_devices(nslu2_devices, ARRAY_SIZE(nslu2_devices));
+
+	pm_power_off = nslu2_power_off;
+
+	if (request_irq(gpio_to_irq(NSLU2_RB_GPIO), &nslu2_reset_handler,
+		IRQF_DISABLED | IRQF_TRIGGER_LOW,
+		"NSLU2 reset button", NULL) < 0) {
+
+		printk(KERN_DEBUG "Reset Button IRQ %d not available\n",
+			gpio_to_irq(NSLU2_RB_GPIO));
+	}
+
+	if (request_irq(gpio_to_irq(NSLU2_PB_GPIO), &nslu2_power_handler,
+		IRQF_DISABLED | IRQF_TRIGGER_HIGH,
+		"NSLU2 power button", NULL) < 0) {
+
+		printk(KERN_DEBUG "Power Button IRQ %d not available\n",
+			gpio_to_irq(NSLU2_PB_GPIO));
+	}
+
+	/*
+	 * Map in a portion of the flash and read the MAC address.
+	 * Since it is stored in BE in the flash itself, we need to
+	 * byteswap it if we're in LE mode.
+	 */
+	f = ioremap(IXP4XX_EXP_BUS_BASE(0), 0x40000);
+	if (f) {
+		for (i = 0; i < 6; i++)
+#ifdef __ARMEB__
+			nslu2_plat_eth[0].hwaddr[i] = readb(f + 0x3FFB0 + i);
+#else
+			nslu2_plat_eth[0].hwaddr[i] = readb(f + 0x3FFB0 + (i^3));
+#endif
+		iounmap(f);
+	}
+	printk(KERN_INFO "NSLU2: Using MAC address %s for port 0\n",
+	       print_mac(mac_buf, nslu2_plat_eth[0].hwaddr));
+
 }
 
 MACHINE_START(NSLU2, "Linksys NSLU2")
diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile
index b5c916c..6e0c4f5 100644
--- a/arch/arm/mach-pxa/Makefile
+++ b/arch/arm/mach-pxa/Makefile
@@ -3,10 +3,11 @@
 #
 
 # Common support (must be linked before board specific support)
-obj-y				+= clock.o devices.o generic.o irq.o dma.o time.o
+obj-y				+= clock.o devices.o generic.o irq.o dma.o \
+				   time.o gpio.o
 obj-$(CONFIG_PXA25x)		+= pxa25x.o
 obj-$(CONFIG_PXA27x)		+= pxa27x.o
-obj-$(CONFIG_PXA3xx)		+= pxa3xx.o mfp.o
+obj-$(CONFIG_PXA3xx)		+= pxa3xx.o mfp.o smemc.o
 obj-$(CONFIG_CPU_PXA300)	+= pxa300.o
 obj-$(CONFIG_CPU_PXA320)	+= pxa320.o
 
diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c
index 28cfd71..6012177 100644
--- a/arch/arm/mach-pxa/cm-x270.c
+++ b/arch/arm/mach-pxa/cm-x270.c
@@ -29,6 +29,7 @@
 #include <asm/mach/map.h>
 
 #include <asm/arch/pxa-regs.h>
+#include <asm/arch/pxa2xx-regs.h>
 #include <asm/arch/pxafb.h>
 #include <asm/arch/ohci.h>
 #include <asm/arch/mmc.h>
diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c
index 50ff453..bfccb80 100644
--- a/arch/arm/mach-pxa/devices.c
+++ b/arch/arm/mach-pxa/devices.c
@@ -10,6 +10,7 @@
 #include <asm/arch/mmc.h>
 #include <asm/arch/irda.h>
 #include <asm/arch/i2c.h>
+#include <asm/arch/ohci.h>
 
 #include "devices.h"
 
diff --git a/arch/arm/mach-pxa/generic.c b/arch/arm/mach-pxa/generic.c
index 698aeec..80721c6 100644
--- a/arch/arm/mach-pxa/generic.c
+++ b/arch/arm/mach-pxa/generic.c
@@ -23,6 +23,7 @@
 #include <linux/ioport.h>
 #include <linux/pm.h>
 #include <linux/string.h>
+#include <linux/sysdev.h>
 
 #include <asm/hardware.h>
 #include <asm/irq.h>
@@ -31,7 +32,6 @@
 #include <asm/mach/map.h>
 
 #include <asm/arch/pxa-regs.h>
-#include <asm/arch/gpio.h>
 
 #include "generic.h"
 
@@ -66,97 +66,6 @@
 EXPORT_SYMBOL(get_memclk_frequency_10khz);
 
 /*
- * Handy function to set GPIO alternate functions
- */
-int pxa_last_gpio;
-
-int pxa_gpio_mode(int gpio_mode)
-{
-	unsigned long flags;
-	int gpio = gpio_mode & GPIO_MD_MASK_NR;
-	int fn = (gpio_mode & GPIO_MD_MASK_FN) >> 8;
-	int gafr;
-
-	if (gpio > pxa_last_gpio)
-		return -EINVAL;
-
-	local_irq_save(flags);
-	if (gpio_mode & GPIO_DFLT_LOW)
-		GPCR(gpio) = GPIO_bit(gpio);
-	else if (gpio_mode & GPIO_DFLT_HIGH)
-		GPSR(gpio) = GPIO_bit(gpio);
-	if (gpio_mode & GPIO_MD_MASK_DIR)
-		GPDR(gpio) |= GPIO_bit(gpio);
-	else
-		GPDR(gpio) &= ~GPIO_bit(gpio);
-	gafr = GAFR(gpio) & ~(0x3 << (((gpio) & 0xf)*2));
-	GAFR(gpio) = gafr |  (fn  << (((gpio) & 0xf)*2));
-	local_irq_restore(flags);
-
-	return 0;
-}
-
-EXPORT_SYMBOL(pxa_gpio_mode);
-
-int gpio_direction_input(unsigned gpio)
-{
-	unsigned long flags;
-	u32 mask;
-
-	if (gpio > pxa_last_gpio)
-		return -EINVAL;
-
-	mask = GPIO_bit(gpio);
-	local_irq_save(flags);
-	GPDR(gpio) &= ~mask;
-	local_irq_restore(flags);
-
-	return 0;
-}
-EXPORT_SYMBOL(gpio_direction_input);
-
-int gpio_direction_output(unsigned gpio, int value)
-{
-	unsigned long flags;
-	u32 mask;
-
-	if (gpio > pxa_last_gpio)
-		return -EINVAL;
-
-	mask = GPIO_bit(gpio);
-	local_irq_save(flags);
-	if (value)
-		GPSR(gpio) = mask;
-	else
-		GPCR(gpio) = mask;
-	GPDR(gpio) |= mask;
-	local_irq_restore(flags);
-
-	return 0;
-}
-EXPORT_SYMBOL(gpio_direction_output);
-
-/*
- * Return GPIO level
- */
-int pxa_gpio_get_value(unsigned gpio)
-{
-	return __gpio_get_value(gpio);
-}
-
-EXPORT_SYMBOL(pxa_gpio_get_value);
-
-/*
- * Set output GPIO level
- */
-void pxa_gpio_set_value(unsigned gpio, int value)
-{
-	__gpio_set_value(gpio, value);
-}
-
-EXPORT_SYMBOL(pxa_gpio_set_value);
-
-/*
  * Routine to safely enable or disable a clock in the CKEN
  */
 void __pxa_set_cken(int clock, int enable)
@@ -171,7 +80,6 @@
 
 	local_irq_restore(flags);
 }
-
 EXPORT_SYMBOL(__pxa_set_cken);
 
 /*
@@ -226,3 +134,59 @@
 	iotable_init(standard_io_desc, ARRAY_SIZE(standard_io_desc));
 	get_clk_frequency_khz(1);
 }
+
+#ifdef CONFIG_PM
+
+static unsigned long saved_gplr[4];
+static unsigned long saved_gpdr[4];
+static unsigned long saved_grer[4];
+static unsigned long saved_gfer[4];
+
+static int pxa_gpio_suspend(struct sys_device *dev, pm_message_t state)
+{
+	int i, gpio;
+
+	/* pxa_last_gpio is the highest valid GPIO, so the bound is inclusive */
+	for (gpio = 0, i = 0; gpio <= pxa_last_gpio; gpio += 32, i++) {
+		saved_gplr[i] = GPLR(gpio);
+		saved_gpdr[i] = GPDR(gpio);
+		saved_grer[i] = GRER(gpio);
+		saved_gfer[i] = GFER(gpio);
+		/* Clear GPIO transition detect bits */
+		GEDR(gpio) = GEDR(gpio);
+	}
+	return 0;
+}
+
+static int pxa_gpio_resume(struct sys_device *dev)
+{
+	int i, gpio;
+
+	/* walk the same banks as pxa_gpio_suspend (inclusive upper bound) */
+	for (gpio = 0, i = 0; gpio <= pxa_last_gpio; gpio += 32, i++) {
+		/* restore level with set/clear */
+		GPSR(gpio) = saved_gplr[i];
+		GPCR(gpio) = ~saved_gplr[i];
+		GRER(gpio) = saved_grer[i];
+		GFER(gpio) = saved_gfer[i];
+		GPDR(gpio) = saved_gpdr[i];
+	}
+	return 0;
+}
+#else
+#define pxa_gpio_suspend	NULL
+#define pxa_gpio_resume		NULL
+#endif
+
+struct sysdev_class pxa_gpio_sysclass = {
+	.name		= "gpio",
+	.suspend	= pxa_gpio_suspend,
+	.resume		= pxa_gpio_resume,
+};
+
+static int __init pxa_gpio_init(void)
+{
+	return sysdev_class_register(&pxa_gpio_sysclass);
+}
+
+core_initcall(pxa_gpio_init);
diff --git a/arch/arm/mach-pxa/generic.h b/arch/arm/mach-pxa/generic.h
index b30f240..b3d10b0 100644
--- a/arch/arm/mach-pxa/generic.h
+++ b/arch/arm/mach-pxa/generic.h
@@ -16,6 +16,7 @@
 extern void __init pxa_init_irq_high(void);
 extern void __init pxa_init_irq_gpio(int gpio_nr);
 extern void __init pxa_init_irq_set_wake(int (*set_wake)(unsigned int, unsigned int));
+extern void __init pxa_init_gpio(int gpio_nr);
 extern void __init pxa25x_init_irq(void);
 extern void __init pxa27x_init_irq(void);
 extern void __init pxa3xx_init_irq(void);
@@ -52,3 +53,6 @@
 #define pxa3xx_get_clk_frequency_khz(x)		(0)
 #define pxa3xx_get_memclk_frequency_10khz()	(0)
 #endif
+
+extern struct sysdev_class pxa_irq_sysclass;
+extern struct sysdev_class pxa_gpio_sysclass;
diff --git a/arch/arm/mach-pxa/gpio.c b/arch/arm/mach-pxa/gpio.c
new file mode 100644
index 0000000..8638dd7
--- /dev/null
+++ b/arch/arm/mach-pxa/gpio.c
@@ -0,0 +1,197 @@
+/*
+ *  linux/arch/arm/mach-pxa/gpio.c
+ *
+ *  Generic PXA GPIO handling
+ *
+ *  Author:	Nicolas Pitre
+ *  Created:	Jun 15, 2001
+ *  Copyright:	MontaVista Software Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <asm/gpio.h>
+#include <asm/hardware.h>
+#include <asm/io.h>
+#include <asm/arch/pxa-regs.h>
+
+#include "generic.h"
+
+
+struct pxa_gpio_chip {
+	struct gpio_chip chip;
+	void __iomem     *regbase;
+};
+
+int pxa_last_gpio;
+
+/*
+ * Configure pins for GPIO or other functions
+ */
+int pxa_gpio_mode(int gpio_mode)
+{
+	unsigned long flags;
+	int gpio = gpio_mode & GPIO_MD_MASK_NR;
+	int fn = (gpio_mode & GPIO_MD_MASK_FN) >> 8;
+	int gafr;
+
+	if (gpio > pxa_last_gpio)
+		return -EINVAL;
+
+	local_irq_save(flags);
+	if (gpio_mode & GPIO_DFLT_LOW)
+		GPCR(gpio) = GPIO_bit(gpio);
+	else if (gpio_mode & GPIO_DFLT_HIGH)
+		GPSR(gpio) = GPIO_bit(gpio);
+	if (gpio_mode & GPIO_MD_MASK_DIR)
+		GPDR(gpio) |= GPIO_bit(gpio);
+	else
+		GPDR(gpio) &= ~GPIO_bit(gpio);
+	gafr = GAFR(gpio) & ~(0x3 << (((gpio) & 0xf)*2));
+	GAFR(gpio) = gafr |  (fn  << (((gpio) & 0xf)*2));
+	local_irq_restore(flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(pxa_gpio_mode);
+
+/* Make pin 'offset' of this bank an input by clearing its GPDR bit.
+ * The mask is unsigned: offset can be 31 and 1 << 31 overflows an int. */
+static int pxa_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
+{
+	struct pxa_gpio_chip *pxa;
+	void __iomem         *gpdr;
+	unsigned long        flags;
+	u32                  mask = 1u << offset;
+
+	pxa = container_of(chip, struct pxa_gpio_chip, chip);
+	gpdr = pxa->regbase + GPDR_OFFSET;
+	/* keep the GPDR read-modify-write free of local interrupts */
+	local_irq_save(flags);
+	__raw_writel(__raw_readl(gpdr) & ~mask, gpdr);
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+/* Write the level via GPSR/GPCR, then set the pin's GPDR bit to make it
+ * an output.  The mask is unsigned: offset can be 31. */
+static int pxa_gpio_direction_output(struct gpio_chip *chip,
+					unsigned offset, int value)
+{
+	struct pxa_gpio_chip *pxa;
+	void __iomem         *gpdr;
+	unsigned long        flags;
+	u32                  mask = 1u << offset;
+
+	pxa = container_of(chip, struct pxa_gpio_chip, chip);
+	gpdr = pxa->regbase + GPDR_OFFSET;
+	/* program the level before the direction bit is set */
+	__raw_writel(mask,
+			pxa->regbase + (value ? GPSR_OFFSET : GPCR_OFFSET));
+	local_irq_save(flags);
+	__raw_writel(__raw_readl(gpdr) | mask, gpdr);
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+/*
+ * Return GPIO level (masked GPLR bit; mask unsigned as offset can be 31)
+ */
+static int pxa_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	struct pxa_gpio_chip *pxa;
+	u32                  mask = 1u << offset;
+
+	pxa = container_of(chip, struct pxa_gpio_chip, chip);
+	return __raw_readl(pxa->regbase + GPLR_OFFSET) & mask;
+}
+
+/*
+ * Set output GPIO level: GPSR for non-zero value, GPCR for zero
+ */
+static void pxa_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	struct pxa_gpio_chip *pxa;
+	u32                  mask = 1u << offset;	/* offset can be 31 */
+
+	pxa = container_of(chip, struct pxa_gpio_chip, chip);
+
+	if (value)
+		__raw_writel(mask, pxa->regbase + GPSR_OFFSET);
+	else
+		__raw_writel(mask, pxa->regbase + GPCR_OFFSET);
+}
+
+static struct pxa_gpio_chip pxa_gpio_chip[] = {
+	[0] = {
+		.regbase = GPIO0_BASE,
+		.chip = {
+			.label            = "gpio-0",
+			.direction_input  = pxa_gpio_direction_input,
+			.direction_output = pxa_gpio_direction_output,
+			.get              = pxa_gpio_get,
+			.set              = pxa_gpio_set,
+			.base             = 0,
+			.ngpio            = 32,
+		},
+	},
+	[1] = {
+		.regbase = GPIO1_BASE,
+		.chip = {
+			.label            = "gpio-1",
+			.direction_input  = pxa_gpio_direction_input,
+			.direction_output = pxa_gpio_direction_output,
+			.get              = pxa_gpio_get,
+			.set              = pxa_gpio_set,
+			.base             = 32,
+			.ngpio            = 32,
+		},
+	},
+	[2] = {
+		.regbase = GPIO2_BASE,
+		.chip = {
+			.label            = "gpio-2",
+			.direction_input  = pxa_gpio_direction_input,
+			.direction_output = pxa_gpio_direction_output,
+			.get              = pxa_gpio_get,
+			.set              = pxa_gpio_set,
+			.base             = 64,
+			.ngpio            = 32, /* 21 for PXA25x */
+		},
+	},
+#if defined(CONFIG_PXA27x) || defined(CONFIG_PXA3xx)
+	[3] = {
+		.regbase = GPIO3_BASE,
+		.chip = {
+			.label            = "gpio-3",
+			.direction_input  = pxa_gpio_direction_input,
+			.direction_output = pxa_gpio_direction_output,
+			.get              = pxa_gpio_get,
+			.set              = pxa_gpio_set,
+			.base             = 96,
+			.ngpio            = 32,
+		},
+	},
+#endif
+};
+
+void __init pxa_init_gpio(int gpio_nr)
+{
+	int i, nr = min_t(int, gpio_nr, 32 * ARRAY_SIZE(pxa_gpio_chip));
+
+	/* add a GPIO chip for each register bank (the last one may be
+	 * partial: only 21 GPIOs on PXA25x); nr is clamped to the table size
+	 */
+	for (i = 0; i < nr; i += 32) {
+		if (i+32 > nr)
+			pxa_gpio_chip[i/32].chip.ngpio = nr - i;
+		gpiochip_add(&pxa_gpio_chip[i/32].chip);
+	}
+}
diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c
index 07acb45..36c6a68 100644
--- a/arch/arm/mach-pxa/irq.c
+++ b/arch/arm/mach-pxa/irq.c
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
+#include <linux/sysdev.h>
 
 #include <asm/hardware.h>
 #include <asm/irq.h>
@@ -310,6 +311,8 @@
 	/* Install handler for GPIO>=2 edge detect interrupts */
 	set_irq_chip(IRQ_GPIO_2_x, &pxa_internal_chip_low);
 	set_irq_chained_handler(IRQ_GPIO_2_x, pxa_gpio_demux_handler);
+
+	pxa_init_gpio(gpio_nr);
 }
 
 void __init pxa_init_irq_set_wake(int (*set_wake)(unsigned int, unsigned int))
@@ -321,3 +324,64 @@
 	pxa_low_gpio_chip.set_wake = set_wake;
 	pxa_muxed_gpio_chip.set_wake = set_wake;
 }
+
+#ifdef CONFIG_PM
+static unsigned long saved_icmr[2];
+
+static int pxa_irq_suspend(struct sys_device *dev, pm_message_t state)
+{
+	switch (dev->id) {
+	case 0:
+		saved_icmr[0] = ICMR;
+		ICMR = 0;
+		break;
+#if defined(CONFIG_PXA27x) || defined(CONFIG_PXA3xx)
+	case 1:
+		saved_icmr[1] = ICMR2;
+		ICMR2 = 0;
+		break;
+#endif
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int pxa_irq_resume(struct sys_device *dev)
+{
+	switch (dev->id) {
+	case 0:
+		ICMR = saved_icmr[0];
+		ICLR = 0;
+		ICCR = 1;
+		break;
+#if defined(CONFIG_PXA27x) || defined(CONFIG_PXA3xx)
+	case 1:
+		ICMR2 = saved_icmr[1];
+		ICLR2 = 0;
+		break;
+#endif
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#else
+#define pxa_irq_suspend		NULL
+#define pxa_irq_resume		NULL
+#endif
+
+struct sysdev_class pxa_irq_sysclass = {
+	.name		= "irq",
+	.suspend	= pxa_irq_suspend,
+	.resume		= pxa_irq_resume,
+};
+
+static int __init pxa_irq_init(void)
+{
+	return sysdev_class_register(&pxa_irq_sysclass);
+}
+
+core_initcall(pxa_irq_init);
diff --git a/arch/arm/mach-pxa/mfp.c b/arch/arm/mach-pxa/mfp.c
index ec1b2d8..f5809ad 100644
--- a/arch/arm/mach-pxa/mfp.c
+++ b/arch/arm/mach-pxa/mfp.c
@@ -22,6 +22,7 @@
 #include <asm/hardware.h>
 #include <asm/arch/mfp.h>
 #include <asm/arch/mfp-pxa3xx.h>
+#include <asm/arch/pxa3xx-regs.h>
 
 /* mfp_spin_lock is used to ensure that MFP register configuration
  * (most likely a read-modify-write operation) is atomic, and that
@@ -223,11 +224,19 @@
 		struct pxa3xx_mfp_pin *p = &mfp_table[pin];
 		__mfp_config_run(p);
 	}
+
+	/* clear RDH bit when MFP settings are restored
+	 *
+	 * NOTE: the last 3 bits DxS are write-1-to-clear so carefully
+	 * preserve them here in case they will be referenced later
+	 */
+	ASCR &= ~(ASCR_RDH | ASCR_D1S | ASCR_D2S | ASCR_D3S);
+
 	return 0;
 }
 
 static struct sysdev_class mfp_sysclass = {
-	set_kset_name("mfp"),
+	.name		= "mfp",
 	.suspend	= pxa3xx_mfp_suspend,
 	.resume 	= pxa3xx_mfp_resume,
 };
diff --git a/arch/arm/mach-pxa/pcm027.c b/arch/arm/mach-pxa/pcm027.c
index 540c3bb..c14696b9 100644
--- a/arch/arm/mach-pxa/pcm027.c
+++ b/arch/arm/mach-pxa/pcm027.c
@@ -29,6 +29,7 @@
 #include <asm/mach/arch.h>
 #include <asm/arch/hardware.h>
 #include <asm/arch/pxa-regs.h>
+#include <asm/arch/pxa2xx-regs.h>
 #include <asm/arch/pxa2xx_spi.h>
 #include <asm/arch/pcm027.h>
 #include "generic.h"
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index dd54496..209eabf 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -164,7 +164,7 @@
 	},
 };
 
-static unsigned long poodle_get_hsync_len(void)
+static unsigned long poodle_get_hsync_invperiod(void)
 {
 	return 0;
 }
@@ -174,9 +174,9 @@
 }
 
 static struct corgits_machinfo  poodle_ts_machinfo = {
-	.get_hsync_len   = poodle_get_hsync_len,
-	.put_hsync       = poodle_null_hsync,
-	.wait_hsync      = poodle_null_hsync,
+	.get_hsync_invperiod	= poodle_get_hsync_invperiod,
+	.put_hsync       	= poodle_null_hsync,
+	.wait_hsync      	= poodle_null_hsync,
 };
 
 static struct platform_device poodle_ts_device = {
diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c
index ddd05bf..599e53f 100644
--- a/arch/arm/mach-pxa/pxa25x.c
+++ b/arch/arm/mach-pxa/pxa25x.c
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/suspend.h>
+#include <linux/sysdev.h>
 
 #include <asm/hardware.h>
 #include <asm/arch/irqs.h>
@@ -141,11 +142,6 @@
 #define SAVE(x)		sleep_save[SLEEP_SAVE_##x] = x
 #define RESTORE(x)	x = sleep_save[SLEEP_SAVE_##x]
 
-#define RESTORE_GPLEVEL(n) do { \
-	GPSR##n = sleep_save[SLEEP_SAVE_GPLR##n]; \
-	GPCR##n = ~sleep_save[SLEEP_SAVE_GPLR##n]; \
-} while (0)
-
 /*
  * List of global PXA peripheral registers to preserve.
  * More ones like CP and general purpose register values are preserved
@@ -153,10 +149,6 @@
  */
 enum {	SLEEP_SAVE_START = 0,
 
-	SLEEP_SAVE_GPLR0, SLEEP_SAVE_GPLR1, SLEEP_SAVE_GPLR2,
-	SLEEP_SAVE_GPDR0, SLEEP_SAVE_GPDR1, SLEEP_SAVE_GPDR2,
-	SLEEP_SAVE_GRER0, SLEEP_SAVE_GRER1, SLEEP_SAVE_GRER2,
-	SLEEP_SAVE_GFER0, SLEEP_SAVE_GFER1, SLEEP_SAVE_GFER2,
 	SLEEP_SAVE_PGSR0, SLEEP_SAVE_PGSR1, SLEEP_SAVE_PGSR2,
 
 	SLEEP_SAVE_GAFR0_L, SLEEP_SAVE_GAFR0_U,
@@ -165,7 +157,6 @@
 
 	SLEEP_SAVE_PSTR,
 
-	SLEEP_SAVE_ICMR,
 	SLEEP_SAVE_CKEN,
 
 	SLEEP_SAVE_SIZE
@@ -174,17 +165,12 @@
 
 static void pxa25x_cpu_pm_save(unsigned long *sleep_save)
 {
-	SAVE(GPLR0); SAVE(GPLR1); SAVE(GPLR2);
-	SAVE(GPDR0); SAVE(GPDR1); SAVE(GPDR2);
-	SAVE(GRER0); SAVE(GRER1); SAVE(GRER2);
-	SAVE(GFER0); SAVE(GFER1); SAVE(GFER2);
 	SAVE(PGSR0); SAVE(PGSR1); SAVE(PGSR2);
 
 	SAVE(GAFR0_L); SAVE(GAFR0_U);
 	SAVE(GAFR1_L); SAVE(GAFR1_U);
 	SAVE(GAFR2_L); SAVE(GAFR2_U);
 
-	SAVE(ICMR); ICMR = 0;
 	SAVE(CKEN);
 	SAVE(PSTR);
 
@@ -198,22 +184,14 @@
 	PSPR = 0;
 
 	/* restore registers */
-	RESTORE_GPLEVEL(0); RESTORE_GPLEVEL(1); RESTORE_GPLEVEL(2);
-	RESTORE(GPDR0); RESTORE(GPDR1); RESTORE(GPDR2);
 	RESTORE(GAFR0_L); RESTORE(GAFR0_U);
 	RESTORE(GAFR1_L); RESTORE(GAFR1_U);
 	RESTORE(GAFR2_L); RESTORE(GAFR2_U);
-	RESTORE(GRER0); RESTORE(GRER1); RESTORE(GRER2);
-	RESTORE(GFER0); RESTORE(GFER1); RESTORE(GFER2);
 	RESTORE(PGSR0); RESTORE(PGSR1); RESTORE(PGSR2);
 
 	PSSR = PSSR_RDH | PSSR_PH;
 
 	RESTORE(CKEN);
-
-	ICLR = 0;
-	ICCR = 1;
-	RESTORE(ICMR);
 	RESTORE(PSTR);
 }
 
@@ -304,9 +282,17 @@
 	&pxa25x_device_assp,
 };
 
+static struct sys_device pxa25x_sysdev[] = {
+	{
+		.cls	= &pxa_irq_sysclass,
+	}, {
+		.cls	= &pxa_gpio_sysclass,
+	},
+};
+
 static int __init pxa25x_init(void)
 {
-	int ret = 0;
+	int i, ret = 0;
 
 	/* Only add HWUART for PXA255/26x; PXA210/250/27x do not have it. */
 	if (cpu_is_pxa25x())
@@ -320,9 +306,18 @@
 
 		pxa25x_init_pm();
 
+		for (i = 0; i < ARRAY_SIZE(pxa25x_sysdev); i++) {
+			ret = sysdev_register(&pxa25x_sysdev[i]);
+			if (ret)
+				pr_err("failed to register sysdev[%d]\n", i);
+		}
+
 		ret = platform_add_devices(pxa25x_devices,
 					   ARRAY_SIZE(pxa25x_devices));
+		if (ret)
+			return ret;
 	}
+
 	/* Only add HWUART for PXA255/26x; PXA210/250/27x do not have it. */
 	if (cpu_is_pxa25x())
 		ret = platform_device_register(&pxa_device_hwuart);
diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c
index 96cf274..46a951c 100644
--- a/arch/arm/mach-pxa/pxa27x.c
+++ b/arch/arm/mach-pxa/pxa27x.c
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/suspend.h>
 #include <linux/platform_device.h>
+#include <linux/sysdev.h>
 
 #include <asm/hardware.h>
 #include <asm/irq.h>
@@ -171,11 +172,6 @@
 #define SAVE(x)		sleep_save[SLEEP_SAVE_##x] = x
 #define RESTORE(x)	x = sleep_save[SLEEP_SAVE_##x]
 
-#define RESTORE_GPLEVEL(n) do { \
-	GPSR##n = sleep_save[SLEEP_SAVE_GPLR##n]; \
-	GPCR##n = ~sleep_save[SLEEP_SAVE_GPLR##n]; \
-} while (0)
-
 /*
  * List of global PXA peripheral registers to preserve.
  * More ones like CP and general purpose register values are preserved
@@ -183,10 +179,6 @@
  */
 enum {	SLEEP_SAVE_START = 0,
 
-	SLEEP_SAVE_GPLR0, SLEEP_SAVE_GPLR1, SLEEP_SAVE_GPLR2, SLEEP_SAVE_GPLR3,
-	SLEEP_SAVE_GPDR0, SLEEP_SAVE_GPDR1, SLEEP_SAVE_GPDR2, SLEEP_SAVE_GPDR3,
-	SLEEP_SAVE_GRER0, SLEEP_SAVE_GRER1, SLEEP_SAVE_GRER2, SLEEP_SAVE_GRER3,
-	SLEEP_SAVE_GFER0, SLEEP_SAVE_GFER1, SLEEP_SAVE_GFER2, SLEEP_SAVE_GFER3,
 	SLEEP_SAVE_PGSR0, SLEEP_SAVE_PGSR1, SLEEP_SAVE_PGSR2, SLEEP_SAVE_PGSR3,
 
 	SLEEP_SAVE_GAFR0_L, SLEEP_SAVE_GAFR0_U,
@@ -196,7 +188,6 @@
 
 	SLEEP_SAVE_PSTR,
 
-	SLEEP_SAVE_ICMR,
 	SLEEP_SAVE_CKEN,
 
 	SLEEP_SAVE_MDREFR,
@@ -208,10 +199,6 @@
 
 void pxa27x_cpu_pm_save(unsigned long *sleep_save)
 {
-	SAVE(GPLR0); SAVE(GPLR1); SAVE(GPLR2); SAVE(GPLR3);
-	SAVE(GPDR0); SAVE(GPDR1); SAVE(GPDR2); SAVE(GPDR3);
-	SAVE(GRER0); SAVE(GRER1); SAVE(GRER2); SAVE(GRER3);
-	SAVE(GFER0); SAVE(GFER1); SAVE(GFER2); SAVE(GFER3);
 	SAVE(PGSR0); SAVE(PGSR1); SAVE(PGSR2); SAVE(PGSR3);
 
 	SAVE(GAFR0_L); SAVE(GAFR0_U);
@@ -223,12 +210,8 @@
 	SAVE(PWER); SAVE(PCFR); SAVE(PRER);
 	SAVE(PFER); SAVE(PKWR);
 
-	SAVE(ICMR); ICMR = 0;
 	SAVE(CKEN);
 	SAVE(PSTR);
-
-	/* Clear GPIO transition detect bits */
-	GEDR0 = GEDR0; GEDR1 = GEDR1; GEDR2 = GEDR2; GEDR3 = GEDR3;
 }
 
 void pxa27x_cpu_pm_restore(unsigned long *sleep_save)
@@ -237,15 +220,10 @@
 	PSPR = 0;
 
 	/* restore registers */
-	RESTORE_GPLEVEL(0); RESTORE_GPLEVEL(1);
-	RESTORE_GPLEVEL(2); RESTORE_GPLEVEL(3);
-	RESTORE(GPDR0); RESTORE(GPDR1); RESTORE(GPDR2); RESTORE(GPDR3);
 	RESTORE(GAFR0_L); RESTORE(GAFR0_U);
 	RESTORE(GAFR1_L); RESTORE(GAFR1_U);
 	RESTORE(GAFR2_L); RESTORE(GAFR2_U);
 	RESTORE(GAFR3_L); RESTORE(GAFR3_U);
-	RESTORE(GRER0); RESTORE(GRER1); RESTORE(GRER2); RESTORE(GRER3);
-	RESTORE(GFER0); RESTORE(GFER1); RESTORE(GFER2); RESTORE(GFER3);
 	RESTORE(PGSR0); RESTORE(PGSR1); RESTORE(PGSR2); RESTORE(PGSR3);
 
 	RESTORE(MDREFR);
@@ -256,9 +234,6 @@
 
 	RESTORE(CKEN);
 
-	ICLR = 0;
-	ICCR = 1;
-	RESTORE(ICMR);
 	RESTORE(PSTR);
 }
 
@@ -409,9 +384,22 @@
 	&pxa27x_device_ssp3,
 };
 
+static struct sys_device pxa27x_sysdev[] = {
+	{
+		.id	= 0,
+		.cls	= &pxa_irq_sysclass,
+	}, {
+		.id	= 1,
+		.cls	= &pxa_irq_sysclass,
+	}, {
+		.cls	= &pxa_gpio_sysclass,
+	},
+};
+
 static int __init pxa27x_init(void)
 {
-	int ret = 0;
+	int i, ret = 0;
+
 	if (cpu_is_pxa27x()) {
 		clks_register(pxa27x_clks, ARRAY_SIZE(pxa27x_clks));
 
@@ -420,8 +408,15 @@
 
 		pxa27x_init_pm();
 
+		for (i = 0; i < ARRAY_SIZE(pxa27x_sysdev); i++) {
+			ret = sysdev_register(&pxa27x_sysdev[i]);
+			if (ret)
+				pr_err("failed to register sysdev[%d]\n", i);
+		}
+
 		ret = platform_add_devices(devices, ARRAY_SIZE(devices));
 	}
+
 	return ret;
 }
 
diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c
index 5cbf057..e47e67c 100644
--- a/arch/arm/mach-pxa/pxa3xx.c
+++ b/arch/arm/mach-pxa/pxa3xx.c
@@ -20,6 +20,7 @@
 #include <linux/platform_device.h>
 #include <linux/irq.h>
 #include <linux/io.h>
+#include <linux/sysdev.h>
 
 #include <asm/hardware.h>
 #include <asm/arch/pxa3xx-regs.h>
@@ -39,6 +40,7 @@
 #define RO_CLK		60000000
 
 #define ACCR_D0CS	(1 << 26)
+#define ACCR_PCCE	(1 << 11)
 
 /* crystal frequency to static memory controller multiplier (SMCFS) */
 static unsigned char smcfs_mult[8] = { 6, 0, 8, 0, 0, 16, };
@@ -203,7 +205,6 @@
 };
 
 #ifdef CONFIG_PM
-#define SLEEP_SAVE_SIZE	4
 
 #define ISRAM_START	0x5c000000
 #define ISRAM_SIZE	SZ_256K
@@ -211,25 +212,29 @@
 static void __iomem *sram;
 static unsigned long wakeup_src;
 
+#define SAVE(x)		sleep_save[SLEEP_SAVE_##x] = x
+#define RESTORE(x)	x = sleep_save[SLEEP_SAVE_##x]
+
+enum {	SLEEP_SAVE_START = 0,
+	SLEEP_SAVE_CKENA,
+	SLEEP_SAVE_CKENB,
+	SLEEP_SAVE_ACCR,
+
+	SLEEP_SAVE_SIZE,
+};
+
 static void pxa3xx_cpu_pm_save(unsigned long *sleep_save)
 {
-	pr_debug("PM: CKENA=%08x CKENB=%08x\n", CKENA, CKENB);
-
-	if (CKENA & (1 << CKEN_USBH)) {
-		printk(KERN_ERR "PM: USB host clock not stopped?\n");
-		CKENA &= ~(1 << CKEN_USBH);
-	}
-//	CKENA |= 1 << (CKEN_ISC & 31);
-
-	/*
-	 * Low power modes require the HSIO2 clock to be enabled.
-	 */
-	CKENB |= 1 << (CKEN_HSIO2 & 31);
+	SAVE(CKENA);
+	SAVE(CKENB);
+	SAVE(ACCR);
 }
 
 static void pxa3xx_cpu_pm_restore(unsigned long *sleep_save)
 {
-	CKENB &= ~(1 << (CKEN_HSIO2 & 31));
+	RESTORE(ACCR);
+	RESTORE(CKENA);
+	RESTORE(CKENB);
 }
 
 /*
@@ -265,6 +270,46 @@
 	printk("PM: AD2D0SR=%08x ASCR=%08x\n", AD2D0SR, ASCR);
 }
 
+/*
+ * NOTE:  currently, the OBM (OEM Boot Module) binary that comes along with
+ * PXA3xx development kits assumes that the resuming process continues
+ * with the address stored within the first 4 bytes of SDRAM. The PSPR
+ * register is used privately by BootROM and OBM, and _must_ be set to
+ * 0x5c014000 for the moment.
+ */
+static void pxa3xx_cpu_pm_suspend(void)
+{
+	volatile unsigned long *p = (volatile void *)0xc0000000;
+	unsigned long saved_data = *p;
+
+	extern void pxa3xx_cpu_suspend(void);
+	extern void pxa3xx_cpu_resume(void);
+
+	/* resuming from D2 requires the HSIO2/BOOT/TPM clocks enabled */
+	CKENA |= (1 << CKEN_BOOT) | (1 << CKEN_TPM);
+	CKENB |= 1 << (CKEN_HSIO2 & 0x1f);
+
+	/* clear and setup wakeup source */
+	AD3SR = ~0;
+	AD3ER = wakeup_src;
+	ASCR = ASCR;
+	ARSR = ARSR;
+
+	PCFR |= (1u << 13);			/* L1_DIS */
+	PCFR &= ~((1u << 12) | (1u << 1));	/* L0_EN | SL_ROD */
+
+	PSPR = 0x5c014000;
+
+	/* overwrite with the resume address */
+	*p = virt_to_phys(pxa3xx_cpu_resume);
+
+	pxa3xx_cpu_suspend();
+
+	*p = saved_data;
+
+	AD3ER = 0;
+}
+
 static void pxa3xx_cpu_pm_enter(suspend_state_t state)
 {
 	/*
@@ -279,6 +324,7 @@
 		break;
 
 	case PM_SUSPEND_MEM:
+		pxa3xx_cpu_pm_suspend();
 		break;
 	}
 }
@@ -452,9 +498,21 @@
 	&pxa3xx_device_ssp4,
 };
 
+static struct sys_device pxa3xx_sysdev[] = {
+	{
+		.id	= 0,
+		.cls	= &pxa_irq_sysclass,
+	}, {
+		.id	= 1,
+		.cls	= &pxa_irq_sysclass,
+	}, {
+		.cls	= &pxa_gpio_sysclass,
+	},
+};
+
 static int __init pxa3xx_init(void)
 {
-	int ret = 0;
+	int i, ret = 0;
 
 	if (cpu_is_pxa3xx()) {
 		clks_register(pxa3xx_clks, ARRAY_SIZE(pxa3xx_clks));
@@ -464,9 +522,16 @@
 
 		pxa3xx_init_pm();
 
-		return platform_add_devices(devices, ARRAY_SIZE(devices));
+		for (i = 0; i < ARRAY_SIZE(pxa3xx_sysdev); i++) {
+			ret = sysdev_register(&pxa3xx_sysdev[i]);
+			if (ret)
+				pr_err("failed to register sysdev[%d]\n", i);
+		}
+
+		ret = platform_add_devices(devices, ARRAY_SIZE(devices));
 	}
-	return 0;
+
+	return ret;
 }
 
 subsys_initcall(pxa3xx_init);
diff --git a/arch/arm/mach-pxa/sleep.S b/arch/arm/mach-pxa/sleep.S
index 14bb4a9..784716e 100644
--- a/arch/arm/mach-pxa/sleep.S
+++ b/arch/arm/mach-pxa/sleep.S
@@ -50,6 +50,108 @@
 	str	r0, [r1]
 	ldr	pc, [sp], #4
 
+#ifdef CONFIG_PXA3xx
+/*
+ * pxa3xx_cpu_suspend() - forces CPU into sleep state (S2D3C4)
+ *
+ * NOTE:  unfortunately, pxa_cpu_save_cp can not be reused here since
+ * the auxiliary control register address is different between pxa3xx
+ * and pxa{25x,27x}
+ */
+
+ENTRY(pxa3xx_cpu_suspend)
+
+#ifndef CONFIG_IWMMXT
+	mra	r2, r3, acc0
+#endif
+	stmfd	sp!, {r2 - r12, lr}	@ save registers on stack
+
+	mrc	p14, 0, r3, c6, c0, 0		@ clock configuration, for turbo mode
+	mrc	p15, 0, r4, c15, c1, 0		@ CP access reg
+	mrc	p15, 0, r5, c13, c0, 0		@ PID
+	mrc 	p15, 0, r6, c3, c0, 0		@ domain ID
+	mrc 	p15, 0, r7, c2, c0, 0		@ translation table base addr
+	mrc	p15, 0, r8, c1, c0, 1           @ auxiliary control reg
+	mrc 	p15, 0, r9, c1, c0, 0		@ control reg
+
+	bic	r3, r3, #2			@ clear frequency change bit
+
+	@ store them plus current virtual stack ptr on stack
+	mov	r10, sp
+	stmfd	sp!, {r3 - r10}
+
+	@ store physical address of stack pointer
+	mov	r0, sp
+	bl	sleep_phys_sp
+	ldr	r1, =sleep_save_sp
+	str	r0, [r1]
+
+	@ clean data cache
+	bl	xsc3_flush_kern_cache_all
+
+	mov	r0, #0x06		@ S2D3C4 mode
+	mcr	p14, 0, r0, c7, c0, 0	@ enter sleep
+
+20:	b	20b			@ waiting for sleep
+
+	.data
+	.align 5
+/*
+ * pxa3xx_cpu_resume
+ */
+
+ENTRY(pxa3xx_cpu_resume)
+
+	mov	r0, #PSR_I_BIT | PSR_F_BIT | SVC_MODE	@ set SVC, irqs off
+	msr	cpsr_c, r0
+
+	ldr	r0, sleep_save_sp		@ stack phys addr
+	ldmfd	r0, {r3 - r9, sp}		@ CP regs + virt stack ptr
+
+	mov	r1, #0
+	mcr	p15, 0, r1, c7, c7, 0		@ invalidate I & D caches, BTB
+	mcr	p15, 0, r1, c7, c10, 4		@ drain write (&fill) buffer
+	mcr	p15, 0, r1, c7, c5, 4		@ flush prefetch buffer
+	mcr	p15, 0, r1, c8, c7, 0   	@ invalidate I & D TLBs
+
+	mcr	p14, 0, r3, c6, c0, 0		@ clock configuration, turbo mode.
+	mcr	p15, 0, r4, c15, c1, 0		@ CP access reg
+	mcr	p15, 0, r5, c13, c0, 0		@ PID
+	mcr 	p15, 0, r6, c3, c0, 0		@ domain ID
+	mcr 	p15, 0, r7, c2, c0, 0		@ translation table base addr
+	mcr	p15, 0, r8, c1, c0, 1           @ auxiliary control reg
+
+	@ temporarily map resume_turn_on_mmu into the page table,
+	@ otherwise prefetch abort occurs after MMU is turned on
+	mov	r1, r7
+	bic	r1, r1, #0x00ff
+	bic	r1, r1, #0x3f00
+	ldr	r2, =0x542e
+
+	adr	r3, resume_turn_on_mmu
+	mov	r3, r3, lsr #20
+	orr	r4, r2, r3, lsl #20
+	ldr	r5, [r1, r3, lsl #2]
+	str     r4, [r1, r3, lsl #2]
+
+	@ Mapping page table address in the page table
+	mov	r6, r1, lsr #20
+	orr	r7, r2, r6, lsl #20
+	ldr	r8, [r1, r6, lsl #2]
+	str	r7, [r1, r6, lsl #2]
+
+	ldr	r2, =pxa3xx_resume_after_mmu	@ absolute virtual address
+	b	resume_turn_on_mmu		@ cache align execution
+
+	.text
+pxa3xx_resume_after_mmu:
+	/* undo the temporary mappings: write back the saved entries */
+	str	r5, [r1, r3, lsl #2]
+	str	r8, [r1, r6, lsl #2]
+	b	resume_after_mmu
+
+#endif /* CONFIG_PXA3xx */
+
 #ifdef CONFIG_PXA27x
 /*
  * pxa27x_cpu_suspend()
diff --git a/arch/arm/mach-pxa/smemc.c b/arch/arm/mach-pxa/smemc.c
new file mode 100644
index 0000000..ad346ad
--- /dev/null
+++ b/arch/arm/mach-pxa/smemc.c
@@ -0,0 +1,88 @@
+/*
+ * Static Memory Controller
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/sysdev.h>
+
+#define SMEMC_PHYS_BASE	(0x4A000000)
+#define SMEMC_PHYS_SIZE	(0x90)
+
+#define MSC0		(0x08)	/* Static Memory Controller Register 0 */
+#define MSC1		(0x0C)	/* Static Memory Controller Register 1 */
+#define SXCNFG		(0x1C)	/* Synchronous Static Memory Control Register */
+#define MEMCLKCFG	(0x68)	/* Clock Configuration */
+#define CSADRCFG0	(0x80)	/* Address Configuration Register for CS0 */
+#define CSADRCFG1	(0x84)	/* Address Configuration Register for CS1 */
+#define CSADRCFG2	(0x88)	/* Address Configuration Register for CS2 */
+#define CSADRCFG3	(0x8C)	/* Address Configuration Register for CS3 */
+
+#ifdef CONFIG_PM
+static void __iomem *smemc_mmio_base;
+
+static unsigned long msc[2];
+static unsigned long sxcnfg, memclkcfg;
+static unsigned long csadrcfg[4];
+
+static int pxa3xx_smemc_suspend(struct sys_device *dev, pm_message_t state)
+{
+	msc[0] = __raw_readl(smemc_mmio_base + MSC0);
+	msc[1] = __raw_readl(smemc_mmio_base + MSC1);
+	sxcnfg = __raw_readl(smemc_mmio_base + SXCNFG);
+	memclkcfg = __raw_readl(smemc_mmio_base + MEMCLKCFG);
+	csadrcfg[0] = __raw_readl(smemc_mmio_base + CSADRCFG0);
+	csadrcfg[1] = __raw_readl(smemc_mmio_base + CSADRCFG1);
+	csadrcfg[2] = __raw_readl(smemc_mmio_base + CSADRCFG2);
+	csadrcfg[3] = __raw_readl(smemc_mmio_base + CSADRCFG3);
+
+	return 0;
+}
+
+static int pxa3xx_smemc_resume(struct sys_device *dev)
+{
+	__raw_writel(msc[0], smemc_mmio_base + MSC0);
+	__raw_writel(msc[1], smemc_mmio_base + MSC1);
+	__raw_writel(sxcnfg, smemc_mmio_base + SXCNFG);
+	__raw_writel(memclkcfg, smemc_mmio_base + MEMCLKCFG);
+	__raw_writel(csadrcfg[0], smemc_mmio_base + CSADRCFG0);
+	__raw_writel(csadrcfg[1], smemc_mmio_base + CSADRCFG1);
+	__raw_writel(csadrcfg[2], smemc_mmio_base + CSADRCFG2);
+	__raw_writel(csadrcfg[3], smemc_mmio_base + CSADRCFG3);
+
+	return 0;
+}
+
+static struct sysdev_class smemc_sysclass = {
+	.name		= "smemc",
+	.suspend	= pxa3xx_smemc_suspend,
+	.resume		= pxa3xx_smemc_resume,
+};
+
+static struct sys_device smemc_sysdev = {
+	.id		= 0,
+	.cls		= &smemc_sysclass,
+};
+
+static int __init smemc_init(void)
+{
+	int ret = 0;
+
+	if (cpu_is_pxa3xx()) {
+		smemc_mmio_base = ioremap(SMEMC_PHYS_BASE, SMEMC_PHYS_SIZE);
+		if (smemc_mmio_base == NULL)
+			return -ENODEV;
+
+		ret = sysdev_class_register(&smemc_sysclass);
+		if (ret)
+			return ret;
+
+		ret = sysdev_register(&smemc_sysdev);
+	}
+
+	return ret;
+}
+subsys_initcall(smemc_init);
+#endif
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 5078ede..9e7773f 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -36,6 +36,7 @@
 #include <asm/mach/irq.h>
 
 #include <asm/arch/pxa-regs.h>
+#include <asm/arch/pxa2xx-regs.h>
 #include <asm/arch/irda.h>
 #include <asm/arch/mmc.h>
 #include <asm/arch/ohci.h>
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index 1a9c844..9b26fa5 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -29,6 +29,7 @@
 #include <asm/irq.h>
 #include <asm/system.h>
 #include <asm/arch/pxa-regs.h>
+#include <asm/arch/pxa2xx-regs.h>
 #include <asm/arch/irda.h>
 #include <asm/arch/mmc.h>
 #include <asm/arch/udc.h>
diff --git a/arch/arm/mach-realview/Kconfig b/arch/arm/mach-realview/Kconfig
index 35156ca..39b3bb7 100644
--- a/arch/arm/mach-realview/Kconfig
+++ b/arch/arm/mach-realview/Kconfig
@@ -7,24 +7,21 @@
 	help
 	  Include support for the ARM(R) RealView Emulation Baseboard platform.
 
-config REALVIEW_MPCORE
-	bool "Support MPcore tile"
+config REALVIEW_EB_ARM11MP
+	bool "Support ARM11MPCore tile"
 	depends on MACH_REALVIEW_EB
 	select CACHE_L2X0
 	help
-	  Enable support for the MPCore tile on the Realview platform.
-	  Since there are device address and interrupt differences, a
-	  kernel built with this option enabled is not compatible with
-	  other tiles.
+	  Enable support for the ARM11MPCore tile on the Realview platform.
 
-config REALVIEW_MPCORE_REVB
-	bool "Support MPcore RevB tile"
-	depends on REALVIEW_MPCORE
+config REALVIEW_EB_ARM11MP_REVB
+	bool "Support ARM11MPCore RevB tile"
+	depends on REALVIEW_EB_ARM11MP
 	default n
 	help
-	  Enable support for the MPCore RevB tile on the Realview platform.
-	  Since there are device address differences, a
+	  Enable support for the ARM11MPCore RevB tile on the Realview
+	  platform. Since there are device address differences, a
 	  kernel built with this option enabled is not compatible with
-	  other tiles.
+	  other revisions of the ARM11MPCore tile.
 
 endmenu
diff --git a/arch/arm/mach-realview/Makefile b/arch/arm/mach-realview/Makefile
index 36e76ba..ca1e390 100644
--- a/arch/arm/mach-realview/Makefile
+++ b/arch/arm/mach-realview/Makefile
@@ -4,6 +4,5 @@
 
 obj-y					:= core.o clock.o
 obj-$(CONFIG_MACH_REALVIEW_EB)		+= realview_eb.o
-obj-$(CONFIG_SMP)			+= platsmp.o headsmp.o
+obj-$(CONFIG_SMP)			+= platsmp.o headsmp.o localtimer.o
 obj-$(CONFIG_HOTPLUG_CPU)		+= hotplug.o
-obj-$(CONFIG_LOCAL_TIMERS)		+= localtimer.o
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 61d7021..98aefc9 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -25,6 +25,8 @@
 #include <linux/interrupt.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/clcd.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
 
 #include <asm/system.h>
 #include <asm/hardware.h>
@@ -37,7 +39,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/irq.h>
-#include <asm/mach/time.h>
 #include <asm/mach/map.h>
 #include <asm/mach/mmc.h>
 
@@ -48,6 +49,9 @@
 
 #define REALVIEW_REFCOUNTER	(__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_24MHz_OFFSET)
 
+/* used by entry-macro.S */
+void __iomem *gic_cpu_base_addr;
+
 /*
  * This is the RealView sched_clock implementation.  This has
  * a resolution of 41.7ns, and a maximum value of about 179s.
@@ -121,26 +125,6 @@
 	.resource		= &realview_flash_resource,
 };
 
-static struct resource realview_smc91x_resources[] = {
-	[0] = {
-		.start		= REALVIEW_ETH_BASE,
-		.end		= REALVIEW_ETH_BASE + SZ_64K - 1,
-		.flags		= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start		= IRQ_ETH,
-		.end		= IRQ_ETH,
-		.flags		= IORESOURCE_IRQ,
-	},
-};
-
-struct platform_device realview_smc91x_device = {
-	.name		= "smc91x",
-	.id		= 0,
-	.num_resources	= ARRAY_SIZE(realview_smc91x_resources),
-	.resource	= realview_smc91x_resources,
-};
-
 static struct resource realview_i2c_resource = {
 	.start		= REALVIEW_I2C_BASE,
 	.end		= REALVIEW_I2C_BASE + SZ_4K - 1,
@@ -484,45 +468,64 @@
 #define TICKS2USECS(x)	((x) / TICKS_PER_uSEC)
 #endif
 
-/*
- * Returns number of ms since last clock interrupt.  Note that interrupts
- * will have been disabled by do_gettimeoffset()
- */
-static unsigned long realview_gettimeoffset(void)
+static void timer_set_mode(enum clock_event_mode mode,
+			   struct clock_event_device *clk)
 {
-	unsigned long ticks1, ticks2, status;
+	unsigned long ctrl;
 
-	/*
-	 * Get the current number of ticks.  Note that there is a race
-	 * condition between us reading the timer and checking for
-	 * an interrupt.  We get around this by ensuring that the
-	 * counter has not reloaded between our two reads.
-	 */
-	ticks2 = readl(TIMER0_VA_BASE + TIMER_VALUE) & 0xffff;
-	do {
-		ticks1 = ticks2;
-		status = __raw_readl(__io_address(REALVIEW_GIC_DIST_BASE + GIC_DIST_PENDING_SET)
-				     + ((IRQ_TIMERINT0_1 >> 5) << 2));
-		ticks2 = readl(TIMER0_VA_BASE + TIMER_VALUE) & 0xffff;
-	} while (ticks2 > ticks1);
+	switch(mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		writel(TIMER_RELOAD, TIMER0_VA_BASE + TIMER_LOAD);
 
-	/*
-	 * Number of ticks since last interrupt.
-	 */
-	ticks1 = TIMER_RELOAD - ticks2;
+		ctrl = TIMER_CTRL_PERIODIC;
+		ctrl |= TIMER_CTRL_32BIT | TIMER_CTRL_IE | TIMER_CTRL_ENABLE;
+		break;
+	case CLOCK_EVT_MODE_ONESHOT:
+		/* period set, and timer enabled in 'next_event' hook */
+		ctrl = TIMER_CTRL_ONESHOT;
+		ctrl |= TIMER_CTRL_32BIT | TIMER_CTRL_IE;
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	default:
+		ctrl = 0;
+	}
 
-	/*
-	 * Interrupt pending?  If so, we've reloaded once already.
-	 *
-	 * FIXME: Need to check this is effectively timer 0 that expires
-	 */
-	if (status & IRQMASK_TIMERINT0_1)
-		ticks1 += TIMER_RELOAD;
+	writel(ctrl, TIMER0_VA_BASE + TIMER_CTRL);
+}
 
-	/*
-	 * Convert the ticks to usecs
-	 */
-	return TICKS2USECS(ticks1);
+static int timer_set_next_event(unsigned long evt,
+				struct clock_event_device *unused)
+{
+	unsigned long ctrl = readl(TIMER0_VA_BASE + TIMER_CTRL);
+
+	writel(evt, TIMER0_VA_BASE + TIMER_LOAD);
+	writel(ctrl | TIMER_CTRL_ENABLE, TIMER0_VA_BASE + TIMER_CTRL);
+
+	return 0;
+}
+
+static struct clock_event_device timer0_clockevent =	 {
+	.name		= "timer0",
+	.shift		= 32,
+	.features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+	.set_mode	= timer_set_mode,
+	.set_next_event	= timer_set_next_event,
+	.rating		= 300,
+	.cpumask	= CPU_MASK_ALL,
+};
+
+static void __init realview_clockevents_init(unsigned int timer_irq)
+{
+	timer0_clockevent.irq = timer_irq;
+	timer0_clockevent.mult =
+		div_sc(1000000, NSEC_PER_SEC, timer0_clockevent.shift);
+	timer0_clockevent.max_delta_ns =
+		clockevent_delta2ns(0xffffffff, &timer0_clockevent);
+	timer0_clockevent.min_delta_ns =
+		clockevent_delta2ns(0xf, &timer0_clockevent);
+
+	clockevents_register_device(&timer0_clockevent);
 }
 
 /*
@@ -530,15 +533,12 @@
  */
 static irqreturn_t realview_timer_interrupt(int irq, void *dev_id)
 {
-	// ...clear the interrupt
+	struct clock_event_device *evt = &timer0_clockevent;
+
+	/* clear the interrupt */
 	writel(1, TIMER0_VA_BASE + TIMER_INTCLR);
 
-	timer_tick();
-
-#if defined(CONFIG_SMP) && !defined(CONFIG_LOCAL_TIMERS)
-	smp_send_timer();
-	update_process_times(user_mode(get_irq_regs()));
-#endif
+	evt->event_handler(evt);
 
 	return IRQ_HANDLED;
 }
@@ -549,13 +549,49 @@
 	.handler	= realview_timer_interrupt,
 };
 
+static cycle_t realview_get_cycles(void)
+{
+	return ~readl(TIMER3_VA_BASE + TIMER_VALUE);
+}
+
+static struct clocksource clocksource_realview = {
+	.name	= "timer3",
+	.rating	= 200,
+	.read	= realview_get_cycles,
+	.mask	= CLOCKSOURCE_MASK(32),
+	.shift	= 20,
+	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static void __init realview_clocksource_init(void)
+{
+	/* set up timer 3 as free-running clocksource */
+	writel(0, TIMER3_VA_BASE + TIMER_CTRL);
+	writel(0xffffffff, TIMER3_VA_BASE + TIMER_LOAD);
+	writel(0xffffffff, TIMER3_VA_BASE + TIMER_VALUE);
+	writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
+		TIMER3_VA_BASE + TIMER_CTRL);
+
+	clocksource_realview.mult =
+		clocksource_khz2mult(1000, clocksource_realview.shift);
+	clocksource_register(&clocksource_realview);
+}
+
 /*
- * Set up timer interrupt, and return the current time in seconds.
+ * Set up the clock source and clock events devices
  */
-static void __init realview_timer_init(void)
+void __init realview_timer_init(unsigned int timer_irq)
 {
 	u32 val;
 
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+	/*
+	 * The dummy clock device has to be registered before the main device
+	 * so that the latter will broadcast the clock events
+	 */
+	local_timer_setup(smp_processor_id());
+#endif
+
 	/* 
 	 * set clock frequency: 
 	 *	REALVIEW_REFCLK is 32KHz
@@ -576,18 +612,11 @@
 	writel(0, TIMER2_VA_BASE + TIMER_CTRL);
 	writel(0, TIMER3_VA_BASE + TIMER_CTRL);
 
-	writel(TIMER_RELOAD, TIMER0_VA_BASE + TIMER_LOAD);
-	writel(TIMER_RELOAD, TIMER0_VA_BASE + TIMER_VALUE);
-	writel(TIMER_DIVISOR | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC |
-	       TIMER_CTRL_IE, TIMER0_VA_BASE + TIMER_CTRL);
-
 	/* 
 	 * Make irqs happen for the system timer
 	 */
-	setup_irq(IRQ_TIMERINT0_1, &realview_timer_irq);
-}
+	setup_irq(timer_irq, &realview_timer_irq);
 
-struct sys_timer realview_timer = {
-	.init		= realview_timer_init,
-	.offset		= realview_gettimeoffset,
-};
+	realview_clocksource_init();
+	realview_clockevents_init(timer_irq);
+}
diff --git a/arch/arm/mach-realview/core.h b/arch/arm/mach-realview/core.h
index 2b53420..492a14c 100644
--- a/arch/arm/mach-realview/core.h
+++ b/arch/arm/mach-realview/core.h
@@ -27,8 +27,6 @@
 #include <asm/leds.h>
 #include <asm/io.h>
 
-extern struct sys_timer realview_timer;
-
 #define AMBA_DEVICE(name,busid,base,plat)			\
 static struct amba_device name##_device = {			\
 	.dev		= {					\
@@ -38,7 +36,7 @@
 	},							\
 	.res		= {					\
 		.start	= REALVIEW_##base##_BASE,		\
-		.end	= (REALVIEW_##base##_BASE) + SZ_4K - 1,\
+		.end	= (REALVIEW_##base##_BASE) + SZ_4K - 1,	\
 		.flags	= IORESOURCE_MEM,			\
 	},							\
 	.dma_mask	= ~0,					\
@@ -46,74 +44,19 @@
 	/* .dma		= base##_DMA,*/				\
 }
 
-/*
- * These devices are connected via the core APB bridge
- */
-#define GPIO2_IRQ	{ IRQ_GPIOINT2, NO_IRQ }
-#define GPIO2_DMA	{ 0, 0 }
-#define GPIO3_IRQ	{ IRQ_GPIOINT3, NO_IRQ }
-#define GPIO3_DMA	{ 0, 0 }
-
-#define AACI_IRQ	{ IRQ_AACI, NO_IRQ }
-#define AACI_DMA	{ 0x80, 0x81 }
-#define MMCI0_IRQ	{ IRQ_MMCI0A,IRQ_MMCI0B }
-#define MMCI0_DMA	{ 0x84, 0 }
-#define KMI0_IRQ	{ IRQ_KMI0, NO_IRQ }
-#define KMI0_DMA	{ 0, 0 }
-#define KMI1_IRQ	{ IRQ_KMI1, NO_IRQ }
-#define KMI1_DMA	{ 0, 0 }
-
-/*
- * These devices are connected directly to the multi-layer AHB switch
- */
-#define SMC_IRQ		{ NO_IRQ, NO_IRQ }
-#define SMC_DMA		{ 0, 0 }
-#define MPMC_IRQ	{ NO_IRQ, NO_IRQ }
-#define MPMC_DMA	{ 0, 0 }
-#define CLCD_IRQ	{ IRQ_CLCDINT, NO_IRQ }
-#define CLCD_DMA	{ 0, 0 }
-#define DMAC_IRQ	{ IRQ_DMAINT, NO_IRQ }
-#define DMAC_DMA	{ 0, 0 }
-
-/*
- * These devices are connected via the core APB bridge
- */
-#define SCTL_IRQ	{ NO_IRQ, NO_IRQ }
-#define SCTL_DMA	{ 0, 0 }
-#define WATCHDOG_IRQ	{ IRQ_WDOGINT, NO_IRQ }
-#define WATCHDOG_DMA	{ 0, 0 }
-#define GPIO0_IRQ	{ IRQ_GPIOINT0, NO_IRQ }
-#define GPIO0_DMA	{ 0, 0 }
-#define GPIO1_IRQ	{ IRQ_GPIOINT1, NO_IRQ }
-#define GPIO1_DMA	{ 0, 0 }
-#define RTC_IRQ		{ IRQ_RTCINT, NO_IRQ }
-#define RTC_DMA		{ 0, 0 }
-
-/*
- * These devices are connected via the DMA APB bridge
- */
-#define SCI_IRQ		{ IRQ_SCIINT, NO_IRQ }
-#define SCI_DMA		{ 7, 6 }
-#define UART0_IRQ	{ IRQ_UARTINT0, NO_IRQ }
-#define UART0_DMA	{ 15, 14 }
-#define UART1_IRQ	{ IRQ_UARTINT1, NO_IRQ }
-#define UART1_DMA	{ 13, 12 }
-#define UART2_IRQ	{ IRQ_UARTINT2, NO_IRQ }
-#define UART2_DMA	{ 11, 10 }
-#define UART3_IRQ	{ IRQ_UART3, NO_IRQ }
-#define UART3_DMA	{ 0x86, 0x87 }
-#define SSP_IRQ		{ IRQ_SSPINT, NO_IRQ }
-#define SSP_DMA		{ 9, 8 }
-
-
 extern struct platform_device realview_flash_device;
-extern struct platform_device realview_smc91x_device;
 extern struct platform_device realview_i2c_device;
 extern struct mmc_platform_data realview_mmc0_plat_data;
 extern struct mmc_platform_data realview_mmc1_plat_data;
 extern struct clk realview_clcd_clk;
 extern struct clcd_board clcd_plat_data;
+extern void __iomem *gic_cpu_base_addr;
+#ifdef CONFIG_LOCAL_TIMERS
+extern void __iomem *twd_base_addr;
+extern unsigned int twd_size;
+#endif
 
 extern void realview_leds_event(led_event_t ledevt);
+extern void realview_timer_init(unsigned int timer_irq);
 
 #endif
diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c
index c7bdf04..5060436 100644
--- a/arch/arm/mach-realview/localtimer.c
+++ b/arch/arm/mach-realview/localtimer.c
@@ -14,19 +14,75 @@
 #include <linux/device.h>
 #include <linux/smp.h>
 #include <linux/jiffies.h>
+#include <linux/percpu.h>
+#include <linux/clockchips.h>
+#include <linux/irq.h>
 
-#include <asm/mach/time.h>
 #include <asm/hardware/arm_twd.h>
 #include <asm/hardware/gic.h>
 #include <asm/hardware.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 
-#define TWD_BASE(cpu)	(__io_address(REALVIEW_TWD_BASE) + \
-			 ((cpu) * REALVIEW_TWD_SIZE))
+static DEFINE_PER_CPU(struct clock_event_device, local_clockevent);
+
+/*
+ * Used on SMP for either the local timer or IPI_TIMER
+ */
+void local_timer_interrupt(void)
+{
+	struct clock_event_device *clk = &__get_cpu_var(local_clockevent);
+
+	clk->event_handler(clk);
+}
+
+#ifdef CONFIG_LOCAL_TIMERS
+
+#define TWD_BASE(cpu)	(twd_base_addr + (cpu) * twd_size)
+
+/* set up by the platform code */
+void __iomem *twd_base_addr;
+unsigned int twd_size;
 
 static unsigned long mpcore_timer_rate;
 
+static void local_timer_set_mode(enum clock_event_mode mode,
+				 struct clock_event_device *clk)
+{
+	void __iomem *base = TWD_BASE(smp_processor_id());
+	unsigned long ctrl;
+
+	switch(mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		/* timer load already set up */
+		ctrl = TWD_TIMER_CONTROL_ENABLE | TWD_TIMER_CONTROL_IT_ENABLE
+			| TWD_TIMER_CONTROL_PERIODIC;
+		break;
+	case CLOCK_EVT_MODE_ONESHOT:
+		/* period set, and timer enabled in 'next_event' hook */
+		ctrl = TWD_TIMER_CONTROL_IT_ENABLE | TWD_TIMER_CONTROL_ONESHOT;
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	default:
+		ctrl = 0;
+	}
+
+	__raw_writel(ctrl, base + TWD_TIMER_CONTROL);
+}
+
+static int local_timer_set_next_event(unsigned long evt,
+				      struct clock_event_device *unused)
+{
+	void __iomem *base = TWD_BASE(smp_processor_id());
+	unsigned long ctrl = __raw_readl(base + TWD_TIMER_CONTROL);
+
+	__raw_writel(evt, base + TWD_TIMER_COUNTER);
+	__raw_writel(ctrl | TWD_TIMER_CONTROL_ENABLE, base + TWD_TIMER_CONTROL);
+
+	return 0;
+}
+
 /*
  * local_timer_ack: checks for a local timer interrupt.
  *
@@ -45,12 +101,11 @@
 	return 0;
 }
 
-void __cpuinit local_timer_setup(unsigned int cpu)
+static void __cpuinit twd_calibrate_rate(unsigned int cpu)
 {
 	void __iomem *base = TWD_BASE(cpu);
-	unsigned int load, offset;
+	unsigned long load, count;
 	u64 waitjiffies;
-	unsigned int count;
 
 	/*
 	 * If this is the first time round, we need to work out how fast
@@ -88,36 +143,36 @@
 	load = mpcore_timer_rate / HZ;
 
 	__raw_writel(load, base + TWD_TIMER_LOAD);
-	__raw_writel(0x7,  base + TWD_TIMER_CONTROL);
+}
 
-	/*
-	 * Now maneuver our local tick into the right part of the jiffy.
-	 * Start by working out where within the tick our local timer
-	 * interrupt should go.
-	 */
-	offset = ((mpcore_timer_rate / HZ) / (NR_CPUS + 1)) * (cpu + 1);
+/*
+ * Setup the local clock events for a CPU.
+ */
+void __cpuinit local_timer_setup(unsigned int cpu)
+{
+	struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
+	unsigned long flags;
 
-	/*
-	 * gettimeoffset() will return a number of us since the last tick.
-	 * Convert this number of us to a local timer tick count.
-	 * Be careful of integer overflow whilst keeping maximum precision.
-	 *
-	 * with HZ=100 and 1MHz (fpga) ~ 1GHz processor:
-	 * load = 1 ~ 10,000
-	 * mpcore_timer_rate/10000 = 100 ~ 100,000
-	 *
-	 * so the multiply value will be less than 10^9 always.
-	 */
-	load = (system_timer->offset() * (mpcore_timer_rate / 10000)) / 100;
+	twd_calibrate_rate(cpu);
 
-	/* Add on our offset to get the load value */
-	load = (load + offset) % (mpcore_timer_rate / HZ);
-
-	__raw_writel(load, base + TWD_TIMER_COUNTER);
+	clk->name		= "local_timer";
+	clk->features		= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
+	clk->rating		= 350;
+	clk->set_mode		= local_timer_set_mode;
+	clk->set_next_event	= local_timer_set_next_event;
+	clk->irq		= IRQ_LOCALTIMER;
+	clk->cpumask		= cpumask_of_cpu(cpu);
+	clk->shift		= 20;
+	clk->mult		= div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift);
+	clk->max_delta_ns	= clockevent_delta2ns(0xffffffff, clk);
+	clk->min_delta_ns	= clockevent_delta2ns(0xf, clk);
 
 	/* Make sure our local interrupt controller has this enabled */
-	__raw_writel(1 << IRQ_LOCALTIMER,
-		     __io_address(REALVIEW_GIC_DIST_BASE) + GIC_DIST_ENABLE_SET);
+	local_irq_save(flags);
+	get_irq_chip(IRQ_LOCALTIMER)->unmask(IRQ_LOCALTIMER);
+	local_irq_restore(flags);
+
+	clockevents_register_device(clk);
 }
 
 /*
@@ -127,3 +182,26 @@
 {
 	__raw_writel(0, TWD_BASE(cpu) + TWD_TIMER_CONTROL);
 }
+
+#else	/* CONFIG_LOCAL_TIMERS */
+
+static void dummy_timer_set_mode(enum clock_event_mode mode,
+				 struct clock_event_device *clk)
+{
+}
+
+void __cpuinit local_timer_setup(unsigned int cpu)
+{
+	struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
+
+	clk->name		= "dummy_timer";
+	clk->features		= CLOCK_EVT_FEAT_DUMMY;
+	clk->rating		= 200;
+	clk->set_mode		= dummy_timer_set_mode;
+	clk->broadcast		= smp_timer_broadcast;
+	clk->cpumask		= cpumask_of_cpu(cpu);
+
+	clockevents_register_device(clk);
+}
+
+#endif	/* !CONFIG_LOCAL_TIMERS */
diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c
index fce3596..de2b715 100644
--- a/arch/arm/mach-realview/platsmp.c
+++ b/arch/arm/mach-realview/platsmp.c
@@ -18,6 +18,7 @@
 #include <asm/hardware/arm_scu.h>
 #include <asm/hardware.h>
 #include <asm/io.h>
+#include <asm/mach-types.h>
 
 extern void realview_secondary_startup(void);
 
@@ -31,9 +32,13 @@
 {
 	unsigned int ncores;
 
-	ncores = __raw_readl(__io_address(REALVIEW_MPCORE_SCU_BASE) + SCU_CONFIG);
+	if (machine_is_realview_eb() && core_tile_eb11mp()) {
+		ncores = __raw_readl(__io_address(REALVIEW_EB11MP_SCU_BASE) + SCU_CONFIG);
+		ncores = (ncores & 0x03) + 1;
+	} else
+		ncores = 1;
 
-	return (ncores & 0x03) + 1;
+	return ncores;
 }
 
 static DEFINE_SPINLOCK(boot_lock);
@@ -52,7 +57,7 @@
 	 * core (e.g. timer irq), then they will not have been enabled
 	 * for us: do so
 	 */
-	gic_cpu_init(0, __io_address(REALVIEW_GIC_CPU_BASE));
+	gic_cpu_init(0, __io_address(REALVIEW_EB11MP_GIC_CPU_BASE));
 
 	/*
 	 * let the primary processor know we're out of the
@@ -187,10 +192,15 @@
 	if (max_cpus > ncores)
 		max_cpus = ncores;
 
+#ifdef CONFIG_LOCAL_TIMERS
 	/*
-	 * Enable the local timer for primary CPU
+	 * Enable the local timer for primary CPU. If the device is
+	 * dummy (!CONFIG_LOCAL_TIMERS), it was already registered in
+	 * realview_timer_init
 	 */
-	local_timer_setup(cpu);
+	if (machine_is_realview_eb() && core_tile_eb11mp())
+		local_timer_setup(cpu);
+#endif
 
 	/*
 	 * Initialise the present map, which describes the set of CPUs
diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c
index ecec2f8..60d9eb8 100644
--- a/arch/arm/mach-realview/realview_eb.c
+++ b/arch/arm/mach-realview/realview_eb.c
@@ -36,7 +36,9 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <asm/mach/mmc.h>
+#include <asm/mach/time.h>
 
+#include <asm/arch/board-eb.h>
 #include <asm/arch/irqs.h>
 
 #include "core.h"
@@ -58,26 +60,7 @@
 		.pfn		= __phys_to_pfn(REALVIEW_GIC_DIST_BASE),
 		.length		= SZ_4K,
 		.type		= MT_DEVICE,
-	},
-#ifdef CONFIG_REALVIEW_MPCORE
-	{
-		.virtual	= IO_ADDRESS(REALVIEW_GIC1_CPU_BASE),
-		.pfn		= __phys_to_pfn(REALVIEW_GIC1_CPU_BASE),
-		.length		= SZ_4K,
-		.type		= MT_DEVICE,
 	}, {
-		.virtual	= IO_ADDRESS(REALVIEW_GIC1_DIST_BASE),
-		.pfn		= __phys_to_pfn(REALVIEW_GIC1_DIST_BASE),
-		.length		= SZ_4K,
-		.type		= MT_DEVICE,
-	}, {
-		.virtual	= IO_ADDRESS(REALVIEW_MPCORE_L220_BASE),
-		.pfn		= __phys_to_pfn(REALVIEW_MPCORE_L220_BASE),
-		.length		= SZ_8K,
-		.type		= MT_DEVICE,
-	},
-#endif
-	{
 		.virtual	= IO_ADDRESS(REALVIEW_SCTL_BASE),
 		.pfn		= __phys_to_pfn(REALVIEW_SCTL_BASE),
 		.length		= SZ_4K,
@@ -103,11 +86,95 @@
 #endif
 };
 
+static struct map_desc realview_eb11mp_io_desc[] __initdata = {
+	{
+		.virtual	= IO_ADDRESS(REALVIEW_EB11MP_GIC_CPU_BASE),
+		.pfn		= __phys_to_pfn(REALVIEW_EB11MP_GIC_CPU_BASE),
+		.length		= SZ_4K,
+		.type		= MT_DEVICE,
+	}, {
+		.virtual	= IO_ADDRESS(REALVIEW_EB11MP_GIC_DIST_BASE),
+		.pfn		= __phys_to_pfn(REALVIEW_EB11MP_GIC_DIST_BASE),
+		.length		= SZ_4K,
+		.type		= MT_DEVICE,
+	}, {
+		.virtual	= IO_ADDRESS(REALVIEW_EB11MP_L220_BASE),
+		.pfn		= __phys_to_pfn(REALVIEW_EB11MP_L220_BASE),
+		.length		= SZ_8K,
+		.type		= MT_DEVICE,
+	}
+};
+
 static void __init realview_eb_map_io(void)
 {
 	iotable_init(realview_eb_io_desc, ARRAY_SIZE(realview_eb_io_desc));
+	if (core_tile_eb11mp())
+		iotable_init(realview_eb11mp_io_desc, ARRAY_SIZE(realview_eb11mp_io_desc));
 }
 
+/*
+ * RealView EB AMBA devices
+ */
+
+/*
+ * These devices are connected via the core APB bridge
+ */
+#define GPIO2_IRQ	{ IRQ_EB_GPIO2, NO_IRQ }
+#define GPIO2_DMA	{ 0, 0 }
+#define GPIO3_IRQ	{ IRQ_EB_GPIO3, NO_IRQ }
+#define GPIO3_DMA	{ 0, 0 }
+
+#define AACI_IRQ	{ IRQ_EB_AACI, NO_IRQ }
+#define AACI_DMA	{ 0x80, 0x81 }
+#define MMCI0_IRQ	{ IRQ_EB_MMCI0A, IRQ_EB_MMCI0B }
+#define MMCI0_DMA	{ 0x84, 0 }
+#define KMI0_IRQ	{ IRQ_EB_KMI0, NO_IRQ }
+#define KMI0_DMA	{ 0, 0 }
+#define KMI1_IRQ	{ IRQ_EB_KMI1, NO_IRQ }
+#define KMI1_DMA	{ 0, 0 }
+
+/*
+ * These devices are connected directly to the multi-layer AHB switch
+ */
+#define SMC_IRQ		{ NO_IRQ, NO_IRQ }
+#define SMC_DMA		{ 0, 0 }
+#define MPMC_IRQ	{ NO_IRQ, NO_IRQ }
+#define MPMC_DMA	{ 0, 0 }
+#define CLCD_IRQ	{ IRQ_EB_CLCD, NO_IRQ }
+#define CLCD_DMA	{ 0, 0 }
+#define DMAC_IRQ	{ IRQ_EB_DMA, NO_IRQ }
+#define DMAC_DMA	{ 0, 0 }
+
+/*
+ * These devices are connected via the core APB bridge
+ */
+#define SCTL_IRQ	{ NO_IRQ, NO_IRQ }
+#define SCTL_DMA	{ 0, 0 }
+#define WATCHDOG_IRQ	{ IRQ_EB_WDOG, NO_IRQ }
+#define WATCHDOG_DMA	{ 0, 0 }
+#define GPIO0_IRQ	{ IRQ_EB_GPIO0, NO_IRQ }
+#define GPIO0_DMA	{ 0, 0 }
+#define GPIO1_IRQ	{ IRQ_EB_GPIO1, NO_IRQ }
+#define GPIO1_DMA	{ 0, 0 }
+#define RTC_IRQ		{ IRQ_EB_RTC, NO_IRQ }
+#define RTC_DMA		{ 0, 0 }
+
+/*
+ * These devices are connected via the DMA APB bridge
+ */
+#define SCI_IRQ		{ IRQ_EB_SCI, NO_IRQ }
+#define SCI_DMA		{ 7, 6 }
+#define UART0_IRQ	{ IRQ_EB_UART0, NO_IRQ }
+#define UART0_DMA	{ 15, 14 }
+#define UART1_IRQ	{ IRQ_EB_UART1, NO_IRQ }
+#define UART1_DMA	{ 13, 12 }
+#define UART2_IRQ	{ IRQ_EB_UART2, NO_IRQ }
+#define UART2_DMA	{ 11, 10 }
+#define UART3_IRQ	{ IRQ_EB_UART3, NO_IRQ }
+#define UART3_DMA	{ 0x86, 0x87 }
+#define SSP_IRQ		{ IRQ_EB_SSP, NO_IRQ }
+#define SSP_DMA		{ 9, 8 }
+
 /* FPGA Primecells */
 AMBA_DEVICE(aaci,  "fpga:04", AACI,     NULL);
 AMBA_DEVICE(mmc0,  "fpga:05", MMCI0,    &realview_mmc0_plat_data);
@@ -153,38 +220,127 @@
 	&kmi1_device,
 };
 
+/*
+ * RealView EB platform devices
+ */
+
+static struct resource realview_eb_smc91x_resources[] = {
+	[0] = {
+		.start		= REALVIEW_ETH_BASE,
+		.end		= REALVIEW_ETH_BASE + SZ_64K - 1,
+		.flags		= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start		= IRQ_EB_ETH,
+		.end		= IRQ_EB_ETH,
+		.flags		= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device realview_eb_smc91x_device = {
+	.name		= "smc91x",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(realview_eb_smc91x_resources),
+	.resource	= realview_eb_smc91x_resources,
+};
+
 static void __init gic_init_irq(void)
 {
-#ifdef CONFIG_REALVIEW_MPCORE
-	unsigned int pldctrl;
-	writel(0x0000a05f, __io_address(REALVIEW_SYS_LOCK));
-	pldctrl = readl(__io_address(REALVIEW_SYS_BASE)	+ REALVIEW_MPCORE_SYS_PLD_CTRL1);
-	pldctrl |= 0x00800000;	/* New irq mode */
-	writel(pldctrl, __io_address(REALVIEW_SYS_BASE) + REALVIEW_MPCORE_SYS_PLD_CTRL1);
-	writel(0x00000000, __io_address(REALVIEW_SYS_LOCK));
+	if (core_tile_eb11mp()) {
+		unsigned int pldctrl;
+
+		/* new irq mode */
+		writel(0x0000a05f, __io_address(REALVIEW_SYS_LOCK));
+		pldctrl = readl(__io_address(REALVIEW_SYS_BASE)	+ REALVIEW_EB11MP_SYS_PLD_CTRL1);
+		pldctrl |= 0x00800000;
+		writel(pldctrl, __io_address(REALVIEW_SYS_BASE) + REALVIEW_EB11MP_SYS_PLD_CTRL1);
+		writel(0x00000000, __io_address(REALVIEW_SYS_LOCK));
+
+		/* core tile GIC, primary */
+		gic_cpu_base_addr = __io_address(REALVIEW_EB11MP_GIC_CPU_BASE);
+		gic_dist_init(0, __io_address(REALVIEW_EB11MP_GIC_DIST_BASE), 29);
+		gic_cpu_init(0, gic_cpu_base_addr);
+
+#ifndef CONFIG_REALVIEW_EB_ARM11MP_REVB
+		/* board GIC, secondary */
+		gic_dist_init(1, __io_address(REALVIEW_GIC_DIST_BASE), 64);
+		gic_cpu_init(1, __io_address(REALVIEW_GIC_CPU_BASE));
+		gic_cascade_irq(1, IRQ_EB11MP_EB_IRQ1);
 #endif
-	gic_dist_init(0, __io_address(REALVIEW_GIC_DIST_BASE), 29);
-	gic_cpu_init(0, __io_address(REALVIEW_GIC_CPU_BASE));
-#if defined(CONFIG_REALVIEW_MPCORE) && !defined(CONFIG_REALVIEW_MPCORE_REVB)
-	gic_dist_init(1, __io_address(REALVIEW_GIC1_DIST_BASE), 64);
-	gic_cpu_init(1, __io_address(REALVIEW_GIC1_CPU_BASE));
-	gic_cascade_irq(1, IRQ_EB_IRQ1);
-#endif
+	} else {
+		/* board GIC, primary */
+		gic_cpu_base_addr = __io_address(REALVIEW_GIC_CPU_BASE);
+		gic_dist_init(0, __io_address(REALVIEW_GIC_DIST_BASE), 29);
+		gic_cpu_init(0, gic_cpu_base_addr);
+	}
 }
 
+/*
+ * Fix up the IRQ numbers for the RealView EB/ARM11MPCore tile
+ */
+static void realview_eb11mp_fixup(void)
+{
+	/* AMBA devices */
+	dmac_device.irq[0]	= IRQ_EB11MP_DMA;
+	uart0_device.irq[0]	= IRQ_EB11MP_UART0;
+	uart1_device.irq[0]	= IRQ_EB11MP_UART1;
+	uart2_device.irq[0]	= IRQ_EB11MP_UART2;
+	uart3_device.irq[0]	= IRQ_EB11MP_UART3;
+	clcd_device.irq[0]	= IRQ_EB11MP_CLCD;
+	wdog_device.irq[0]	= IRQ_EB11MP_WDOG;
+	gpio0_device.irq[0]	= IRQ_EB11MP_GPIO0;
+	gpio1_device.irq[0]	= IRQ_EB11MP_GPIO1;
+	gpio2_device.irq[0]	= IRQ_EB11MP_GPIO2;
+	rtc_device.irq[0]	= IRQ_EB11MP_RTC;
+	sci0_device.irq[0]	= IRQ_EB11MP_SCI;
+	ssp0_device.irq[0]	= IRQ_EB11MP_SSP;
+	aaci_device.irq[0]	= IRQ_EB11MP_AACI;
+	mmc0_device.irq[0]	= IRQ_EB11MP_MMCI0A;
+	mmc0_device.irq[1]	= IRQ_EB11MP_MMCI0B;
+	kmi0_device.irq[0]	= IRQ_EB11MP_KMI0;
+	kmi1_device.irq[0]	= IRQ_EB11MP_KMI1;
+
+	/* platform devices */
+	realview_eb_smc91x_resources[1].start	= IRQ_EB11MP_ETH;
+	realview_eb_smc91x_resources[1].end	= IRQ_EB11MP_ETH;
+}
+
+static void __init realview_eb_timer_init(void)
+{
+	unsigned int timer_irq;
+
+	if (core_tile_eb11mp()) {
+#ifdef CONFIG_LOCAL_TIMERS
+		twd_base_addr = __io_address(REALVIEW_EB11MP_TWD_BASE);
+		twd_size = REALVIEW_EB11MP_TWD_SIZE;
+#endif
+		timer_irq = IRQ_EB11MP_TIMER0_1;
+	} else
+		timer_irq = IRQ_EB_TIMER0_1;
+
+	realview_timer_init(timer_irq);
+}
+
+static struct sys_timer realview_eb_timer = {
+	.init		= realview_eb_timer_init,
+};
+
 static void __init realview_eb_init(void)
 {
 	int i;
 
-#ifdef CONFIG_REALVIEW_MPCORE
-	/* 1MB (128KB/way), 8-way associativity, evmon/parity/share enabled
-	 * Bits:  .... ...0 0111 1001 0000 .... .... .... */
-	l2x0_init(__io_address(REALVIEW_MPCORE_L220_BASE), 0x00790000, 0xfe000fff);
-#endif
+	if (core_tile_eb11mp()) {
+		realview_eb11mp_fixup();
+
+		/* 1MB (128KB/way), 8-way associativity, evmon/parity/share enabled
+		 * Bits:  .... ...0 0111 1001 0000 .... .... .... */
+		l2x0_init(__io_address(REALVIEW_EB11MP_L220_BASE), 0x00790000, 0xfe000fff);
+	}
+
 	clk_register(&realview_clcd_clk);
 
 	platform_device_register(&realview_flash_device);
-	platform_device_register(&realview_smc91x_device);
+	platform_device_register(&realview_eb_smc91x_device);
 	platform_device_register(&realview_i2c_device);
 
 	for (i = 0; i < ARRAY_SIZE(amba_devs); i++) {
@@ -204,6 +360,6 @@
 	.boot_params	= 0x00000100,
 	.map_io		= realview_eb_map_io,
 	.init_irq	= gic_init_irq,
-	.timer		= &realview_timer,
+	.timer		= &realview_eb_timer,
 	.init_machine	= realview_eb_init,
 MACHINE_END
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index 9e13c83..5c84c60 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -470,7 +470,7 @@
  * If the system is going to use the SA-1111 DMA engines, set up
  * the memory bus request/grant pins.
  */
-void __init sa1110_mb_enable(void)
+void __devinit sa1110_mb_enable(void)
 {
 	unsigned long flags;
 
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 7595277..303a7ff 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -162,7 +162,7 @@
 			 * Free the page table, if there was one.
 			 */
 			if ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_TABLE)
-				pte_free_kernel(pmd_page_vaddr(pmd));
+				pte_free_kernel(&init_mm, pmd_page_vaddr(pmd));
 		}
 
 		addr += PGDIR_SIZE;
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index 50b9aed..500c961 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -65,14 +65,14 @@
 	return new_pgd;
 
 no_pte:
-	pmd_free(new_pmd);
+	pmd_free(mm, new_pmd);
 no_pmd:
 	free_pages((unsigned long)new_pgd, 2);
 no_pgd:
 	return NULL;
 }
 
-void free_pgd_slow(pgd_t *pgd)
+void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd)
 {
 	pmd_t *pmd;
 	struct page *pte;
@@ -94,8 +94,8 @@
 	pmd_clear(pmd);
 	dec_zone_page_state(virt_to_page((unsigned long *)pgd), NR_PAGETABLE);
 	pte_lock_deinit(pte);
-	pte_free(pte);
-	pmd_free(pmd);
+	pte_free(mm, pte);
+	pmd_free(mm, pmd);
 free:
 	free_pages((unsigned long) pgd, 2);
 }
diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c
index ba3d21d..6fe481f 100644
--- a/arch/arm/plat-iop/time.c
+++ b/arch/arm/plat-iop/time.c
@@ -57,8 +57,6 @@
 static irqreturn_t
 iop_timer_interrupt(int irq, void *dev_id)
 {
-	write_seqlock(&xtime_lock);
-
 	write_tisr(1);
 
 	while ((signed long)(next_jiffy_time - read_tcr1())
@@ -67,8 +65,6 @@
 		next_jiffy_time -= ticks_per_jiffy;
 	}
 
-	write_sequnlock(&xtime_lock);
-
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/arm/plat-s3c24xx/time.c b/arch/arm/plat-s3c24xx/time.c
index 2ec1daa..766473b 100644
--- a/arch/arm/plat-s3c24xx/time.c
+++ b/arch/arm/plat-s3c24xx/time.c
@@ -130,9 +130,7 @@
 static irqreturn_t
 s3c2410_timer_interrupt(int irq, void *dev_id)
 {
-	write_seqlock(&xtime_lock);
 	timer_tick();
-	write_sequnlock(&xtime_lock);
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index c816f29..28e0caf 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -82,6 +82,7 @@
 	select SUBARCH_AVR32B
 	select MMU
 	select PERFORMANCE_COUNTERS
+	select HAVE_GPIO_LIB
 
 #
 # CPU types
diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c
index d61a02d..38a8fa3 100644
--- a/arch/avr32/mach-at32ap/pio.c
+++ b/arch/avr32/mach-at32ap/pio.c
@@ -24,11 +24,11 @@
 #define MAX_NR_PIO_DEVICES		8
 
 struct pio_device {
+	struct gpio_chip chip;
 	void __iomem *regs;
 	const struct platform_device *pdev;
 	struct clk *clk;
 	u32 pinmux_mask;
-	u32 gpio_mask;
 	char name[8];
 };
 
@@ -64,7 +64,8 @@
 		goto fail;
 	}
 
-	if (unlikely(test_and_set_bit(pin_index, &pio->pinmux_mask))) {
+	if (unlikely(test_and_set_bit(pin_index, &pio->pinmux_mask)
+			 || gpiochip_is_requested(&pio->chip, pin_index))) {
 		printk("%s: pin %u is busy\n", pio->name, pin_index);
 		goto fail;
 	}
@@ -79,9 +80,6 @@
 	if (!(flags & AT32_GPIOF_PULLUP))
 		pio_writel(pio, PUDR, mask);
 
-	/* gpio_request NOT allowed */
-	set_bit(pin_index, &pio->gpio_mask);
-
 	return;
 
 fail:
@@ -130,9 +128,6 @@
 
 	pio_writel(pio, PER, mask);
 
-	/* gpio_request now allowed */
-	clear_bit(pin_index, &pio->gpio_mask);
-
 	return;
 
 fail:
@@ -166,96 +161,50 @@
 
 /* GPIO API */
 
-int gpio_request(unsigned int gpio, const char *label)
+static int direction_input(struct gpio_chip *chip, unsigned offset)
 {
-	struct pio_device *pio;
-	unsigned int pin;
+	struct pio_device *pio = container_of(chip, struct pio_device, chip);
+	u32 mask = 1 << offset;
 
-	pio = gpio_to_pio(gpio);
-	if (!pio)
-		return -ENODEV;
+	if (!(pio_readl(pio, PSR) & mask))
+		return -EINVAL;
 
-	pin = gpio & 0x1f;
-	if (test_and_set_bit(pin, &pio->gpio_mask))
-		return -EBUSY;
-
+	pio_writel(pio, ODR, mask);
 	return 0;
 }
-EXPORT_SYMBOL(gpio_request);
 
-void gpio_free(unsigned int gpio)
+static int gpio_get(struct gpio_chip *chip, unsigned offset)
 {
-	struct pio_device *pio;
-	unsigned int pin;
+	struct pio_device *pio = container_of(chip, struct pio_device, chip);
 
-	pio = gpio_to_pio(gpio);
-	if (!pio) {
-		printk(KERN_ERR
-		       "gpio: attempted to free invalid pin %u\n", gpio);
-		return;
-	}
-
-	pin = gpio & 0x1f;
-	if (!test_and_clear_bit(pin, &pio->gpio_mask))
-		printk(KERN_ERR "gpio: freeing free or non-gpio pin %s-%u\n",
-		       pio->name, pin);
+	return (pio_readl(pio, PDSR) >> offset) & 1;
 }
-EXPORT_SYMBOL(gpio_free);
 
-int gpio_direction_input(unsigned int gpio)
+static void gpio_set(struct gpio_chip *chip, unsigned offset, int value);
+
+static int direction_output(struct gpio_chip *chip, unsigned offset, int value)
 {
-	struct pio_device *pio;
-	unsigned int pin;
+	struct pio_device *pio = container_of(chip, struct pio_device, chip);
+	u32 mask = 1 << offset;
 
-	pio = gpio_to_pio(gpio);
-	if (!pio)
-		return -ENODEV;
+	if (!(pio_readl(pio, PSR) & mask))
+		return -EINVAL;
 
-	pin = gpio & 0x1f;
-	pio_writel(pio, ODR, 1 << pin);
-
+	gpio_set(chip, offset, value);
+	pio_writel(pio, OER, mask);
 	return 0;
 }
-EXPORT_SYMBOL(gpio_direction_input);
 
-int gpio_direction_output(unsigned int gpio, int value)
+static void gpio_set(struct gpio_chip *chip, unsigned offset, int value)
 {
-	struct pio_device *pio;
-	unsigned int pin;
+	struct pio_device *pio = container_of(chip, struct pio_device, chip);
+	u32 mask = 1 << offset;
 
-	pio = gpio_to_pio(gpio);
-	if (!pio)
-		return -ENODEV;
-
-	gpio_set_value(gpio, value);
-
-	pin = gpio & 0x1f;
-	pio_writel(pio, OER, 1 << pin);
-
-	return 0;
-}
-EXPORT_SYMBOL(gpio_direction_output);
-
-int gpio_get_value(unsigned int gpio)
-{
-	struct pio_device *pio = &pio_dev[gpio >> 5];
-
-	return (pio_readl(pio, PDSR) >> (gpio & 0x1f)) & 1;
-}
-EXPORT_SYMBOL(gpio_get_value);
-
-void gpio_set_value(unsigned int gpio, int value)
-{
-	struct pio_device *pio = &pio_dev[gpio >> 5];
-	u32 mask;
-
-	mask = 1 << (gpio & 0x1f);
 	if (value)
 		pio_writel(pio, SODR, mask);
 	else
 		pio_writel(pio, CODR, mask);
 }
-EXPORT_SYMBOL(gpio_set_value);
 
 /*--------------------------------------------------------------------------*/
 
@@ -339,6 +288,63 @@
 
 /*--------------------------------------------------------------------------*/
 
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/seq_file.h>
+
+/*
+ * This shows more info than the generic gpio dump code:
+ * pullups, deglitching, open drain drive.
+ */
+static void pio_bank_show(struct seq_file *s, struct gpio_chip *chip)
+{
+	struct pio_device *pio = container_of(chip, struct pio_device, chip);
+	u32			psr, osr, imr, pdsr, pusr, ifsr, mdsr;
+	unsigned		i;
+	u32			mask;
+	char			bank;
+
+	psr = pio_readl(pio, PSR);
+	osr = pio_readl(pio, OSR);
+	imr = pio_readl(pio, IMR);
+	pdsr = pio_readl(pio, PDSR);
+	pusr = pio_readl(pio, PUSR);
+	ifsr = pio_readl(pio, IFSR);
+	mdsr = pio_readl(pio, MDSR);
+
+	bank = 'A' + pio->pdev->id;
+
+	for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
+		const char *label;
+
+		label = gpiochip_is_requested(chip, i);
+		if (!label)
+			continue;
+
+		seq_printf(s, " gpio-%-3d P%c%-2d (%-12s) %s %s %s",
+			chip->base + i, bank, i,
+			label,
+			(osr & mask) ? "out" : "in ",
+			(mask & pdsr) ? "hi" : "lo",
+			(mask & pusr) ? "  " : "up");
+		if (ifsr & mask)
+			seq_printf(s, " deglitch");
+		if ((osr & mdsr) & mask)
+			seq_printf(s, " open-drain");
+		if (imr & mask)
+			seq_printf(s, " irq-%d edge-both",
+				gpio_to_irq(chip->base + i));
+		seq_printf(s, "\n");
+	}
+}
+
+#else
+#define pio_bank_show	NULL
+#endif
+
+
+/*--------------------------------------------------------------------------*/
+
 static int __init pio_probe(struct platform_device *pdev)
 {
 	struct pio_device *pio = NULL;
@@ -349,6 +355,18 @@
 	pio = &pio_dev[pdev->id];
 	BUG_ON(!pio->regs);
 
+	pio->chip.label = pio->name;
+	pio->chip.base = pdev->id * 32;
+	pio->chip.ngpio = 32;
+
+	pio->chip.direction_input = direction_input;
+	pio->chip.get = gpio_get;
+	pio->chip.direction_output = direction_output;
+	pio->chip.set = gpio_set;
+	pio->chip.dbg_show = pio_bank_show;
+
+	gpiochip_add(&pio->chip);
+
 	gpio_irq_setup(pio, irq, gpio_irq_base);
 
 	platform_set_drvdata(pdev, pio);
@@ -406,12 +424,6 @@
 	pio->pdev = pdev;
 	pio->regs = ioremap(regs->start, regs->end - regs->start + 1);
 
-	/*
-	 * request_gpio() is only valid for pins that have been
-	 * explicitly configured as GPIO and not previously requested
-	 */
-	pio->gpio_mask = ~0UL;
-
 	/* start with irqs disabled and acked */
 	pio_writel(pio, IDR, ~0UL);
 	(void) pio_readl(pio, ISR);
diff --git a/arch/avr32/mach-at32ap/pio.h b/arch/avr32/mach-at32ap/pio.h
index 50fa3ac..7795116 100644
--- a/arch/avr32/mach-at32ap/pio.h
+++ b/arch/avr32/mach-at32ap/pio.h
@@ -19,7 +19,7 @@
 #define PIO_OSR                                0x0018
 #define PIO_IFER                               0x0020
 #define PIO_IFDR                               0x0024
-#define PIO_ISFR                               0x0028
+#define PIO_IFSR                               0x0028
 #define PIO_SODR                               0x0030
 #define PIO_CODR                               0x0034
 #define PIO_ODSR                               0x0038
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index 56ff51b..fdd9bf4 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1373,7 +1373,7 @@
 	.long _sys_epoll_pwait
 	.long _sys_utimensat
 	.long _sys_signalfd
-	.long _sys_timerfd
+	.long _sys_ni_syscall
 	.long _sys_eventfd	/* 350 */
 	.long _sys_pread64
 	.long _sys_pwrite64
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 7f0be4c..27b082a 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -150,6 +150,7 @@
 	  Width in bytes of the Flash bus (1, 2 or 4). Is usually 2.
 
 source arch/cris/arch-v10/Kconfig
+source arch/cris/arch-v32/Kconfig
 
 endmenu
 
@@ -157,8 +158,8 @@
 
 # bring in ETRAX built-in drivers
 menu "Drivers for built-in interfaces"
-# arch/cris/arch is a symlink to correct arch (arch-v10 or arch-v32)
-source arch/cris/arch/drivers/Kconfig
+source arch/cris/arch-v10/drivers/Kconfig
+source arch/cris/arch-v32/drivers/Kconfig
 
 endmenu
 
diff --git a/arch/cris/arch-v10/Kconfig b/arch/cris/arch-v10/Kconfig
index f1ce6f6..1d61fae 100644
--- a/arch/cris/arch-v10/Kconfig
+++ b/arch/cris/arch-v10/Kconfig
@@ -1,3 +1,5 @@
+if ETRAX_ARCH_V10
+
 # ETRAX 100LX v1 has a MMU "feature" requiring a low mapping
 config CRIS_LOW_MAP
 	bool
@@ -451,3 +453,5 @@
 	default "25"
 	help
 	  Configure where power button is connected.
+
+endif
diff --git a/arch/cris/arch-v10/drivers/Kconfig b/arch/cris/arch-v10/drivers/Kconfig
index e3c0f29..96740ef 100644
--- a/arch/cris/arch-v10/drivers/Kconfig
+++ b/arch/cris/arch-v10/drivers/Kconfig
@@ -1,3 +1,5 @@
+if ETRAX_ARCH_V10
+
 config ETRAX_ETHERNET
 	bool "Ethernet support"
 	depends on ETRAX_ARCH_V10
@@ -806,3 +808,5 @@
 	  1 = 2kohm, 2 = 4kohm, 3 = 4kohm
 	  4 = 1 diode, 8 = 2 diodes
 	  Allowed values are (increasing current): 0, 11, 10, 9, 7, 6, 5
+
+endif
diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S
index ec62c95..d1361dc 100644
--- a/arch/cris/arch-v10/kernel/entry.S
+++ b/arch/cris/arch-v10/kernel/entry.S
@@ -1167,7 +1167,7 @@
 	.long sys_epoll_pwait
 	.long sys_utimensat		/* 320 */
 	.long sys_signalfd
-	.long sys_timerfd
+	.long sys_ni_syscall
 	.long sys_eventfd
 	.long sys_fallocate
 
diff --git a/arch/cris/arch-v32/Kconfig b/arch/cris/arch-v32/Kconfig
index 4f79d8e..d8acaa9 100644
--- a/arch/cris/arch-v32/Kconfig
+++ b/arch/cris/arch-v32/Kconfig
@@ -1,3 +1,5 @@
+if ETRAX_ARCH_V32
+
 config ETRAX_DRAM_VIRTUAL_BASE
 	hex
 	depends on ETRAX_ARCH_V32
@@ -294,3 +296,5 @@
 	help
 	  Configures the initial data for the general port E bits.  Most
 	  products should use 00000 here.
+
+endif
diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig
index 9bccb5e..c329cce 100644
--- a/arch/cris/arch-v32/drivers/Kconfig
+++ b/arch/cris/arch-v32/drivers/Kconfig
@@ -1,3 +1,5 @@
+if ETRAX_ARCH_V32
+
 config ETRAX_ETHERNET
 	bool "Ethernet support"
 	depends on ETRAX_ARCH_V32
@@ -610,3 +612,5 @@
 	help
 	  This option enables a driver for the stream co-processor
 	  for cryptographic operations.
+
+endif
diff --git a/arch/cris/arch-v32/drivers/pci/dma.c b/arch/cris/arch-v32/drivers/pci/dma.c
index 66f9500..e036465 100644
--- a/arch/cris/arch-v32/drivers/pci/dma.c
+++ b/arch/cris/arch-v32/drivers/pci/dma.c
@@ -93,7 +93,7 @@
 
 	dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
 	if (!dev->dma_mem)
-		goto out;
+		goto iounmap_out;
 	dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
 	if (!dev->dma_mem->bitmap)
 		goto free1_out;
@@ -110,6 +110,8 @@
 
  free1_out:
 	kfree(dev->dma_mem);
+ iounmap_out:
+	iounmap(mem_base);
  out:
 	return 0;
 }
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index bf0468c..96f7d70 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -138,6 +138,15 @@
 
 endchoice
 
+config PAGE_OFFSET
+	hex
+	default 0x20000000 if UCPAGE_OFFSET_20000000
+	default 0x40000000 if UCPAGE_OFFSET_40000000
+	default 0x60000000 if UCPAGE_OFFSET_60000000
+	default 0x80000000 if UCPAGE_OFFSET_80000000
+	default 0xA0000000 if UCPAGE_OFFSET_A0000000
+	default 0xC0000000
+
 config PROTECT_KERNEL
 	bool "Protect core kernel against userspace"
 	depends on !MMU
diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S
index f42b328..ef7527b 100644
--- a/arch/frv/kernel/vmlinux.lds.S
+++ b/arch/frv/kernel/vmlinux.lds.S
@@ -13,7 +13,7 @@
 
 jiffies = jiffies_64 + 4;
 
-__page_offset = 0xc0000000;		/* start of area covered by struct pages */
+__page_offset = CONFIG_PAGE_OFFSET;	/* start of area covered by struct pages */
 __kernel_image_start = __page_offset;	/* address at which kernel image resides */
 
 SECTIONS
diff --git a/arch/frv/mm/mmu-context.c b/arch/frv/mm/mmu-context.c
index 1530a411..81757d5 100644
--- a/arch/frv/mm/mmu-context.c
+++ b/arch/frv/mm/mmu-context.c
@@ -181,7 +181,7 @@
 
 	/* get a handle on the mm_struct */
 	read_lock(&tasklist_lock);
-	tsk = find_task_by_pid(pid);
+	tsk = find_task_by_vpid(pid);
 	if (tsk) {
 		ret = -EINVAL;
 
diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c
index 7787c3c..1a2e5c8 100644
--- a/arch/frv/mm/pgalloc.c
+++ b/arch/frv/mm/pgalloc.c
@@ -140,7 +140,7 @@
 	return pgd;
 }
 
-void pgd_free(pgd_t *pgd)
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	/* in the non-PAE case, clear_page_tables() clears user pgd entries */
  	quicklist_free(0, pgd_dtor, pgd);
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 8c0ae4f..a944454 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1265,7 +1265,7 @@
  * the sglist do both.
  */
 static SBA_INLINE int
-sba_coalesce_chunks( struct ioc *ioc,
+sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
 	struct scatterlist *startsg,
 	int nents)
 {
@@ -1275,6 +1275,7 @@
 	struct scatterlist *dma_sg;        /* next DMA stream head */
 	unsigned long dma_offset, dma_len; /* start/len of DMA stream */
 	int n_mappings = 0;
+	unsigned int max_seg_size = dma_get_max_seg_size(dev);
 
 	while (nents > 0) {
 		unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
@@ -1314,6 +1315,9 @@
 			    > DMA_CHUNK_SIZE)
 				break;
 
+			if (dma_len + startsg->length > max_seg_size)
+				break;
+
 			/*
 			** Then look for virtually contiguous blocks.
 			**
@@ -1441,7 +1445,7 @@
 	** w/o this association, we wouldn't have coherent DMA!
 	** Access to the virtual address is what forces a two pass algorithm.
 	*/
-	coalesced = sba_coalesce_chunks(ioc, sglist, nents);
+	coalesced = sba_coalesce_chunks(ioc, dev, sglist, nents);
 
 	/*
 	** Program the I/O Pdir
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index c36f43c..f5d3efb 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1586,7 +1586,7 @@
 	data8 sys_epoll_pwait			// 1305
 	data8 sys_utimensat
 	data8 sys_signalfd
-	data8 sys_timerfd
+	data8 sys_ni_syscall
 	data8 sys_eventfd
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
diff --git a/arch/m32r/boot/compressed/m32r_sio.c b/arch/m32r/boot/compressed/m32r_sio.c
index ee3c8be..01d877c 100644
--- a/arch/m32r/boot/compressed/m32r_sio.c
+++ b/arch/m32r/boot/compressed/m32r_sio.c
@@ -17,7 +17,7 @@
 	return 0;
 }
 
-#if defined(CONFIG_PLAT_M32700UT_Alpha) || defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_OPSPUT)
+#if defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_OPSPUT)
 #include <asm/m32r.h>
 #include <asm/io.h>
 
@@ -52,7 +52,7 @@
 	}
 	*BOOT_SIO0TXB = c;
 }
-#else /* !(CONFIG_PLAT_M32700UT_Alpha) && !(CONFIG_PLAT_M32700UT) */
+#else /* !(CONFIG_PLAT_M32700UT) */
 #if defined(CONFIG_PLAT_MAPPI2)
 #define SIO0STS	(volatile unsigned short *)(0xa0efd000 + 14)
 #define SIO0TXB	(volatile unsigned short *)(0xa0efd000 + 30)
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 8236e42..ffabd01 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -577,20 +577,6 @@
 	depends on INPUT_ADBHID
 	default y
 
-config MAC_ADBKEYCODES
-	bool "Support for ADB raw keycodes"
-	depends on INPUT_ADBHID
-	help
-	  This provides support for sending raw ADB keycodes to console
-	  devices.  This is the default up to 2.4.0, but in future this may be
-	  phased out in favor of generic Linux keycodes.  If you say Y here,
-	  you can dynamically switch via the
-	  /proc/sys/dev/mac_hid/keyboard_sends_linux_keycodes
-	  sysctl and with the "keyboard_sends_linux_keycodes=" kernel
-	  argument.
-
-	  If unsure, say Y here.
-
 config ADB_KEYBOARD
 	bool "Support for ADB keyboard (old driver)"
 	depends on MAC && !INPUT_ADBHID
diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile
index 4a1bd44..2cba605 100644
--- a/arch/m68k/Makefile
+++ b/arch/m68k/Makefile
@@ -13,16 +13,15 @@
 # Copyright (C) 1994 by Hamish Macdonald
 #
 
-# test for cross compiling
-COMPILE_ARCH = $(shell uname -m)
-
 # override top level makefile
 AS += -m68020
 LDFLAGS := -m m68kelf
 LDFLAGS_MODULE += -T $(srctree)/arch/m68k/kernel/module.lds
-ifneq ($(COMPILE_ARCH),$(ARCH))
-	# prefix for cross-compiling binaries
-	CROSS_COMPILE = m68k-linux-gnu-
+ifneq ($(SUBARCH),$(ARCH))
+	ifeq ($(CROSS_COMPILE),)
+		CROSS_COMPILE := $(call cc-cross-prefix, \
+			m68k-linux-gnu- m68k-linux- m68k-unknown-linux-gnu-)
+	endif
 endif
 
 ifdef CONFIG_SUN3
diff --git a/arch/m68k/amiga/Makefile b/arch/m68k/amiga/Makefile
index 8b41565..6a0d765 100644
--- a/arch/m68k/amiga/Makefile
+++ b/arch/m68k/amiga/Makefile
@@ -2,6 +2,6 @@
 # Makefile for Linux arch/m68k/amiga source directory
 #
 
-obj-y		:= config.o amiints.o cia.o chipram.o amisound.o amiga_ksyms.o
+obj-y		:= config.o amiints.o cia.o chipram.o amisound.o
 
 obj-$(CONFIG_AMIGA_PCMCIA)	+= pcmcia.o
diff --git a/arch/m68k/amiga/amiga_ksyms.c b/arch/m68k/amiga/amiga_ksyms.c
deleted file mode 100644
index 7fdcf6b..0000000
--- a/arch/m68k/amiga/amiga_ksyms.c
+++ /dev/null
@@ -1,33 +0,0 @@
-#include <linux/module.h>
-#include <linux/types.h>
-#include <asm/ptrace.h>
-#include <asm/amigahw.h>
-#include <asm/amigaints.h>
-#include <asm/amipcmcia.h>
-
-extern volatile u_short amiga_audio_min_period;
-extern u_short amiga_audio_period;
-
-/*
- * Add things here when you find the need for it.
- */
-EXPORT_SYMBOL(amiga_model);
-EXPORT_SYMBOL(amiga_chipset);
-EXPORT_SYMBOL(amiga_hw_present);
-EXPORT_SYMBOL(amiga_eclock);
-EXPORT_SYMBOL(amiga_colorclock);
-EXPORT_SYMBOL(amiga_chip_alloc);
-EXPORT_SYMBOL(amiga_chip_free);
-EXPORT_SYMBOL(amiga_chip_avail);
-EXPORT_SYMBOL(amiga_chip_size);
-EXPORT_SYMBOL(amiga_audio_period);
-EXPORT_SYMBOL(amiga_audio_min_period);
-
-#ifdef CONFIG_AMIGA_PCMCIA
-  EXPORT_SYMBOL(pcmcia_reset);
-  EXPORT_SYMBOL(pcmcia_copy_tuple);
-  EXPORT_SYMBOL(pcmcia_program_voltage);
-  EXPORT_SYMBOL(pcmcia_access_speed);
-  EXPORT_SYMBOL(pcmcia_write_enable);
-  EXPORT_SYMBOL(pcmcia_write_disable);
-#endif
diff --git a/arch/m68k/amiga/amisound.c b/arch/m68k/amiga/amisound.c
index 1f5bfb5..61e5c54 100644
--- a/arch/m68k/amiga/amisound.c
+++ b/arch/m68k/amiga/amisound.c
@@ -12,6 +12,7 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/string.h>
+#include <linux/module.h>
 
 #include <asm/system.h>
 #include <asm/amigahw.h>
@@ -21,7 +22,7 @@
 	0,  39,  75,  103,  121,  127,  121,  103,  75,  39,
 	0, -39, -75, -103, -121, -127, -121, -103, -75, -39
 };
-#define DATA_SIZE	(sizeof(sine_data)/sizeof(sine_data[0]))
+#define DATA_SIZE	ARRAY_SIZE(sine_data)
 
 #define custom amiga_custom
 
@@ -31,6 +32,7 @@
      */
 
 volatile unsigned short amiga_audio_min_period = 124; /* Default for pre-OCS */
+EXPORT_SYMBOL(amiga_audio_min_period);
 
 #define MAX_PERIOD	(65535)
 
@@ -40,6 +42,7 @@
      */
 
 unsigned short amiga_audio_period = MAX_PERIOD;
+EXPORT_SYMBOL(amiga_audio_period);
 
 static unsigned long clock_constant;
 
diff --git a/arch/m68k/amiga/chipram.c b/arch/m68k/amiga/chipram.c
index fa015d8..d10726f 100644
--- a/arch/m68k/amiga/chipram.c
+++ b/arch/m68k/amiga/chipram.c
@@ -13,10 +13,13 @@
 #include <linux/ioport.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/module.h>
+
 #include <asm/page.h>
 #include <asm/amigahw.h>
 
 unsigned long amiga_chip_size;
+EXPORT_SYMBOL(amiga_chip_size);
 
 static struct resource chipram_res = {
     .name = "Chip RAM", .start = CHIP_PHYSADDR
@@ -67,6 +70,7 @@
 #endif
     return (void *)ZTWO_VADDR(res->start);
 }
+EXPORT_SYMBOL(amiga_chip_alloc);
 
 
     /*
@@ -120,6 +124,7 @@
     }
     printk("amiga_chip_free: trying to free nonexistent region at %p\n", ptr);
 }
+EXPORT_SYMBOL(amiga_chip_free);
 
 
 unsigned long amiga_chip_avail(void)
@@ -129,3 +134,5 @@
 #endif
 	return chipavail;
 }
+EXPORT_SYMBOL(amiga_chip_avail);
+
diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index 3574853..50f5daa 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -23,6 +23,7 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/zorro.h>
+#include <linux/module.h>
 
 #include <asm/bootinfo.h>
 #include <asm/setup.h>
@@ -36,13 +37,24 @@
 #include <asm/io.h>
 
 unsigned long amiga_model;
+EXPORT_SYMBOL(amiga_model);
+
 unsigned long amiga_eclock;
+EXPORT_SYMBOL(amiga_eclock);
+
 unsigned long amiga_masterclock;
+
 unsigned long amiga_colorclock;
+EXPORT_SYMBOL(amiga_colorclock);
+
 unsigned long amiga_chipset;
+EXPORT_SYMBOL(amiga_chipset);
+
 unsigned char amiga_vblank;
 unsigned char amiga_psfreq;
+
 struct amiga_hw_present amiga_hw_present;
+EXPORT_SYMBOL(amiga_hw_present);
 
 static char s_a500[] __initdata = "A500";
 static char s_a500p[] __initdata = "A500+";
diff --git a/arch/m68k/amiga/pcmcia.c b/arch/m68k/amiga/pcmcia.c
index 186662c..7106f0c 100644
--- a/arch/m68k/amiga/pcmcia.c
+++ b/arch/m68k/amiga/pcmcia.c
@@ -15,6 +15,8 @@
 #include <linux/types.h>
 #include <linux/jiffies.h>
 #include <linux/timer.h>
+#include <linux/module.h>
+
 #include <asm/amigayle.h>
 #include <asm/amipcmcia.h>
 
@@ -30,6 +32,7 @@
 	while (time_before(jiffies, reset_start_time + 1*HZ/100));
 	b = gayle_reset;
 }
+EXPORT_SYMBOL(pcmcia_reset);
 
 
 /* copy a tuple, including tuple header. return nb bytes copied */
@@ -61,6 +64,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(pcmcia_copy_tuple);
 
 void pcmcia_program_voltage(int voltage)
 {
@@ -84,6 +88,7 @@
 	gayle.config = cfg_byte;
 
 }
+EXPORT_SYMBOL(pcmcia_program_voltage);
 
 void pcmcia_access_speed(int speed)
 {
@@ -101,13 +106,17 @@
 	cfg_byte = (cfg_byte & 0xf3) | s;
 	gayle.config = cfg_byte;
 }
+EXPORT_SYMBOL(pcmcia_access_speed);
 
 void pcmcia_write_enable(void)
 {
 	gayle.cardstatus = GAYLE_CS_WR|GAYLE_CS_DA;
 }
+EXPORT_SYMBOL(pcmcia_write_enable);
 
 void pcmcia_write_disable(void)
 {
 	gayle.cardstatus = 0;
 }
+EXPORT_SYMBOL(pcmcia_write_disable);
+
diff --git a/arch/m68k/atari/Makefile b/arch/m68k/atari/Makefile
index 2cb8619..2cd905e 100644
--- a/arch/m68k/atari/Makefile
+++ b/arch/m68k/atari/Makefile
@@ -3,7 +3,7 @@
 #
 
 obj-y		:= config.o time.o debug.o ataints.o stdma.o \
-			atasound.o stram.o atari_ksyms.o
+			atasound.o stram.o
 
 ifeq ($(CONFIG_PCI),y)
 obj-$(CONFIG_HADES)	+= hades-pci.o
diff --git a/arch/m68k/atari/ataints.c b/arch/m68k/atari/ataints.c
index b85ca22..b45593a 100644
--- a/arch/m68k/atari/ataints.c
+++ b/arch/m68k/atari/ataints.c
@@ -40,6 +40,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/init.h>
 #include <linux/seq_file.h>
+#include <linux/module.h>
 
 #include <asm/system.h>
 #include <asm/traps.h>
@@ -446,6 +447,7 @@
 	free_vme_vec_bitmap |= 1 << i;
 	return VME_SOURCE_BASE + i;
 }
+EXPORT_SYMBOL(atari_register_vme_int);
 
 
 void atari_unregister_vme_int(unsigned long irq)
@@ -455,5 +457,6 @@
 		free_vme_vec_bitmap &= ~(1 << irq);
 	}
 }
+EXPORT_SYMBOL(atari_unregister_vme_int);
 
 
diff --git a/arch/m68k/atari/atari_ksyms.c b/arch/m68k/atari/atari_ksyms.c
deleted file mode 100644
index a047571..0000000
--- a/arch/m68k/atari/atari_ksyms.c
+++ /dev/null
@@ -1,35 +0,0 @@
-#include <linux/module.h>
-
-#include <asm/ptrace.h>
-#include <asm/traps.h>
-#include <asm/atarihw.h>
-#include <asm/atariints.h>
-#include <asm/atarikb.h>
-#include <asm/atari_joystick.h>
-#include <asm/atari_stdma.h>
-#include <asm/atari_stram.h>
-
-extern void atari_microwire_cmd( int cmd );
-extern int atari_MFP_init_done;
-extern int atari_SCC_init_done;
-extern int atari_SCC_reset_done;
-
-EXPORT_SYMBOL(atari_mch_cookie);
-EXPORT_SYMBOL(atari_mch_type);
-EXPORT_SYMBOL(atari_hw_present);
-EXPORT_SYMBOL(atari_switches);
-EXPORT_SYMBOL(atari_dont_touch_floppy_select);
-EXPORT_SYMBOL(atari_register_vme_int);
-EXPORT_SYMBOL(atari_unregister_vme_int);
-EXPORT_SYMBOL(stdma_lock);
-EXPORT_SYMBOL(stdma_release);
-EXPORT_SYMBOL(stdma_others_waiting);
-EXPORT_SYMBOL(stdma_islocked);
-EXPORT_SYMBOL(atari_stram_alloc);
-EXPORT_SYMBOL(atari_stram_free);
-
-EXPORT_SYMBOL(atari_MFP_init_done);
-EXPORT_SYMBOL(atari_SCC_init_done);
-EXPORT_SYMBOL(atari_SCC_reset_done);
-
-EXPORT_SYMBOL(atari_microwire_cmd);
diff --git a/arch/m68k/atari/atasound.c b/arch/m68k/atari/atasound.c
index ee04250..d266fe8 100644
--- a/arch/m68k/atari/atasound.c
+++ b/arch/m68k/atari/atasound.c
@@ -22,6 +22,7 @@
 #include <linux/fcntl.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 
 #include <asm/atarihw.h>
 #include <asm/system.h>
@@ -43,6 +44,7 @@
 	while( tt_microwire.mask != 0x7ff)
 		;
 }
+EXPORT_SYMBOL(atari_microwire_cmd);
 
 
 /* PSG base frequency */
diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c
index e40e5dc..5945e15 100644
--- a/arch/m68k/atari/config.c
+++ b/arch/m68k/atari/config.c
@@ -31,6 +31,7 @@
 #include <linux/delay.h>
 #include <linux/ioport.h>
 #include <linux/vt_kern.h>
+#include <linux/module.h>
 
 #include <asm/bootinfo.h>
 #include <asm/setup.h>
@@ -43,10 +44,20 @@
 #include <asm/io.h>
 
 u_long atari_mch_cookie;
+EXPORT_SYMBOL(atari_mch_cookie);
+
 u_long atari_mch_type;
+EXPORT_SYMBOL(atari_mch_type);
+
 struct atari_hw_present atari_hw_present;
+EXPORT_SYMBOL(atari_hw_present);
+
 u_long atari_switches;
+EXPORT_SYMBOL(atari_switches);
+
 int atari_dont_touch_floppy_select;
+EXPORT_SYMBOL(atari_dont_touch_floppy_select);
+
 int atari_rtc_year_offset;
 
 /* local function prototypes */
diff --git a/arch/m68k/atari/debug.c b/arch/m68k/atari/debug.c
index fbeed8c..043ddbc 100644
--- a/arch/m68k/atari/debug.c
+++ b/arch/m68k/atari/debug.c
@@ -15,17 +15,23 @@
 #include <linux/console.h>
 #include <linux/init.h>
 #include <linux/delay.h>
+#include <linux/module.h>
 
 #include <asm/atarihw.h>
 #include <asm/atariints.h>
 
 /* Flag that Modem1 port is already initialized and used */
 int atari_MFP_init_done;
+EXPORT_SYMBOL(atari_MFP_init_done);
+
 /* Flag that Modem1 port is already initialized and used */
 int atari_SCC_init_done;
+EXPORT_SYMBOL(atari_SCC_init_done);
+
 /* Can be set somewhere, if a SCC master reset has already be done and should
  * not be repeated; used by kgdb */
 int atari_SCC_reset_done;
+EXPORT_SYMBOL(atari_SCC_reset_done);
 
 static struct console atari_console_driver = {
 	.name	= "debug",
diff --git a/arch/m68k/atari/hades-pci.c b/arch/m68k/atari/hades-pci.c
index bee2b14..2bbabc0 100644
--- a/arch/m68k/atari/hades-pci.c
+++ b/arch/m68k/atari/hades-pci.c
@@ -376,8 +376,8 @@
 	 */
 
 	bus = kzalloc(sizeof(struct pci_bus_info), GFP_KERNEL);
-	if (!bus)
-		return NULL;
+	if (unlikely(!bus))
+		goto iounmap_base_virt;
 
 	/*
 	 * Claim resources. The m68k has no separate I/O space, both
@@ -385,43 +385,25 @@
 	 * the I/O resources are requested in memory space as well.
 	 */
 
-	if (request_resource(&iomem_resource, &config_space) != 0)
-	{
-		kfree(bus);
-		return NULL;
-	}
+	if (unlikely(request_resource(&iomem_resource, &config_space) != 0))
+		goto free_bus;
 
-	if (request_resource(&iomem_resource, &io_space) != 0)
-	{
-		release_resource(&config_space);
-		kfree(bus);
-		return NULL;
-	}
+	if (unlikely(request_resource(&iomem_resource, &io_space) != 0))
+		goto release_config_space;
 
 	bus->mem_space.start = HADES_MEM_BASE;
 	bus->mem_space.end = HADES_MEM_BASE + HADES_MEM_SIZE - 1;
 	bus->mem_space.name = pci_mem_name;
 #if 1
-	if (request_resource(&iomem_resource, &bus->mem_space) != 0)
-	{
-		release_resource(&io_space);
-		release_resource(&config_space);
-		kfree(bus);
-		return NULL;
-	}
+	if (unlikely(request_resource(&iomem_resource, &bus->mem_space) != 0))
+		goto release_io_space;
 #endif
 	bus->io_space.start = pci_io_base_virt;
 	bus->io_space.end = pci_io_base_virt + HADES_VIRT_IO_SIZE - 1;
 	bus->io_space.name = pci_io_name;
 #if 1
-	if (request_resource(&ioport_resource, &bus->io_space) != 0)
-	{
-		release_resource(&bus->mem_space);
-		release_resource(&io_space);
-		release_resource(&config_space);
-		kfree(bus);
-		return NULL;
-	}
+	if (unlikely(request_resource(&ioport_resource, &bus->io_space) != 0))
+		goto release_bus_mem_space;
 #endif
 	/*
 	 * Set hardware dependent functions.
@@ -438,5 +420,21 @@
 	tt_mfp.active_edge &= ~0x27;
 
 	return bus;
+
+release_bus_mem_space:
+	release_resource(&bus->mem_space);
+release_io_space:
+	release_resource(&io_space);
+release_config_space:
+	release_resource(&config_space);
+free_bus:
+	kfree(bus);
+iounmap_base_virt:
+	iounmap((void *)pci_io_base_virt);
+
+	for (i = 0; i < N_SLOTS; i++)
+		iounmap((void *)pci_conf_base_virt[i]);
+
+	return NULL;
 }
 #endif
diff --git a/arch/m68k/atari/stdma.c b/arch/m68k/atari/stdma.c
index ab3fd52..d1bd029 100644
--- a/arch/m68k/atari/stdma.c
+++ b/arch/m68k/atari/stdma.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/wait.h>
+#include <linux/module.h>
 
 #include <asm/atari_stdma.h>
 #include <asm/atariints.h>
@@ -91,6 +92,7 @@
 	stdma_isr_data = data;
 	local_irq_restore(flags);
 }
+EXPORT_SYMBOL(stdma_lock);
 
 
 /*
@@ -117,6 +119,7 @@
 
 	local_irq_restore(flags);
 }
+EXPORT_SYMBOL(stdma_release);
 
 
 /*
@@ -134,6 +137,7 @@
 {
 	return waitqueue_active(&stdma_wait);
 }
+EXPORT_SYMBOL(stdma_others_waiting);
 
 
 /*
@@ -155,6 +159,7 @@
 {
 	return stdma_locked;
 }
+EXPORT_SYMBOL(stdma_islocked);
 
 
 /*
diff --git a/arch/m68k/atari/stram.c b/arch/m68k/atari/stram.c
index bf4588c..8dda651 100644
--- a/arch/m68k/atari/stram.c
+++ b/arch/m68k/atari/stram.c
@@ -20,6 +20,7 @@
 #include <linux/bootmem.h>
 #include <linux/mount.h>
 #include <linux/blkdev.h>
+#include <linux/module.h>
 
 #include <asm/setup.h>
 #include <asm/machdep.h>
@@ -208,6 +209,7 @@
 	}
 	return( addr );
 }
+EXPORT_SYMBOL(atari_stram_alloc);
 
 void atari_stram_free( void *addr )
 
@@ -237,6 +239,7 @@
 	printk( KERN_ERR "atari_stram_free: cannot free block at %p "
 			"(called from %p)\n", addr, __builtin_return_address(0) );
 }
+EXPORT_SYMBOL(atari_stram_free);
 
 
 /* ------------------------------------------------------------------------ */
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 15b80ab..ff9dffa 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -678,7 +678,6 @@
 #
 CONFIG_MAC_SCC=y
 CONFIG_MAC_HID=y
-CONFIG_MAC_ADBKEYCODES=y
 CONFIG_SERIAL_CONSOLE=y
 
 #
diff --git a/arch/m68k/hp300/Makefile b/arch/m68k/hp300/Makefile
index 288b9c6..96d4244 100644
--- a/arch/m68k/hp300/Makefile
+++ b/arch/m68k/hp300/Makefile
@@ -2,4 +2,4 @@
 # Makefile for Linux arch/m68k/hp300 source directory
 #
 
-obj-y		:= ksyms.o config.o time.o reboot.o
+obj-y		:= config.o time.o reboot.o
diff --git a/arch/m68k/hp300/ksyms.c b/arch/m68k/hp300/ksyms.c
deleted file mode 100644
index 8202830..0000000
--- a/arch/m68k/hp300/ksyms.c
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- *  linux/arch/m68k/hp300/ksyms.c
- *
- *  Copyright (C) 1998 Philip Blundell <philb@gnu.org>
- *
- *  This file contains the HP300-specific kernel symbols.  None yet. :-)
- */
-
-#include <linux/module.h>
diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index 918f5db..6dfa3b3 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -742,7 +742,7 @@
 	.long sys_epoll_pwait		/* 315 */
 	.long sys_utimensat
 	.long sys_signalfd
-	.long sys_timerfd
+	.long sys_ni_syscall
 	.long sys_eventfd
 	.long sys_fallocate		/* 320 */
 
diff --git a/arch/m68k/mac/Makefile b/arch/m68k/mac/Makefile
index 995a09d9..1d265ba 100644
--- a/arch/m68k/mac/Makefile
+++ b/arch/m68k/mac/Makefile
@@ -3,4 +3,4 @@
 #
 
 obj-y		:= config.o bootparse.o macints.o iop.o via.o oss.o psc.o \
-			baboon.o macboing.o debug.o misc.o mac_ksyms.o
+			baboon.o macboing.o debug.o misc.o
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 01b468b..735a49b 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -58,8 +58,6 @@
 
 extern struct mem_info m68k_ramdisk;
 
-extern char m68k_command_line[CL_SIZE];
-
 void *mac_env;					/* Loaded by the boot asm */
 
 /* The phys. video addr. - might be bogus on some machines */
diff --git a/arch/m68k/mac/mac_ksyms.c b/arch/m68k/mac/mac_ksyms.c
deleted file mode 100644
index 6e37ceb..0000000
--- a/arch/m68k/mac/mac_ksyms.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#include <linux/module.h>
-#include <asm/ptrace.h>
-#include <asm/traps.h>
-
-/* Says whether we're using A/UX interrupts or not */
-extern int via_alt_mapping;
-
-EXPORT_SYMBOL(via_alt_mapping);
diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c
index 8df270e..fa485df 100644
--- a/arch/m68k/mac/via.c
+++ b/arch/m68k/mac/via.c
@@ -28,6 +28,7 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/ide.h>
+#include <linux/module.h>
 
 #include <asm/bootinfo.h>
 #include <asm/macintosh.h>
@@ -41,7 +42,9 @@
 /* See note in mac_via.h about how this is possibly not useful */
 volatile long *via_memory_bogon=(long *)&via_memory_bogon;
 #endif
-int rbv_present, via_alt_mapping;
+int rbv_present;
+int via_alt_mapping;
+EXPORT_SYMBOL(via_alt_mapping);
 __u8 rbv_clear;
 
 /*
diff --git a/arch/m68k/mvme16x/Makefile b/arch/m68k/mvme16x/Makefile
index 950e82f..edb3f6e 100644
--- a/arch/m68k/mvme16x/Makefile
+++ b/arch/m68k/mvme16x/Makefile
@@ -2,4 +2,4 @@
 # Makefile for Linux arch/m68k/mvme16x source directory
 #
 
-obj-y		:= config.o rtc.o mvme16x_ksyms.o
+obj-y		:= config.o rtc.o
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index daa7851..24cbc30 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -25,6 +25,7 @@
 #include <linux/genhd.h>
 #include <linux/rtc.h>
 #include <linux/interrupt.h>
+#include <linux/module.h>
 
 #include <asm/bootinfo.h>
 #include <asm/system.h>
@@ -58,6 +59,7 @@
 
 
 unsigned short mvme16x_config;
+EXPORT_SYMBOL(mvme16x_config);
 
 
 int mvme16x_parse_bootinfo(const struct bi_record *bi)
diff --git a/arch/m68k/mvme16x/mvme16x_ksyms.c b/arch/m68k/mvme16x/mvme16x_ksyms.c
deleted file mode 100644
index 4a8a363..0000000
--- a/arch/m68k/mvme16x/mvme16x_ksyms.c
+++ /dev/null
@@ -1,6 +0,0 @@
-#include <linux/module.h>
-#include <linux/types.h>
-#include <asm/ptrace.h>
-#include <asm/mvme16xhw.h>
-
-EXPORT_SYMBOL(mvme16x_config);
diff --git a/arch/m68knommu/Kconfig.debug b/arch/m68knommu/Kconfig.debug
index 9ff47bd..ed6d9a83 100644
--- a/arch/m68knommu/Kconfig.debug
+++ b/arch/m68knommu/Kconfig.debug
@@ -21,13 +21,6 @@
 	default 'console=ttyS0,19200'
 	depends on BOOTPARAM
 
-config DUMPTOFLASH
-	bool "Panic/Dump to FLASH"
-	depends on COLDFIRE
-	help
-	  Dump any panic of trap output into a flash memory segment
-	  for later analysis.
-
 config NO_KERNEL_MSG
 	bool "Suppress Kernel BUG Messages"
 	help
diff --git a/arch/m68knommu/defconfig b/arch/m68knommu/defconfig
index 5a0ecaa..6481130 100644
--- a/arch/m68knommu/defconfig
+++ b/arch/m68knommu/defconfig
@@ -597,7 +597,6 @@
 # CONFIG_FULLDEBUG is not set
 # CONFIG_HIGHPROFILE is not set
 # CONFIG_BOOTPARAM is not set
-# CONFIG_DUMPTOFLASH is not set
 # CONFIG_NO_KERNEL_MSG is not set
 # CONFIG_BDM_DISABLE is not set
 
diff --git a/arch/m68knommu/kernel/m68k_ksyms.c b/arch/m68knommu/kernel/m68k_ksyms.c
index f795062..53fad14 100644
--- a/arch/m68knommu/kernel/m68k_ksyms.c
+++ b/arch/m68knommu/kernel/m68k_ksyms.c
@@ -24,14 +24,6 @@
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(iounmap);
 EXPORT_SYMBOL(dump_fpu);
-EXPORT_SYMBOL(strnlen);
-EXPORT_SYMBOL(strrchr);
-EXPORT_SYMBOL(strstr);
-EXPORT_SYMBOL(strchr);
-EXPORT_SYMBOL(strcat);
-EXPORT_SYMBOL(strlen);
-EXPORT_SYMBOL(strcmp);
-EXPORT_SYMBOL(strncmp);
 
 EXPORT_SYMBOL(ip_fast_csum);
 
@@ -46,9 +38,6 @@
    it's OK to leave it out of version control.  */
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memcmp);
-EXPORT_SYMBOL(memscan);
-EXPORT_SYMBOL(memmove);
 
 EXPORT_SYMBOL(__down_failed);
 EXPORT_SYMBOL(__down_failed_interruptible);
diff --git a/arch/m68knommu/kernel/setup.c b/arch/m68knommu/kernel/setup.c
index 332345d..81507c5 100644
--- a/arch/m68knommu/kernel/setup.c
+++ b/arch/m68knommu/kernel/setup.c
@@ -64,9 +64,6 @@
 #ifdef CONFIG_M68VZ328
 	#define CPU "MC68VZ328"
 #endif
-#ifdef CONFIG_M68332
-	#define CPU "MC68332"
-#endif
 #ifdef CONFIG_M68360
 	#define CPU "MC68360"
 #endif
diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S
index 9620093..1b02b88 100644
--- a/arch/m68knommu/kernel/syscalltable.S
+++ b/arch/m68knommu/kernel/syscalltable.S
@@ -336,7 +336,7 @@
 	.long sys_epoll_pwait		/* 315 */
 	.long sys_utimensat
 	.long sys_signalfd
-	.long sys_timerfd
+	.long sys_ni_syscall
 	.long sys_eventfd
 	.long sys_fallocate		/* 320 */
 
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 82480a1..f798139 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -660,7 +660,7 @@
 	sys	sys_ioprio_get		2	/* 4315 */
 	sys	sys_utimensat		4
 	sys	sys_signalfd		3
-	sys	sys_timerfd		4
+	sys	sys_ni_syscall		0
 	sys	sys_eventfd		1
 	sys	sys_fallocate		6	/* 4320 */
 	.endm
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index c2c1087..a626be6 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -475,7 +475,7 @@
 	PTR	sys_ioprio_get
 	PTR	sys_utimensat			/* 5275 */
 	PTR	sys_signalfd
-	PTR	sys_timerfd
+	PTR	sys_ni_syscall
 	PTR	sys_eventfd
 	PTR	sys_fallocate
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 01993ec..9d5bcaf 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -401,7 +401,7 @@
 	PTR	sys_ioprio_get
 	PTR	compat_sys_utimensat
 	PTR	compat_sys_signalfd		/* 5280 */
-	PTR	compat_sys_timerfd
+	PTR	sys_ni_syscall
 	PTR	sys_eventfd
 	PTR	sys_fallocate
 	.size	sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index dd68afc..fd2019c 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -523,7 +523,7 @@
 	PTR	sys_ioprio_get			/* 4315 */
 	PTR	compat_sys_utimensat
 	PTR	compat_sys_signalfd
-	PTR	compat_sys_timerfd
+	PTR	sys_ni_syscall
 	PTR	sys_eventfd
 	PTR	sys32_fallocate			/* 4320 */
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b94d450..cf030b0 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -256,6 +256,9 @@
 
 	  Most drivers don't have this problem; it is safe to say Y here.
 
+config IOMMU_HELPER
+	def_bool PPC64
+
 config HOTPLUG_CPU
 	bool "Support for enabling/disabling CPUs"
 	depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC)
diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c
index 8423907..3a317cb 100644
--- a/arch/powerpc/kernel/dma_64.c
+++ b/arch/powerpc/kernel/dma_64.c
@@ -31,8 +31,8 @@
 static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
 				      dma_addr_t *dma_handle, gfp_t flag)
 {
-	return iommu_alloc_coherent(dev->archdata.dma_data, size, dma_handle,
-				    device_to_mask(dev), flag,
+	return iommu_alloc_coherent(dev, dev->archdata.dma_data, size,
+				    dma_handle, device_to_mask(dev), flag,
 				    dev->archdata.numa_node);
 }
 
@@ -52,7 +52,7 @@
 				       size_t size,
 				       enum dma_data_direction direction)
 {
-	return iommu_map_single(dev->archdata.dma_data, vaddr, size,
+	return iommu_map_single(dev, dev->archdata.dma_data, vaddr, size,
 			        device_to_mask(dev), direction);
 }
 
@@ -68,7 +68,7 @@
 static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
 			    int nelems, enum dma_data_direction direction)
 {
-	return iommu_map_sg(dev->archdata.dma_data, sglist, nelems,
+	return iommu_map_sg(dev, sglist, nelems,
 			    device_to_mask(dev), direction);
 }
 
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index a3c406a..8f1f4e5 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -31,6 +31,7 @@
 #include <linux/string.h>
 #include <linux/dma-mapping.h>
 #include <linux/bitops.h>
+#include <linux/iommu-helper.h>
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/iommu.h>
@@ -81,17 +82,19 @@
 __setup("protect4gb=", setup_protect4gb);
 __setup("iommu=", setup_iommu);
 
-static unsigned long iommu_range_alloc(struct iommu_table *tbl,
+static unsigned long iommu_range_alloc(struct device *dev,
+				       struct iommu_table *tbl,
                                        unsigned long npages,
                                        unsigned long *handle,
                                        unsigned long mask,
                                        unsigned int align_order)
 { 
-	unsigned long n, end, i, start;
+	unsigned long n, end, start;
 	unsigned long limit;
 	int largealloc = npages > 15;
 	int pass = 0;
 	unsigned long align_mask;
+	unsigned long boundary_size;
 
 	align_mask = 0xffffffffffffffffl >> (64 - align_order);
 
@@ -136,14 +139,17 @@
 			start &= mask;
 	}
 
-	n = find_next_zero_bit(tbl->it_map, limit, start);
+	if (dev)
+		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+				      1 << IOMMU_PAGE_SHIFT);
+	else
+		boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT);
+	/* dev is NULL for iseries_hv_alloc/iseries_hv_map: 4GB boundary */
 
-	/* Align allocation */
-	n = (n + align_mask) & ~align_mask;
-
-	end = n + npages;
-
-	if (unlikely(end >= limit)) {
+	n = iommu_area_alloc(tbl->it_map, limit, start, npages,
+			     tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT,
+			     align_mask);
+	if (n == -1) {
 		if (likely(pass < 2)) {
 			/* First failure, just rescan the half of the table.
 			 * Second failure, rescan the other half of the table.
@@ -158,14 +164,7 @@
 		}
 	}
 
-	for (i = n; i < end; i++)
-		if (test_bit(i, tbl->it_map)) {
-			start = i+1;
-			goto again;
-		}
-
-	for (i = n; i < end; i++)
-		__set_bit(i, tbl->it_map);
+	end = n + npages;
 
 	/* Bump the hint to a new block for small allocs. */
 	if (largealloc) {
@@ -184,16 +183,17 @@
 	return n;
 }
 
-static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page,
-		       unsigned int npages, enum dma_data_direction direction,
-		       unsigned long mask, unsigned int align_order)
+static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
+			      void *page, unsigned int npages,
+			      enum dma_data_direction direction,
+			      unsigned long mask, unsigned int align_order)
 {
 	unsigned long entry, flags;
 	dma_addr_t ret = DMA_ERROR_CODE;
 
 	spin_lock_irqsave(&(tbl->it_lock), flags);
 
-	entry = iommu_range_alloc(tbl, npages, NULL, mask, align_order);
+	entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);
 
 	if (unlikely(entry == DMA_ERROR_CODE)) {
 		spin_unlock_irqrestore(&(tbl->it_lock), flags);
@@ -224,7 +224,6 @@
 			 unsigned int npages)
 {
 	unsigned long entry, free_entry;
-	unsigned long i;
 
 	entry = dma_addr >> IOMMU_PAGE_SHIFT;
 	free_entry = entry - tbl->it_offset;
@@ -246,9 +245,7 @@
 	}
 
 	ppc_md.tce_free(tbl, entry, npages);
-	
-	for (i = 0; i < npages; i++)
-		__clear_bit(free_entry+i, tbl->it_map);
+	iommu_area_free(tbl->it_map, free_entry, npages);
 }
 
 static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
@@ -270,16 +267,18 @@
 	spin_unlock_irqrestore(&(tbl->it_lock), flags);
 }
 
-int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist,
+int iommu_map_sg(struct device *dev, struct scatterlist *sglist,
 		 int nelems, unsigned long mask,
 		 enum dma_data_direction direction)
 {
+	struct iommu_table *tbl = dev->archdata.dma_data;
 	dma_addr_t dma_next = 0, dma_addr;
 	unsigned long flags;
 	struct scatterlist *s, *outs, *segstart;
 	int outcount, incount, i;
 	unsigned int align;
 	unsigned long handle;
+	unsigned int max_seg_size;
 
 	BUG_ON(direction == DMA_NONE);
 
@@ -298,6 +297,7 @@
 
 	spin_lock_irqsave(&(tbl->it_lock), flags);
 
+	max_seg_size = dma_get_max_seg_size(dev);
 	for_each_sg(sglist, s, nelems, i) {
 		unsigned long vaddr, npages, entry, slen;
 
@@ -314,7 +314,7 @@
 		if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && slen >= PAGE_SIZE &&
 		    (vaddr & ~PAGE_MASK) == 0)
 			align = PAGE_SHIFT - IOMMU_PAGE_SHIFT;
-		entry = iommu_range_alloc(tbl, npages, &handle,
+		entry = iommu_range_alloc(dev, tbl, npages, &handle,
 					  mask >> IOMMU_PAGE_SHIFT, align);
 
 		DBG("  - vaddr: %lx, size: %lx\n", vaddr, slen);
@@ -344,7 +344,8 @@
 			/* We cannot merge if:
 			 * - allocated dma_addr isn't contiguous to previous allocation
 			 */
-			if (novmerge || (dma_addr != dma_next)) {
+			if (novmerge || (dma_addr != dma_next) ||
+			    (outs->dma_length + s->length > max_seg_size)) {
 				/* Can't merge: create a new segment */
 				segstart = s;
 				outcount++;
@@ -452,9 +453,6 @@
 struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
 {
 	unsigned long sz;
-	unsigned long start_index, end_index;
-	unsigned long entries_per_4g;
-	unsigned long index;
 	static int welcomed = 0;
 	struct page *page;
 
@@ -476,6 +474,7 @@
 
 #ifdef CONFIG_CRASH_DUMP
 	if (ppc_md.tce_get) {
+		unsigned long index;
 		unsigned long tceval;
 		unsigned long tcecount = 0;
 
@@ -506,23 +505,6 @@
 	ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
 #endif
 
-	/*
-	 * DMA cannot cross 4 GB boundary.  Mark last entry of each 4
-	 * GB chunk as reserved.
-	 */
-	if (protect4gb) {
-		entries_per_4g = 0x100000000l >> IOMMU_PAGE_SHIFT;
-
-		/* Mark the last bit before a 4GB boundary as used */
-		start_index = tbl->it_offset | (entries_per_4g - 1);
-		start_index -= tbl->it_offset;
-
-		end_index = tbl->it_size;
-
-		for (index = start_index; index < end_index - 1; index += entries_per_4g)
-			__set_bit(index, tbl->it_map);
-	}
-
 	if (!welcomed) {
 		printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
 		       novmerge ? "disabled" : "enabled");
@@ -570,9 +552,9 @@
  * need not be page aligned, the dma_addr_t returned will point to the same
  * byte within the page as vaddr.
  */
-dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr,
-		size_t size, unsigned long mask,
-		enum dma_data_direction direction)
+dma_addr_t iommu_map_single(struct device *dev, struct iommu_table *tbl,
+			    void *vaddr, size_t size, unsigned long mask,
+			    enum dma_data_direction direction)
 {
 	dma_addr_t dma_handle = DMA_ERROR_CODE;
 	unsigned long uaddr;
@@ -589,7 +571,7 @@
 		    ((unsigned long)vaddr & ~PAGE_MASK) == 0)
 			align = PAGE_SHIFT - IOMMU_PAGE_SHIFT;
 
-		dma_handle = iommu_alloc(tbl, vaddr, npages, direction,
+		dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
 					 mask >> IOMMU_PAGE_SHIFT, align);
 		if (dma_handle == DMA_ERROR_CODE) {
 			if (printk_ratelimit())  {
@@ -621,8 +603,9 @@
  * Returns the virtual address of the buffer and sets dma_handle
  * to the dma address (mapping) of the first page.
  */
-void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size,
-		dma_addr_t *dma_handle, unsigned long mask, gfp_t flag, int node)
+void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
+			   size_t size,	dma_addr_t *dma_handle,
+			   unsigned long mask, gfp_t flag, int node)
 {
 	void *ret = NULL;
 	dma_addr_t mapping;
@@ -656,7 +639,7 @@
 	/* Set up tces to cover the allocated range */
 	nio_pages = size >> IOMMU_PAGE_SHIFT;
 	io_order = get_iommu_order(size);
-	mapping = iommu_alloc(tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
+	mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
 			      mask >> IOMMU_PAGE_SHIFT, io_order);
 	if (mapping == DMA_ERROR_CODE) {
 		free_pages((unsigned long)ret, order);
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 6448872..f80f90c 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -86,7 +86,7 @@
 	return ret;
 }
 
-void pgd_free(pgd_t *pgd)
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	free_pages((unsigned long)pgd, PGDIR_ORDER);
 }
@@ -123,7 +123,7 @@
 	return ptepage;
 }
 
-void pte_free_kernel(pte_t *pte)
+void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 #ifdef CONFIG_SMP
 	hash_page_sync();
@@ -131,7 +131,7 @@
 	free_page((unsigned long)pte);
 }
 
-void pte_free(struct page *ptepage)
+void pte_free(struct mm_struct *mm, struct page *ptepage)
 {
 #ifdef CONFIG_SMP
 	hash_page_sync();
diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index 6a0c6f6..11fa3c7 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -199,7 +199,7 @@
 
 void *iseries_hv_alloc(size_t size, dma_addr_t *dma_handle, gfp_t flag)
 {
-	return iommu_alloc_coherent(&vio_iommu_table, size, dma_handle,
+	return iommu_alloc_coherent(NULL, &vio_iommu_table, size, dma_handle,
 				DMA_32BIT_MASK, flag, -1);
 }
 EXPORT_SYMBOL_GPL(iseries_hv_alloc);
@@ -213,7 +213,7 @@
 dma_addr_t iseries_hv_map(void *vaddr, size_t size,
 			enum dma_data_direction direction)
 {
-	return iommu_map_single(&vio_iommu_table, vaddr, size,
+	return iommu_map_single(NULL, &vio_iommu_table, vaddr, size,
 				DMA_32BIT_MASK, direction);
 }
 
diff --git a/arch/ppc/mm/pgtable.c b/arch/ppc/mm/pgtable.c
index fadacfd..409fcaa 100644
--- a/arch/ppc/mm/pgtable.c
+++ b/arch/ppc/mm/pgtable.c
@@ -74,7 +74,7 @@
 	return ret;
 }
 
-void pgd_free(pgd_t *pgd)
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	free_pages((unsigned long)pgd, PGDIR_ORDER);
 }
@@ -111,7 +111,7 @@
 	return ptepage;
 }
 
-void pte_free_kernel(pte_t *pte)
+void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 #ifdef CONFIG_SMP
 	hash_page_sync();
@@ -119,7 +119,7 @@
 	free_page((unsigned long)pte);
 }
 
-void pte_free(struct page *ptepage)
+void pte_free(struct mm_struct *mm, struct page *ptepage)
 {
 #ifdef CONFIG_SMP
 	hash_page_sync();
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 82cbffd..92a4f7b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -16,6 +16,9 @@
 config STACKTRACE_SUPPORT
 	def_bool y
 
+config HAVE_LATENCYTOP_SUPPORT
+	def_bool y
+
 config RWSEM_GENERIC_SPINLOCK
 	bool
 
@@ -47,6 +50,11 @@
 config NO_DMA
 	def_bool y
 
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPT
+
 mainmenu "Linux Kernel Configuration"
 
 config S390
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index 2283933..4599fa0 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -6,4 +6,12 @@
 
 source "lib/Kconfig.debug"
 
+config DEBUG_PAGEALLOC
+	bool "Debug page memory allocations"
+	depends on DEBUG_KERNEL
+	help
+	  Unmap pages from the kernel linear mapping after free_pages().
+	  This results in a slowdown, but helps to find certain types of
+	  memory corruptions.
+
 endmenu
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 6ee1bed..062c3d4 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1698,14 +1698,6 @@
 	llgfr	%r4,%r4			# compat_size_t
 	jg	compat_sys_signalfd
 
-	.globl	compat_sys_timerfd_wrapper
-compat_sys_timerfd_wrapper:
-	lgfr	%r2,%r2			# int
-	lgfr	%r3,%r3			# int
-	lgfr	%r4,%r4			# int
-	llgtr	%r5,%r5			# struct compat_itimerspec *
-	jg	compat_sys_timerfd
-
 	.globl	sys_eventfd_wrapper
 sys_eventfd_wrapper:
 	llgfr	%r2,%r2			# unsigned int
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 1a6dac8..6766e37 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -11,6 +11,7 @@
 
 #include <linux/sys.h>
 #include <linux/linkage.h>
+#include <linux/init.h>
 #include <asm/cache.h>
 #include <asm/lowcore.h>
 #include <asm/errno.h>
@@ -830,9 +831,7 @@
  * Restart interruption handler, kick starter for additional CPUs
  */
 #ifdef CONFIG_SMP
-#ifndef CONFIG_HOTPLUG_CPU
-	.section .init.text,"ax"
-#endif
+	__CPUINIT
 	.globl restart_int_handler
 restart_int_handler:
 	l	%r15,__LC_SAVE_AREA+60	# load ksp
@@ -845,9 +844,7 @@
 	br	%r14			# branch to start_secondary
 restart_addr:
 	.long	start_secondary
-#ifndef CONFIG_HOTPLUG_CPU
 	.previous
-#endif
 #else
 /*
  * If we do not run with SMP enabled, let the new CPU crash ...
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index a3e47b8..efde6e1 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -11,6 +11,7 @@
 
 #include <linux/sys.h>
 #include <linux/linkage.h>
+#include <linux/init.h>
 #include <asm/cache.h>
 #include <asm/lowcore.h>
 #include <asm/errno.h>
@@ -801,9 +802,7 @@
  * Restart interruption handler, kick starter for additional CPUs
  */
 #ifdef CONFIG_SMP
-#ifndef CONFIG_HOTPLUG_CPU
-	.section .init.text,"ax"
-#endif
+	__CPUINIT
 	.globl restart_int_handler
 restart_int_handler:
 	lg	%r15,__LC_SAVE_AREA+120 # load ksp
@@ -814,9 +813,7 @@
 	lmg	%r6,%r15,__SF_GPRS(%r15) # load registers from clone
 	stosm	__SF_EMPTY(%r15),0x04	# now we can turn dat on
 	jg	start_secondary
-#ifndef CONFIG_HOTPLUG_CPU
 	.previous
-#endif
 #else
 /*
  * If we do not run with SMP enabled, let the new CPU crash ...
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index db28cca..60acdc2 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -439,7 +439,7 @@
 		reipl_ccw_dev(&ipl_info.data.ccw.dev_id);
 }
 
-static int ipl_init(void)
+static int __init ipl_init(void)
 {
 	int rc;
 
@@ -471,8 +471,11 @@
 	return 0;
 }
 
-static struct shutdown_action ipl_action = {SHUTDOWN_ACTION_IPL_STR, ipl_run,
-					    ipl_init};
+static struct shutdown_action __refdata ipl_action = {
+	.name	= SHUTDOWN_ACTION_IPL_STR,
+	.fn	= ipl_run,
+	.init	= ipl_init,
+};
 
 /*
  * reipl shutdown action: Reboot Linux on shutdown.
@@ -792,7 +795,7 @@
 	return 0;
 }
 
-static int reipl_init(void)
+static int __init reipl_init(void)
 {
 	int rc;
 
@@ -819,8 +822,11 @@
 	return 0;
 }
 
-static struct shutdown_action reipl_action = {SHUTDOWN_ACTION_REIPL_STR,
-					      reipl_run, reipl_init};
+static struct shutdown_action __refdata reipl_action = {
+	.name	= SHUTDOWN_ACTION_REIPL_STR,
+	.fn	= reipl_run,
+	.init	= reipl_init,
+};
 
 /*
  * dump shutdown action: Dump Linux on shutdown.
@@ -998,7 +1004,7 @@
 	return 0;
 }
 
-static int dump_init(void)
+static int __init dump_init(void)
 {
 	int rc;
 
@@ -1020,8 +1026,11 @@
 	return 0;
 }
 
-static struct shutdown_action dump_action = {SHUTDOWN_ACTION_DUMP_STR,
-					     dump_run, dump_init};
+static struct shutdown_action __refdata dump_action = {
+	.name	= SHUTDOWN_ACTION_DUMP_STR,
+	.fn	= dump_run,
+	.init	= dump_init,
+};
 
 /*
  * vmcmd shutdown action: Trigger vm command on shutdown.
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 766c783..29ae165 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -77,7 +77,7 @@
 unsigned long elf_hwcap = 0;
 char elf_platform[ELF_PLATFORM_SIZE];
 
-struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS];
+struct mem_chunk __meminitdata memory_chunk[MEMORY_CHUNKS];
 volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */
 static unsigned long __initdata memory_end;
 
@@ -145,7 +145,7 @@
 
 static int __init conmode_setup(char *str)
 {
-#if defined(CONFIG_SCLP_CONSOLE)
+#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 	if (strncmp(str, "hwc", 4) == 0 || strncmp(str, "sclp", 5) == 0)
                 SET_CONSOLE_SCLP;
 #endif
@@ -183,7 +183,7 @@
 		 */
 		cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
 		if (ptr == NULL) {
-#if defined(CONFIG_SCLP_CONSOLE)
+#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 			SET_CONSOLE_SCLP;
 #endif
 			return;
@@ -193,7 +193,7 @@
 			SET_CONSOLE_3270;
 #elif defined(CONFIG_TN3215_CONSOLE)
 			SET_CONSOLE_3215;
-#elif defined(CONFIG_SCLP_CONSOLE)
+#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 			SET_CONSOLE_SCLP;
 #endif
 		} else if (strncmp(ptr + 8, "3215", 4) == 0) {
@@ -201,7 +201,7 @@
 			SET_CONSOLE_3215;
 #elif defined(CONFIG_TN3270_CONSOLE)
 			SET_CONSOLE_3270;
-#elif defined(CONFIG_SCLP_CONSOLE)
+#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 			SET_CONSOLE_SCLP;
 #endif
 		}
@@ -212,7 +212,7 @@
 		SET_CONSOLE_3270;
 #endif
 	} else {
-#if defined(CONFIG_SCLP_CONSOLE)
+#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 		SET_CONSOLE_SCLP;
 #endif
 	}
@@ -528,7 +528,7 @@
 	memory_size = 0;
 	memory_end &= PAGE_MASK;
 
-	max_mem = memory_end ? min(VMALLOC_START, memory_end) : VMALLOC_START;
+	max_mem = memory_end ? min(VMEM_MAX_PHYS, memory_end) : VMEM_MAX_PHYS;
 	memory_end = min(max_mem, memory_end);
 
 	/*
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index aa37fa1..8506065 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -225,12 +225,11 @@
  * You must not call this function with disabled interrupts or from a
  * hardware interrupt handler or from a bottom half handler.
  */
-int
-smp_call_function_mask(cpumask_t mask,
-			void (*func)(void *), void *info,
-			int wait)
+int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
+			   int wait)
 {
 	preempt_disable();
+	cpu_clear(smp_processor_id(), mask);
 	__smp_call_function_map(func, info, 0, wait, mask);
 	preempt_enable();
 	return 0;
@@ -1008,7 +1007,7 @@
 	.notifier_call = smp_cpu_notify,
 };
 
-static int smp_add_present_cpu(int cpu)
+static int __devinit smp_add_present_cpu(int cpu)
 {
 	struct cpu *c = &per_cpu(cpu_devices, cpu);
 	struct sys_device *s = &c->sysdev;
@@ -1036,8 +1035,8 @@
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static ssize_t rescan_store(struct sys_device *dev, const char *buf,
-			    size_t count)
+static ssize_t __ref rescan_store(struct sys_device *dev,
+				  const char *buf, size_t count)
 {
 	cpumask_t newcpus;
 	int cpu;
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index da69247..85e46a5 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -14,7 +14,8 @@
 static unsigned long save_context_stack(struct stack_trace *trace,
 					unsigned long sp,
 					unsigned long low,
-					unsigned long high)
+					unsigned long high,
+					int savesched)
 {
 	struct stack_frame *sf;
 	struct pt_regs *regs;
@@ -47,10 +48,12 @@
 			return sp;
 		regs = (struct pt_regs *)sp;
 		addr = regs->psw.addr & PSW_ADDR_INSN;
-		if (!trace->skip)
-			trace->entries[trace->nr_entries++] = addr;
-		else
-			trace->skip--;
+		if (savesched || !in_sched_functions(addr)) {
+			if (!trace->skip)
+				trace->entries[trace->nr_entries++] = addr;
+			else
+				trace->skip--;
+		}
 		if (trace->nr_entries >= trace->max_entries)
 			return sp;
 		low = sp;
@@ -66,15 +69,27 @@
 	orig_sp = sp & PSW_ADDR_INSN;
 	new_sp = save_context_stack(trace, orig_sp,
 				    S390_lowcore.panic_stack - PAGE_SIZE,
-				    S390_lowcore.panic_stack);
+				    S390_lowcore.panic_stack, 1);
 	if (new_sp != orig_sp)
 		return;
 	new_sp = save_context_stack(trace, new_sp,
 				    S390_lowcore.async_stack - ASYNC_SIZE,
-				    S390_lowcore.async_stack);
+				    S390_lowcore.async_stack, 1);
 	if (new_sp != orig_sp)
 		return;
 	save_context_stack(trace, new_sp,
 			   S390_lowcore.thread_info,
-			   S390_lowcore.thread_info + THREAD_SIZE);
+			   S390_lowcore.thread_info + THREAD_SIZE, 1);
+}
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	unsigned long sp, low, high;
+
+	sp = tsk->thread.ksp & PSW_ADDR_INSN;
+	low = (unsigned long) task_stack_page(tsk);
+	high = (unsigned long) task_pt_regs(tsk);
+	save_context_stack(trace, sp, low, high, 0);
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 9e26ed9..25eac78 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -325,5 +325,5 @@
 SYSCALL(s390_fallocate,sys_fallocate,sys_fallocate_wrapper)
 SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat_wrapper)	/* 315 */
 SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd_wrapper)
-SYSCALL(sys_timerfd,sys_timerfd,compat_sys_timerfd_wrapper)
+NI_SYSCALL						/* 317 old sys_timerfd */
 SYSCALL(sys_eventfd,sys_eventfd,sys_eventfd_wrapper)
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 52b8342..1a2fdb6 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -271,7 +271,10 @@
 	printk("PREEMPT ");
 #endif
 #ifdef CONFIG_SMP
-	printk("SMP");
+	printk("SMP ");
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	printk("DEBUG_PAGEALLOC");
 #endif
 	printk("\n");
 	notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV);
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 7d43c3c..b460715 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -35,7 +35,7 @@
 		KPROBES_TEXT
 		*(.fixup)
 		*(.gnu.warning)
-	} = 0x0700
+	} :text = 0x0700
 
 	_etext = .;		/* End of text section */
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index b234bb4..983ec6e 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -167,6 +167,33 @@
 	       PFN_ALIGN((unsigned long)&_eshared) - 1);
 }
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned long address;
+	int i;
+
+	for (i = 0; i < numpages; i++) {
+		address = page_to_phys(page + i);
+		pgd = pgd_offset_k(address);
+		pud = pud_offset(pgd, address);
+		pmd = pmd_offset(pud, address);
+		pte = pte_offset_kernel(pmd, address);
+		if (!enable) {
+			ptep_invalidate(address, pte);
+			continue;
+		}
+		*pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW));
+		/* Flush cpu write queue. */
+		mb();
+	}
+}
+#endif
+
 void free_initmem(void)
 {
         unsigned long addr;
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 79d13a1..7c1287c 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -62,7 +62,7 @@
 	}
 }
 
-static void __init_refok *vmem_alloc_pages(unsigned int order)
+static void __ref *vmem_alloc_pages(unsigned int order)
 {
 	if (slab_is_available())
 		return (void *)__get_free_pages(GFP_KERNEL, order);
@@ -250,7 +250,7 @@
 {
 	struct memory_segment *tmp;
 
-	if (seg->start + seg->size >= VMALLOC_START ||
+	if (seg->start + seg->size >= VMEM_MAX_PHYS ||
 	    seg->start + seg->size < seg->start)
 		return -ERANGE;
 
@@ -360,7 +360,6 @@
 {
 	int i;
 
-	BUILD_BUG_ON((unsigned long)VMEM_MAP + VMEM_MAP_SIZE > VMEM_MAP_MAX);
 	NODE_DATA(0)->node_mem_map = VMEM_MAP;
 	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
 		vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);
diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S
index 5572284..ee010f4 100644
--- a/arch/sparc/kernel/systbls.S
+++ b/arch/sparc/kernel/systbls.S
@@ -79,7 +79,7 @@
 /*295*/	.long sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare
 /*300*/	.long sys_set_robust_list, sys_get_robust_list, sys_migrate_pages, sys_mbind, sys_get_mempolicy
 /*305*/	.long sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
-/*310*/	.long sys_utimensat, sys_signalfd, sys_timerfd, sys_eventfd, sys_fallocate
+/*310*/	.long sys_utimensat, sys_signalfd, sys_ni_syscall, sys_eventfd, sys_fallocate
 
 #ifdef CONFIG_SUNOS_EMUL
 	/* Now the SunOS syscall table. */
diff --git a/arch/sparc64/kernel/iommu.c b/arch/sparc64/kernel/iommu.c
index 070a484..4b9115a 100644
--- a/arch/sparc64/kernel/iommu.c
+++ b/arch/sparc64/kernel/iommu.c
@@ -580,7 +580,7 @@
 
 	/* Step 1: Prepare scatter list. */
 
-	npages = prepare_sg(sglist, nelems);
+	npages = prepare_sg(dev, sglist, nelems);
 
 	/* Step 2: Allocate a cluster and context, if necessary. */
 
diff --git a/arch/sparc64/kernel/iommu_common.c b/arch/sparc64/kernel/iommu_common.c
index efd5dff..72a4acf 100644
--- a/arch/sparc64/kernel/iommu_common.c
+++ b/arch/sparc64/kernel/iommu_common.c
@@ -4,6 +4,7 @@
  * Copyright (C) 1999 David S. Miller (davem@redhat.com)
  */
 
+#include <linux/dma-mapping.h>
 #include "iommu_common.h"
 
 /* You are _strongly_ advised to enable the following debugging code
@@ -201,21 +202,24 @@
 }
 #endif
 
-unsigned long prepare_sg(struct scatterlist *sg, int nents)
+unsigned long prepare_sg(struct device *dev, struct scatterlist *sg, int nents)
 {
 	struct scatterlist *dma_sg = sg;
 	unsigned long prev;
 	u32 dent_addr, dent_len;
+	unsigned int max_seg_size;
 
 	prev  = (unsigned long) sg_virt(sg);
 	prev += (unsigned long) (dent_len = sg->length);
 	dent_addr = (u32) ((unsigned long)(sg_virt(sg)) & (IO_PAGE_SIZE - 1UL));
+	max_seg_size = dma_get_max_seg_size(dev);
 	while (--nents) {
 		unsigned long addr;
 
 		sg = sg_next(sg);
 		addr = (unsigned long) sg_virt(sg);
-		if (! VCONTIG(prev, addr)) {
+		if (! VCONTIG(prev, addr) ||
+			dent_len + sg->length > max_seg_size) {
 			dma_sg->dma_address = dent_addr;
 			dma_sg->dma_length = dent_len;
 			dma_sg = sg_next(dma_sg);
diff --git a/arch/sparc64/kernel/iommu_common.h b/arch/sparc64/kernel/iommu_common.h
index 75b5a58..a90d046 100644
--- a/arch/sparc64/kernel/iommu_common.h
+++ b/arch/sparc64/kernel/iommu_common.h
@@ -9,6 +9,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/scatterlist.h>
+#include <linux/device.h>
 
 #include <asm/iommu.h>
 #include <asm/scatterlist.h>
@@ -46,4 +47,4 @@
 #define VCONTIG(__X, __Y)	(((__X) == (__Y)) || \
 				 (((__X) | (__Y)) << (64UL - PAGE_SHIFT)) == 0UL)
 
-extern unsigned long prepare_sg(struct scatterlist *sg, int nents);
+extern unsigned long prepare_sg(struct device *dev, struct scatterlist *sg, int nents);
diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c
index 1aa8e04..5ea2eab 100644
--- a/arch/sparc64/kernel/pci_sun4v.c
+++ b/arch/sparc64/kernel/pci_sun4v.c
@@ -490,7 +490,7 @@
 		goto bad;
 
 	/* Step 1: Prepare scatter list. */
-	npages = prepare_sg(sglist, nelems);
+	npages = prepare_sg(dev, sglist, nelems);
 
 	/* Step 2: Allocate a cluster and context, if necessary. */
 	spin_lock_irqsave(&iommu->lock, flags);
@@ -625,8 +625,8 @@
 	/* XXX register error interrupt handlers XXX */
 }
 
-static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
-					    struct iommu *iommu)
+static unsigned long __init probe_existing_entries(struct pci_pbm_info *pbm,
+						   struct iommu *iommu)
 {
 	struct iommu_arena *arena = &iommu->arena;
 	unsigned long i, cnt = 0;
@@ -653,7 +653,7 @@
 	return cnt;
 }
 
-static void pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
+static void __init pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
 {
 	struct iommu *iommu = pbm->iommu;
 	struct property *prop;
diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S
index 06d1090..b805890 100644
--- a/arch/sparc64/kernel/systbls.S
+++ b/arch/sparc64/kernel/systbls.S
@@ -80,7 +80,7 @@
 	.word sys_fchmodat, sys_faccessat, compat_sys_pselect6, compat_sys_ppoll, sys_unshare
 /*300*/	.word compat_sys_set_robust_list, compat_sys_get_robust_list, compat_sys_migrate_pages, compat_sys_mbind, compat_sys_get_mempolicy
 	.word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait
-/*310*/	.word compat_sys_utimensat, compat_sys_signalfd, compat_sys_timerfd, sys_eventfd, compat_sys_fallocate
+/*310*/	.word compat_sys_utimensat, compat_sys_signalfd, sys_ni_syscall, sys_eventfd, compat_sys_fallocate
 
 #endif /* CONFIG_COMPAT */
 
@@ -152,7 +152,7 @@
 	.word sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare
 /*300*/	.word sys_set_robust_list, sys_get_robust_list, sys_migrate_pages, sys_mbind, sys_get_mempolicy
 	.word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
-/*310*/	.word sys_utimensat, sys_signalfd, sys_timerfd, sys_eventfd, sys_fallocate
+/*310*/	.word sys_utimensat, sys_signalfd, sys_ni_syscall, sys_eventfd, sys_fallocate
 
 #if defined(CONFIG_SUNOS_EMUL) || defined(CONFIG_SOLARIS_EMUL) || \
     defined(CONFIG_SOLARIS_EMUL_MODULE)
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 55945db..99e51d0 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -68,6 +68,10 @@
 	bool
 	default y
 
+config HZ
+	int
+	default 100
+
 menu "UML-specific options"
 
 config STATIC_LINK
@@ -95,23 +99,6 @@
 	default y
 	depends on !LD_SCRIPT_STATIC
 
-config NET
-	bool "Networking support"
-	help
-	  Unless you really know what you are doing, you should say Y here.
-	  The reason is that some programs need kernel networking support even
-	  when running on a stand-alone machine that isn't connected to any
-	  other computer. If you are upgrading from an older kernel, you
-	  should consider updating your networking tools too because changes
-	  in the kernel and the tools often go hand in hand. The tools are
-	  contained in the package net-tools, the location and version number
-	  of which are given in <file:Documentation/Changes>.
-
-	  For a general introduction to Linux networking, it is highly
-	  recommended to read the NET-HOWTO, available from
-	  <http://www.tldp.org/docs.html#howto>.
-
-
 source "fs/Kconfig.binfmt"
 
 config HOSTFS
@@ -145,7 +132,7 @@
 	  by removing or changing anything in /proc which gives away the
 	  identity of a UML.
 
-	  See <http://user-mode-linux.sf.net/hppfs.html> for more information.
+	  See <http://user-mode-linux.sf.net/old/hppfs.html> for more information.
 
 	  You only need this if you are setting up a UML honeypot.  Otherwise,
 	  it is safe to say 'N' here.
@@ -189,8 +176,7 @@
 config SMP
 	bool "Symmetric multi-processing support (EXPERIMENTAL)"
 	default n
-	#SMP_BROKEN is for x86_64.
-	depends on EXPERIMENTAL && (!SMP_BROKEN || (BROKEN && SMP_BROKEN))
+	depends on BROKEN
 	help
 	  This option enables UML SMP support.
 	  It is NOT related to having a real SMP box. Not directly, at least.
diff --git a/arch/um/Kconfig.char b/arch/um/Kconfig.char
index 9a78d35..3a4b396 100644
--- a/arch/um/Kconfig.char
+++ b/arch/um/Kconfig.char
@@ -18,7 +18,7 @@
           lines on the UML that are usually made to show up on the host as
           ttys or ptys.
 
-          See <http://user-mode-linux.sourceforge.net/input.html> for more
+          See <http://user-mode-linux.sourceforge.net/old/input.html> for more
           information and command line examples of how to use this facility.
 
           Unless you have a specific reason for disabling this, say Y.
diff --git a/arch/um/Kconfig.debug b/arch/um/Kconfig.debug
index 1f6462f..8fce5e5 100644
--- a/arch/um/Kconfig.debug
+++ b/arch/um/Kconfig.debug
@@ -4,12 +4,12 @@
 
 config GPROF
 	bool "Enable gprof support"
-	depends on DEBUG_INFO
+	depends on DEBUG_INFO && FRAME_POINTER
 	help
 	  This allows profiling of a User-Mode Linux kernel with the gprof
 	  utility.
 
-	  See <http://user-mode-linux.sourceforge.net/gprof.html> for more
+	  See <http://user-mode-linux.sourceforge.net/old/gprof.html> for more
 	  details.
 
 	  If you're involved in UML kernel development and want to use gprof,
@@ -22,7 +22,7 @@
 	  This option allows developers to retrieve coverage data from a UML
 	  session.
 
-	  See <http://user-mode-linux.sourceforge.net/gprof.html> for more
+	  See <http://user-mode-linux.sourceforge.net/old/gprof.html> for more
 	  details.
 
 	  If you're involved in UML kernel development and want to use gcov,
diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net
index 66e5002..9e9a4aa 100644
--- a/arch/um/Kconfig.net
+++ b/arch/um/Kconfig.net
@@ -14,7 +14,7 @@
 
         For more information, including explanations of the networking and
         sample configurations, see
-        <http://user-mode-linux.sourceforge.net/networking.html>.
+        <http://user-mode-linux.sourceforge.net/old/networking.html>.
 
         If you'd like to be able to enable networking in the User-Mode
         linux environment, say Y; otherwise say N.  Note that you must
@@ -38,7 +38,7 @@
         CONFIG_NETLINK_DEV configured as Y or M.
 
         For more information, see
-        <http://user-mode-linux.sourceforge.net/networking.html>  That site
+        <http://user-mode-linux.sourceforge.net/old/networking.html>  That site
         has examples of the UML command line to use to enable Ethertap
         networking.
 
@@ -72,7 +72,7 @@
         To use this, your host must support slip devices.
 
         For more information, see
-        <http://user-mode-linux.sourceforge.net/networking.html>.  That site
+        <http://user-mode-linux.sourceforge.net/old/networking.html>.  That site
         has examples of the UML command line to use to enable slip
         networking, and details of a few quirks with it.
 
@@ -96,7 +96,7 @@
         networking daemon on the host.
 
         For more information, see
-        <http://user-mode-linux.sourceforge.net/networking.html>  That site
+        <http://user-mode-linux.sourceforge.net/old/networking.html>  That site
         has examples of the UML command line to use to enable Daemon
         networking.
 
@@ -144,7 +144,7 @@
         To use this, your host kernel(s) must support IP Multicasting.
 
         For more information, see
-        <http://user-mode-linux.sourceforge.net/networking.html>  That site
+        <http://user-mode-linux.sourceforge.net/old/networking.html>  That site
         has examples of the UML command line to use to enable Multicast
         networking, and notes about the security of this approach.
 
@@ -165,7 +165,7 @@
 	installed in order to build the pcap transport into UML.
 
         For more information, see
-        <http://user-mode-linux.sourceforge.net/networking.html>  That site
+        <http://user-mode-linux.sourceforge.net/old/networking.html>  That site
         has examples of the UML command line to use to enable this option.
 
 	If you intend to use UML as a network monitor for the host, say
diff --git a/arch/um/Makefile b/arch/um/Makefile
index ba6813a..cb4af9b 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -49,7 +49,7 @@
 #
 # These apply to USER_CFLAGS to.
 
-KBUILD_CFLAGS += $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\"	\
+KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \
 	$(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap	\
 	-Din6addr_loopback=kernel_in6addr_loopback \
 	-Din6addr_any=kernel_in6addr_any
@@ -58,7 +58,7 @@
 
 USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -D__KERNEL__,,\
 	$(patsubst -I%,,$(KBUILD_CFLAGS)))) $(ARCH_INCLUDE) $(MODE_INCLUDE) \
-	-D_FILE_OFFSET_BITS=64
+	$(filter -I%,$(CFLAGS)) -D_FILE_OFFSET_BITS=64
 
 include $(srctree)/$(ARCH_DIR)/Makefile-$(SUBARCH)
 
@@ -130,7 +130,9 @@
 # The wrappers will select whether using "malloc" or the kernel allocator.
 LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc
 
-CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS)
+LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt))
+
+CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE)
 define cmd_vmlinux__
 	$(CC) $(CFLAGS_vmlinux) -o $@ \
 	-Wl,-T,$(vmlinux-lds) $(vmlinux-init) \
@@ -158,7 +160,7 @@
 	$(Q)mkdir -p $(objtree)/include/asm-um
 	$(Q)ln -fsn $(srctree)/include/asm-um/$(basename $(notdir $@))-$(SUBARCH)$(suffix $@) $@
 else
-	$(Q)cd $(TOPDIR)/$(dir $@) ; \
+	$(Q)cd $(srctree)/$(dir $@) ; \
 	ln -sf $(basename $(notdir $@))-$(SUBARCH)$(suffix $@) $(notdir $@)
 endif
 
@@ -168,7 +170,7 @@
 	$(Q)mkdir -p $(objtree)/include/asm-um
 	$(Q)ln -fsn $(srctree)/include/asm-$(HEADER_ARCH) include/asm-um/arch
 else
-	$(Q)cd $(TOPDIR)/include/asm-um && ln -fsn ../asm-$(HEADER_ARCH) arch
+	$(Q)cd $(srctree)/include/asm-um && ln -fsn ../asm-$(HEADER_ARCH) arch
 endif
 
 $(objtree)/$(ARCH_DIR)/include:
diff --git a/arch/um/Makefile-tt b/arch/um/Makefile-tt
deleted file mode 100644
index 03f7b10..0000000
--- a/arch/um/Makefile-tt
+++ /dev/null
@@ -1,5 +0,0 @@
-# 
-# Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
-# Licensed under the GPL
-#
-
diff --git a/arch/um/defconfig b/arch/um/defconfig
index f609ede..86db286 100644
--- a/arch/um/defconfig
+++ b/arch/um/defconfig
@@ -77,7 +77,7 @@
 CONFIG_NET=y
 CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=m
-# CONFIG_HOSTFS is not set
+CONFIG_HOSTFS=y
 # CONFIG_HPPFS is not set
 CONFIG_MCONSOLE=y
 CONFIG_MAGIC_SYSRQ=y
@@ -188,7 +188,7 @@
 CONFIG_SSL_CHAN="pts"
 CONFIG_UNIX98_PTYS=y
 CONFIG_LEGACY_PTYS=y
-CONFIG_LEGACY_PTY_COUNT=256
+CONFIG_LEGACY_PTY_COUNT=32
 # CONFIG_WATCHDOG is not set
 CONFIG_UML_SOUND=m
 CONFIG_SOUND=m
@@ -508,7 +508,7 @@
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_SCHEDSTATS is not set
-CONFIG_DEBUG_SLAB=y
+# CONFIG_DEBUG_SLAB is not set
 # CONFIG_DEBUG_SLAB_LEAK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK is not set
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 83bf15a..2c898c4 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -8,6 +8,7 @@
 #include "chan_kern.h"
 #include "irq_kern.h"
 #include "irq_user.h"
+#include "kern_util.h"
 #include "os.h"
 
 #define LINE_BUFSIZE 4096
@@ -48,7 +49,7 @@
 	n = line->head - line->tail;
 
 	if (n <= 0)
-		n = LINE_BUFSIZE + n; /* The other case */
+		n += LINE_BUFSIZE; /* The other case */
 	return n - 1;
 }
 
@@ -58,17 +59,10 @@
 	unsigned long flags;
 	int room;
 
-	if (tty->stopped)
-		return 0;
-
 	spin_lock_irqsave(&line->lock, flags);
 	room = write_room(line);
 	spin_unlock_irqrestore(&line->lock, flags);
 
-	/*XXX: Warning to remove */
-	if (0 == room)
-		printk(KERN_DEBUG "%s: %s: no room left in buffer\n",
-		       __FUNCTION__,tty->name);
 	return room;
 }
 
@@ -79,8 +73,7 @@
 	int ret;
 
 	spin_lock_irqsave(&line->lock, flags);
-
-	/*write_room subtracts 1 for the needed NULL, so we readd it.*/
+	/* write_room subtracts 1 for the needed NULL, so we re-add it. */
 	ret = LINE_BUFSIZE - (write_room(line) + 1);
 	spin_unlock_irqrestore(&line->lock, flags);
 
@@ -184,10 +177,6 @@
 	unsigned long flags;
 	int err;
 
-	/*XXX: copied from line_write, verify if it is correct!*/
-	if (tty->stopped)
-		return;
-
 	spin_lock_irqsave(&line->lock, flags);
 	err = flush_buffer(line);
 	spin_unlock_irqrestore(&line->lock, flags);
@@ -213,9 +202,6 @@
 	unsigned long flags;
 	int n, ret = 0;
 
-	if (tty->stopped)
-		return 0;
-
 	spin_lock_irqsave(&line->lock, flags);
 	if (line->head != line->tail)
 		ret = buffer_data(line, buf, len);
@@ -788,9 +774,11 @@
 	tty = winch->tty;
 	if (tty != NULL) {
 		line = tty->driver_data;
-		chan_window_size(&line->chan_list, &tty->winsize.ws_row,
-				 &tty->winsize.ws_col);
-		kill_pgrp(tty->pgrp, SIGWINCH, 1);
+		if (line != NULL) {
+			chan_window_size(&line->chan_list, &tty->winsize.ws_row,
+					 &tty->winsize.ws_col);
+			kill_pgrp(tty->pgrp, SIGWINCH, 1);
+		}
 	}
  out:
 	if (winch->fd != -1)
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 0f3c7d1..ebb265c 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -1,23 +1,25 @@
 /*
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org)
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright (C) 2001 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/console.h"
-#include "linux/ctype.h"
-#include "linux/interrupt.h"
-#include "linux/list.h"
-#include "linux/mm.h"
-#include "linux/module.h"
-#include "linux/notifier.h"
-#include "linux/reboot.h"
-#include "linux/proc_fs.h"
-#include "linux/slab.h"
-#include "linux/syscalls.h"
-#include "linux/utsname.h"
-#include "linux/workqueue.h"
-#include "asm/uaccess.h"
+#include <linux/console.h>
+#include <linux/ctype.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/reboot.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/utsname.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include <asm/uaccess.h>
+
 #include "init.h"
 #include "irq_kern.h"
 #include "irq_user.h"
@@ -305,7 +307,9 @@
 	deactivate_fd(req->originating_fd, MCONSOLE_IRQ);
 	os_set_fd_block(req->originating_fd, 1);
 	mconsole_reply(req, "stopped", 0, 0);
-	while (mconsole_get_request(req->originating_fd, req)) {
+	for (;;) {
+		if (!mconsole_get_request(req->originating_fd, req))
+			continue;
 		if (req->cmd->handler == mconsole_go)
 			break;
 		if (req->cmd->handler == mconsole_stop) {
@@ -358,7 +362,7 @@
 	void *pages[UNPLUGGED_PER_PAGE];
 };
 
-static DECLARE_MUTEX(plug_mem_mutex);
+static DEFINE_MUTEX(plug_mem_mutex);
 static unsigned long long unplugged_pages_count = 0;
 static LIST_HEAD(unplugged_pages);
 static int unplug_index = UNPLUGGED_PER_PAGE;
@@ -394,7 +398,7 @@
 
 	diff /= PAGE_SIZE;
 
-	down(&plug_mem_mutex);
+	mutex_lock(&plug_mem_mutex);
 	for (i = 0; i < diff; i++) {
 		struct unplugged_pages *unplugged;
 		void *addr;
@@ -451,7 +455,7 @@
 
 	err = 0;
 out_unlock:
-	up(&plug_mem_mutex);
+	mutex_unlock(&plug_mem_mutex);
 out:
 	return err;
 }
@@ -741,7 +745,6 @@
 {
 	char *ptr = req->request.data;
 	int pid_requested= -1;
-	struct task_struct *from = NULL;
 	struct task_struct *to = NULL;
 
 	/*
@@ -763,9 +766,7 @@
 		return;
 	}
 
-	from = current;
-
-	to = find_task_by_pid(pid_requested);
+	to = find_task_by_pid_ns(pid_requested, &init_pid_ns);
 	if ((to == NULL) || (pid_requested == 0)) {
 		mconsole_reply(req, "Couldn't find that pid", 1, 0);
 		return;
@@ -795,6 +796,8 @@
 		printk(KERN_ERR "Failed to initialize management console\n");
 		return 1;
 	}
+	if (os_set_fd_block(sock, 0))
+		goto out;
 
 	register_reboot_notifier(&reboot_notifier);
 
@@ -803,7 +806,7 @@
 			     "mconsole", (void *)sock);
 	if (err) {
 		printk(KERN_ERR "Failed to get IRQ for management console\n");
-		return 1;
+		goto out;
 	}
 
 	if (notify_socket != NULL) {
@@ -819,6 +822,10 @@
 	printk(KERN_INFO "mconsole (version %d) initialized on %s\n",
 	       MCONSOLE_VERSION, mconsole_socket_name);
 	return 0;
+
+ out:
+	os_close_file(sock);
+	return 1;
 }
 
 __initcall(mconsole_init);
diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c
index 430c024..13af2f0 100644
--- a/arch/um/drivers/mconsole_user.c
+++ b/arch/um/drivers/mconsole_user.c
@@ -83,9 +83,8 @@
 	int len;
 
 	req->originlen = sizeof(req->origin);
-	req->len = recvfrom(fd, &req->request, sizeof(req->request),
-			    MSG_DONTWAIT, (struct sockaddr *) req->origin,
-			    &req->originlen);
+	req->len = recvfrom(fd, &req->request, sizeof(req->request), 0,
+			    (struct sockaddr *) req->origin, &req->originlen);
 	if (req->len < 0)
 		return 0;
 
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 3c6c44c..1e8f41a 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -318,7 +318,7 @@
 	if (str == NULL)
 		goto random;
 
-	for (i = 0;i < 6; i++) {
+	for (i = 0; i < 6; i++) {
 		addr[i] = simple_strtoul(str, &end, 16);
 		if ((end == str) ||
 		   ((*end != ':') && (*end != ',') && (*end != '\0'))) {
@@ -343,14 +343,13 @@
 	}
 	if (!is_local_ether_addr(addr)) {
 		printk(KERN_WARNING
-		       "Warning: attempt to assign a globally valid ethernet "
+		       "Warning: Assigning a globally valid ethernet "
 		       "address to a device\n");
-		printk(KERN_WARNING "You should better enable the 2nd "
-		       "rightmost bit in the first byte of the MAC,\n");
+		printk(KERN_WARNING "You should set the 2nd rightmost bit in "
+		       "the first byte of the MAC,\n");
 		printk(KERN_WARNING "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n",
 		       addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4],
 		       addr[5]);
-		goto random;
 	}
 	return;
 
@@ -368,7 +367,6 @@
 		.name  = DRIVER_NAME,
 	},
 };
-static int driver_registered;
 
 static void net_device_release(struct device *dev)
 {
@@ -383,6 +381,12 @@
 	free_netdev(netdev);
 }
 
+/*
+ * Ensures that platform_driver_register is called only once by
+ * eth_configure.  Will be set in an initcall.
+ */
+static int driver_registered;
+
 static void eth_configure(int n, void *init, char *mac,
 			  struct transport *transport)
 {
diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c
index 29185ca..abf2653 100644
--- a/arch/um/drivers/net_user.c
+++ b/arch/um/drivers/net_user.c
@@ -201,7 +201,7 @@
 	close(fds[1]);
 
 	if (pid > 0)
-		helper_wait(pid, 0, "change_tramp");
+		helper_wait(pid);
 	return pid;
 }
 
diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c
index 330543b..1993008 100644
--- a/arch/um/drivers/port_kern.c
+++ b/arch/um/drivers/port_kern.c
@@ -6,6 +6,7 @@
 #include "linux/completion.h"
 #include "linux/interrupt.h"
 #include "linux/list.h"
+#include "linux/mutex.h"
 #include "asm/atomic.h"
 #include "init.h"
 #include "irq_kern.h"
@@ -120,7 +121,7 @@
 	return 0;
 }
 
-static DECLARE_MUTEX(ports_sem);
+static DEFINE_MUTEX(ports_mutex);
 static LIST_HEAD(ports);
 
 static void port_work_proc(struct work_struct *unused)
@@ -161,7 +162,7 @@
 	struct port_dev *dev = NULL;
 	int fd;
 
-	down(&ports_sem);
+	mutex_lock(&ports_mutex);
 	list_for_each(ele, &ports) {
 		port = list_entry(ele, struct port_list, list);
 		if (port->port == port_num)
@@ -216,7 +217,7 @@
  out_free:
 	kfree(port);
  out:
-	up(&ports_sem);
+	mutex_unlock(&ports_mutex);
 	return dev;
 }
 
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index e942e83..71f0959 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -5,6 +5,7 @@
  * This software may be used and distributed according to the terms
  * of the GNU General Public License, incorporated herein by reference.
  */
+#include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/miscdevice.h>
diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c
index b8711e5..8b80505 100644
--- a/arch/um/drivers/slip_user.c
+++ b/arch/um/drivers/slip_user.c
@@ -109,7 +109,7 @@
 	read_output(fds[0], output, output_len);
 	printk("%s", output);
 
-	err = helper_wait(pid, 0, argv[0]);
+	err = helper_wait(pid);
 	close(fds[0]);
 
 out_free:
diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c
index 89c1be2..a0ada8f 100644
--- a/arch/um/drivers/slirp_user.c
+++ b/arch/um/drivers/slirp_user.c
@@ -98,7 +98,7 @@
 		       "(%d)\n", pri->pid, errno);
 	}
 #endif
-	err = helper_wait(pri->pid, 1, "slirp_close");
+	err = helper_wait(pri->pid);
 	if (err < 0)
 		return;
 
diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c
index 875d60d..f1786e6 100644
--- a/arch/um/drivers/ssl.c
+++ b/arch/um/drivers/ssl.c
@@ -15,7 +15,6 @@
 #include "line.h"
 #include "ssl.h"
 #include "chan_kern.h"
-#include "kern_util.h"
 #include "kern.h"
 #include "init.h"
 #include "irq_user.h"
diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c
index 656036e..cec0c33 100644
--- a/arch/um/drivers/stdio_console.c
+++ b/arch/um/drivers/stdio_console.c
@@ -22,7 +22,6 @@
 #include "stdio_console.h"
 #include "line.h"
 #include "chan_kern.h"
-#include "kern_util.h"
 #include "irq_user.h"
 #include "mconsole_kern.h"
 #include "init.h"
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 99f9f96..be3a279 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -49,6 +49,7 @@
 #include "irq_user.h"
 #include "irq_kern.h"
 #include "ubd_user.h"
+#include "kern_util.h"
 #include "os.h"
 #include "mem.h"
 #include "mem_kern.h"
@@ -229,7 +230,7 @@
 	return len;
 }
 
-static void make_ide_entries(char *dev_name)
+static void make_ide_entries(const char *dev_name)
 {
 	struct proc_dir_entry *dir, *ent;
 	char name[64];
@@ -244,7 +245,7 @@
 	ent->data = NULL;
 	ent->read_proc = proc_ide_read_media;
 	ent->write_proc = NULL;
-	sprintf(name,"ide0/%s", dev_name);
+	snprintf(name, sizeof(name), "ide0/%s", dev_name);
 	proc_symlink(dev_name, proc_ide_root, name);
 }
 
@@ -437,7 +438,10 @@
 "    machine by running 'dd' on the device. <n> must be in the range\n"
 "    0 to 7. Appending an 'r' to the number will cause that device\n"
 "    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
-"    an 's' will cause data to be written to disk on the host immediately.\n\n"
+"    an 's' will cause data to be written to disk on the host immediately.\n"
+"    'c' will cause the device to be treated as being shared between multiple\n"
+"    UMLs and file locking will be turned off - this is appropriate for a\n"
+"    cluster filesystem and inappropriate at almost all other times.\n\n"
 );
 
 static int udb_setup(char *str)
@@ -456,20 +460,6 @@
 "    in the boot output.\n\n"
 );
 
-static int fakehd_set = 0;
-static int fakehd(char *str)
-{
-	printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
-	fakehd_set = 1;
-	return 1;
-}
-
-__setup("fakehd", fakehd);
-__uml_help(fakehd,
-"fakehd\n"
-"    Change the ubd device name to \"hd\".\n\n"
-);
-
 static void do_ubd_request(struct request_queue * q);
 
 /* Only changed by ubd_init, which is an initcall. */
@@ -718,8 +708,10 @@
 		ubd_disk_register(fake_major, ubd_dev->size, n,
 				  &fake_gendisk[n]);
 
-	/* perhaps this should also be under the "if (fake_major)" above */
-	/* using the fake_disk->disk_name and also the fakehd_set name */
+	/*
+	 * Perhaps this should also be under the "if (fake_major)" above
+	 * using the fake_disk->disk_name
+	 */
 	if (fake_ide)
 		make_ide_entries(ubd_gendisk[n]->disk_name);
 
diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
index 48fc745..b591bb9 100644
--- a/arch/um/drivers/ubd_user.c
+++ b/arch/um/drivers/ubd_user.c
@@ -16,7 +16,6 @@
 #include <sys/mman.h>
 #include <sys/param.h>
 #include "asm/types.h"
-#include "kern_util.h"
 #include "user.h"
 #include "ubd_user.h"
 #include "os.h"
diff --git a/arch/um/drivers/vde_user.c b/arch/um/drivers/vde_user.c
index d9941fe..56533db 100644
--- a/arch/um/drivers/vde_user.c
+++ b/arch/um/drivers/vde_user.c
@@ -80,7 +80,7 @@
 
 	vpri->args = kmalloc(sizeof(struct vde_open_args), UM_GFP_KERNEL);
 	if (vpri->args == NULL) {
-		printk(UM_KERN_ERR "vde_init_libstuff - vde_open_args"
+		printk(UM_KERN_ERR "vde_init_libstuff - vde_open_args "
 		       "allocation failed");
 		return;
 	}
diff --git a/arch/um/include/arch.h b/arch/um/include/arch.h
index 49c601f..2de92a0 100644
--- a/arch/um/include/arch.h
+++ b/arch/um/include/arch.h
@@ -10,6 +10,6 @@
 
 extern void arch_check_bugs(void);
 extern int arch_fixup(unsigned long address, struct uml_pt_regs *regs);
-extern int arch_handle_signal(int sig, struct uml_pt_regs *regs);
+extern void arch_examine_signal(int sig, struct uml_pt_regs *regs);
 
 #endif
diff --git a/arch/um/include/as-layout.h b/arch/um/include/as-layout.h
index a5cdf95..606bb5c 100644
--- a/arch/um/include/as-layout.h
+++ b/arch/um/include/as-layout.h
@@ -10,24 +10,32 @@
 #include "kern_constants.h"
 
 /*
- * Assembly doesn't want any casting, but C does, so define these
- * without casts here, and define new symbols with casts inside the C
- * section.
+ * Stolen from linux/const.h, which can't be directly included since
+ * this is used in userspace code, which has no access to the kernel
+ * headers.  Changed to be suitable for adding casts to the start,
+ * rather than "UL" to the end.
  */
-#define ASM_STUB_CODE (UML_CONFIG_TOP_ADDR - 2 * UM_KERN_PAGE_SIZE)
-#define ASM_STUB_DATA (UML_CONFIG_TOP_ADDR - UM_KERN_PAGE_SIZE)
-#define ASM_STUB_START ASM_STUB_CODE
 
-/*
- * This file is included by the assembly stubs, which just want the
- * definitions above.
+/* Some constant macros are used in both assembler and
+ * C code.  Therefore we cannot annotate them always with
+ * 'UL' and other type specifiers unilaterally.  We
+ * use the following macros to deal with this.
  */
+
+#ifdef __ASSEMBLY__
+#define _AC(X, Y)	(Y)
+#else
+#define __AC(X, Y)	(X (Y))
+#define _AC(X, Y)	__AC(X, Y)
+#endif
+
+#define STUB_START _AC(, 0x100000)
+#define STUB_CODE _AC((unsigned long), STUB_START)
+#define STUB_DATA _AC((unsigned long), STUB_CODE + UM_KERN_PAGE_SIZE)
+#define STUB_END _AC((unsigned long), STUB_DATA + UM_KERN_PAGE_SIZE)
+
 #ifndef __ASSEMBLY__
 
-#define STUB_CODE ((unsigned long) ASM_STUB_CODE)
-#define STUB_DATA ((unsigned long) ASM_STUB_DATA)
-#define STUB_START ((unsigned long) ASM_STUB_START)
-
 #include "sysdep/ptrace.h"
 
 struct cpu_task {
diff --git a/arch/um/include/chan_user.h b/arch/um/include/chan_user.h
index 5a2263e..9b9ced8 100644
--- a/arch/um/include/chan_user.h
+++ b/arch/um/include/chan_user.h
@@ -48,7 +48,7 @@
 #define __channel_help(fn, prefix) \
 __uml_help(fn, prefix "[0-9]*=<channel description>\n" \
 "    Attach a console or serial line to a host channel.  See\n" \
-"    http://user-mode-linux.sourceforge.net/input.html for a complete\n" \
+"    http://user-mode-linux.sourceforge.net/old/input.html for a complete\n" \
 "    description of this switch.\n\n" \
 );
 
diff --git a/arch/um/include/common-offsets.h b/arch/um/include/common-offsets.h
index 0edab69..b54bd35 100644
--- a/arch/um/include/common-offsets.h
+++ b/arch/um/include/common-offsets.h
@@ -18,6 +18,7 @@
 DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE);
 DEFINE_STR(UM_KERN_INFO, KERN_INFO);
 DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG);
+DEFINE_STR(UM_KERN_CONT, KERN_CONT);
 
 DEFINE(UM_ELF_CLASS, ELF_CLASS);
 DEFINE(UM_ELFCLASS32, ELFCLASS32);
diff --git a/arch/um/include/init.h b/arch/um/include/init.h
index cebc6ca..b00a957 100644
--- a/arch/um/include/init.h
+++ b/arch/um/include/init.h
@@ -40,6 +40,20 @@
 typedef int (*initcall_t)(void);
 typedef void (*exitcall_t)(void);
 
+#ifndef __KERNEL__
+#ifndef __section
+# define __section(S) __attribute__ ((__section__(#S)))
+#endif
+
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) /* GCC >= 3.3 */
+# define __used			__attribute__((__used__))
+#else
+# define __used			__attribute__((__unused__))
+#endif
+
+#else
+#include <linux/compiler.h>
+#endif
 /* These are for everybody (although not all archs will actually
    discard it in modules) */
 #define __init		__section(.init.text)
@@ -127,14 +141,3 @@
 #endif
 
 #endif /* _LINUX_UML_INIT_H */
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/include/irq_user.h b/arch/um/include/irq_user.h
index 884a9c1..e60b318 100644
--- a/arch/um/include/irq_user.h
+++ b/arch/um/include/irq_user.h
@@ -14,7 +14,6 @@
 	int fd;
 	int type;
 	int irq;
-	int pid;
 	int events;
 	int current_events;
 };
diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h
index 74ce8e5..3c34122 100644
--- a/arch/um/include/kern_util.h
+++ b/arch/um/include/kern_util.h
@@ -9,107 +9,61 @@
 #include "sysdep/ptrace.h"
 #include "sysdep/faultinfo.h"
 
-typedef void (*kern_hndl)(int, struct uml_pt_regs *);
-
-struct kern_handlers {
-	kern_hndl relay_signal;
-	kern_hndl winch;
-	kern_hndl bus_handler;
-	kern_hndl page_fault;
-	kern_hndl sigio_handler;
-	kern_hndl timer_handler;
-};
-
-extern const struct kern_handlers handlinfo_kern;
+extern int uml_exitcode;
 
 extern int ncpus;
-extern char *gdb_init;
 extern int kmalloc_ok;
-extern int jail;
-extern int nsyscalls;
 
-#define UML_ROUND_DOWN(addr) ((void *)(((unsigned long) addr) & PAGE_MASK))
 #define UML_ROUND_UP(addr) \
-	UML_ROUND_DOWN(((unsigned long) addr) + PAGE_SIZE - 1)
+	((((unsigned long) addr) + PAGE_SIZE - 1) & PAGE_MASK)
 
-extern int kernel_fork(unsigned long flags, int (*fn)(void *), void * arg);
-extern int kernel_thread_proc(void *data);
-extern void syscall_segv(int sig);
-extern int current_pid(void);
 extern unsigned long alloc_stack(int order, int atomic);
+extern void free_stack(unsigned long stack, int order);
+
 extern int do_signal(void);
-extern int is_stack_fault(unsigned long sp);
+extern void copy_sc(struct uml_pt_regs *regs, void *from);
+extern void interrupt_end(void);
+extern void relay_signal(int sig, struct uml_pt_regs *regs);
+
 extern unsigned long segv(struct faultinfo fi, unsigned long ip,
 			  int is_user, struct uml_pt_regs *regs);
 extern int handle_page_fault(unsigned long address, unsigned long ip,
 			     int is_write, int is_user, int *code_out);
-extern void syscall_ready(void);
-extern void set_tracing(void *t, int tracing);
-extern int is_tracing(void *task);
-extern int segv_syscall(void);
-extern void kern_finish_exec(void *task, int new_pid, unsigned long stack);
-extern unsigned long page_mask(void);
-extern int need_finish_fork(void);
-extern void free_stack(unsigned long stack, int order);
-extern void add_input_request(int op, void (*proc)(int), void *arg);
-extern char *current_cmd(void);
-extern void timer_handler(int sig, struct uml_pt_regs *regs);
-extern int set_signals(int enable);
-extern int pid_to_processor_id(int pid);
-extern void deliver_signals(void *t);
-extern int next_trap_index(int max);
-extern void default_idle(void);
-extern void finish_fork(void);
-extern void paging_init(void);
-extern void init_flush_vm(void);
-extern void *syscall_sp(void *t);
-extern void syscall_trace(struct uml_pt_regs *regs, int entryexit);
+
 extern unsigned int do_IRQ(int irq, struct uml_pt_regs *regs);
-extern void interrupt_end(void);
-extern void initial_thread_cb(void (*proc)(void *), void *arg);
-extern int debugger_signal(int status, int pid);
-extern void debugger_parent_signal(int status, int pid);
-extern void child_signal(int pid, int status);
-extern int init_ptrace_proxy(int idle_pid, int startup, int stop);
-extern int init_parent_proxy(int pid);
-extern int singlestepping(void *t);
-extern void check_stack_overflow(void *ptr);
-extern void relay_signal(int sig, struct uml_pt_regs *regs);
-extern int user_context(unsigned long sp);
-extern void timer_irq(struct uml_pt_regs *regs);
-extern void do_uml_exitcalls(void);
-extern int attach_debugger(int idle_pid, int pid, int stop);
-extern int config_gdb(char *str);
-extern int remove_gdb(void);
-extern char *uml_strdup(char *string);
-extern void unprotect_kernel_mem(void);
-extern void protect_kernel_mem(void);
-extern void uml_cleanup(void);
-extern void lock_signalled_task(void *t);
-extern void IPI_handler(int cpu);
-extern int jail_setup(char *line, int *add);
-extern void *get_init_task(void);
-extern int clear_user_proc(void *buf, int size);
-extern int copy_to_user_proc(void *to, void *from, int size);
-extern int copy_from_user_proc(void *to, void *from, int size);
-extern int strlen_user_proc(char *str);
-extern long execute_syscall(void *r);
 extern int smp_sigio_handler(void);
-extern void *get_current(void);
-extern struct task_struct *get_task(int pid, int require);
-extern void machine_halt(void);
+extern void initial_thread_cb(void (*proc)(void *), void *arg);
 extern int is_syscall(unsigned long addr);
+extern void timer_handler(int sig, struct uml_pt_regs *regs);
 
-extern void free_irq(unsigned int, void *);
-extern int cpu(void);
+extern void timer_handler(int sig, struct uml_pt_regs *regs);
 
-extern void time_init_kern(void);
-
-/* Are we disallowed to sleep? Used to choose between GFP_KERNEL and GFP_ATOMIC. */
-extern int __cant_sleep(void);
-extern void sigio_handler(int sig, struct uml_pt_regs *regs);
-extern void copy_sc(struct uml_pt_regs *regs, void *from);
-extern unsigned long to_irq_stack(unsigned long *mask_out);
-unsigned long from_irq_stack(int nested);
 extern int start_uml(void);
+extern void paging_init(void);
+
+extern void uml_cleanup(void);
+extern void do_uml_exitcalls(void);
+
+/*
+ * Are we disallowed to sleep? Used to choose between GFP_KERNEL and
+ * GFP_ATOMIC.
+ */
+extern int __cant_sleep(void);
+extern void *get_current(void);
+extern int copy_from_user_proc(void *to, void *from, int size);
+extern int cpu(void);
+extern char *uml_strdup(const char *string);
+
+extern unsigned long to_irq_stack(unsigned long *mask_out);
+extern unsigned long from_irq_stack(int nested);
+
+extern void syscall_trace(struct uml_pt_regs *regs, int entryexit);
+extern int singlestepping(void *t);
+
+extern void segv_handler(int sig, struct uml_pt_regs *regs);
+extern void bus_handler(int sig, struct uml_pt_regs *regs);
+extern void winch(int sig, struct uml_pt_regs *regs);
+extern void fatal_sigsegv(void) __attribute__ ((noreturn));
+
+
 #endif
diff --git a/arch/um/include/mem_user.h b/arch/um/include/mem_user.h
index a54514d..46384ac 100644
--- a/arch/um/include/mem_user.h
+++ b/arch/um/include/mem_user.h
@@ -46,9 +46,6 @@
 
 #define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1))
 
-extern unsigned long host_task_size;
-extern unsigned long task_size;
-
 extern int init_mem_user(void);
 extern void setup_memory(void *entry);
 extern unsigned long find_iomem(char *driver, unsigned long *len_out);
@@ -59,9 +56,7 @@
 			  unsigned long len, unsigned long long highmem);
 extern void add_iomem(char *name, int fd, unsigned long size);
 extern unsigned long phys_offset(unsigned long phys);
-extern void unmap_physmem(void);
 extern void map_memory(unsigned long virt, unsigned long phys,
 		       unsigned long len, int r, int w, int x);
-extern unsigned long get_kmem_end(void);
 
 #endif
diff --git a/arch/um/include/misc_constants.h b/arch/um/include/misc_constants.h
deleted file mode 100644
index 989bc08..0000000
--- a/arch/um/include/misc_constants.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __MISC_CONSTANT_H_
-#define __MISC_CONSTANT_H_
-
-#include <user_constants.h>
-
-#endif
diff --git a/arch/um/include/os.h b/arch/um/include/os.h
index 6f0d1c7..0b6b627 100644
--- a/arch/um/include/os.h
+++ b/arch/um/include/os.h
@@ -8,7 +8,6 @@
 
 #include <stdarg.h>
 #include "irq_user.h"
-#include "kern_util.h"
 #include "longjmp.h"
 #include "mm_id.h"
 #include "sysdep/tls.h"
@@ -128,33 +127,31 @@
 extern int os_stat_file(const char *file_name, struct uml_stat *buf);
 extern int os_stat_fd(const int fd, struct uml_stat *buf);
 extern int os_access(const char *file, int mode);
-extern int os_get_exec_close(int fd, int *close_on_exec);
 extern int os_set_exec_close(int fd);
 extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg);
 extern int os_get_ifname(int fd, char *namebuf);
 extern int os_set_slip(int fd);
-extern int os_set_owner(int fd, int pid);
 extern int os_mode_fd(int fd, int mode);
 
 extern int os_seek_file(int fd, unsigned long long offset);
-extern int os_open_file(char *file, struct openflags flags, int mode);
+extern int os_open_file(const char *file, struct openflags flags, int mode);
 extern int os_read_file(int fd, void *buf, int len);
 extern int os_write_file(int fd, const void *buf, int count);
-extern int os_file_size(char *file, unsigned long long *size_out);
-extern int os_file_modtime(char *file, unsigned long *modtime);
+extern int os_file_size(const char *file, unsigned long long *size_out);
+extern int os_file_modtime(const char *file, unsigned long *modtime);
 extern int os_pipe(int *fd, int stream, int close_on_exec);
-extern int os_set_fd_async(int fd, int owner);
+extern int os_set_fd_async(int fd);
 extern int os_clear_fd_async(int fd);
 extern int os_set_fd_block(int fd, int blocking);
 extern int os_accept_connection(int fd);
-extern int os_create_unix_socket(char *file, int len, int close_on_exec);
+extern int os_create_unix_socket(const char *file, int len, int close_on_exec);
 extern int os_shutdown_socket(int fd, int r, int w);
 extern void os_close_file(int fd);
 extern int os_rcv_fd(int fd, int *helper_pid_out);
 extern int create_unix_socket(char *file, int len, int close_on_exec);
-extern int os_connect_socket(char *name);
+extern int os_connect_socket(const char *name);
 extern int os_file_type(char *file);
-extern int os_file_mode(char *file, struct openflags *mode_out);
+extern int os_file_mode(const char *file, struct openflags *mode_out);
 extern int os_lock_file(int fd, int excl);
 extern void os_flush_stdout(void);
 extern int os_stat_filesystem(char *path, long *bsize_out,
@@ -168,14 +165,10 @@
 
 /* start_up.c */
 extern void os_early_checks(void);
-extern int can_do_skas(void);
+extern void can_do_skas(void);
 extern void os_check_bugs(void);
 extern void check_host_supports_tls(int *supports_tls, int *tls_min);
 
-/* Make sure they are clear when running in TT mode. Required by
- * SEGV_MAYBE_FIXABLE */
-#define clear_can_do_skas() do { ptrace_faultinfo = proc_mm = 0; } while (0)
-
 /* mem.c */
 extern int create_mem_file(unsigned long long len);
 
@@ -214,7 +207,7 @@
 extern int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv);
 extern int run_helper_thread(int (*proc)(void *), void *arg,
 			     unsigned int flags, unsigned long *stack_out);
-extern int helper_wait(int pid, int nohang, char *pname);
+extern int helper_wait(int pid);
 
 
 /* tls.c */
@@ -237,16 +230,12 @@
 extern int get_signals(void);
 extern int set_signals(int enable);
 
-/* trap.c */
-extern void os_fill_handlinfo(struct kern_handlers h);
-
 /* util.c */
 extern void stack_protections(unsigned long address);
 extern int raw(int fd);
 extern void setup_machinename(char *machine_out);
 extern void setup_hostinfo(char *buf, int len);
-extern int setjmp_wrapper(void (*proc)(void *, void *), ...);
-extern void os_dump_core(void);
+extern void os_dump_core(void) __attribute__ ((noreturn));
 
 /* time.c */
 extern void idle_sleep(unsigned long long nsecs);
@@ -275,11 +264,9 @@
 extern int is_skas_winch(int pid, int fd, void *data);
 extern int start_userspace(unsigned long stub_stack);
 extern int copy_context_skas0(unsigned long stack, int pid);
-extern void save_registers(int pid, struct uml_pt_regs *regs);
-extern void restore_registers(int pid, struct uml_pt_regs *regs);
 extern void userspace(struct uml_pt_regs *regs);
-extern void map_stub_pages(int fd, unsigned long code,
-			   unsigned long data, unsigned long stack);
+extern int map_stub_pages(int fd, unsigned long code, unsigned long data,
+			  unsigned long stack);
 extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void));
 extern void switch_threads(jmp_buf *me, jmp_buf *you);
 extern int start_idle_thread(void *stack, jmp_buf *switch_buf);
@@ -298,16 +285,12 @@
 extern int os_get_pollfd(int i);
 extern void os_set_pollfd(int i, int fd);
 extern void os_set_ioignore(void);
-extern void init_irq_signals(int on_sigstack);
 
 /* sigio.c */
 extern int add_sigio_fd(int fd);
 extern int ignore_sigio_fd(int fd);
 extern void maybe_sigio_broken(int fd, int read);
 
-/* skas/trap */
-extern void sig_handler_common_skas(int sig, void *sc_ptr);
-
 /* sys-x86_64/prctl.c */
 extern int os_arch_prctl(int pid, int code, unsigned long *addr);
 
diff --git a/arch/um/include/ptrace_user.h b/arch/um/include/ptrace_user.h
index f3450e6..4bce6e0 100644
--- a/arch/um/include/ptrace_user.h
+++ b/arch/um/include/ptrace_user.h
@@ -1,5 +1,5 @@
 /* 
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -10,12 +10,6 @@
 
 extern int ptrace_getregs(long pid, unsigned long *regs_out);
 extern int ptrace_setregs(long pid, unsigned long *regs_in);
-extern int ptrace_getfpregs(long pid, unsigned long *regs_out);
-extern int ptrace_setfpregs(long pid, unsigned long *regs);
-extern void arch_enter_kernel(void *task, int pid);
-extern void arch_leave_kernel(void *task, int pid);
-extern void ptrace_pokeuser(unsigned long addr, unsigned long data);
-
 
 /* syscall emulation path in ptrace */
 
@@ -54,7 +48,8 @@
 	(((int[3][3] ) { \
 		{ PTRACE_SYSCALL, PTRACE_SYSCALL, PTRACE_SINGLESTEP }, \
 		{ PTRACE_SYSEMU, PTRACE_SYSEMU, PTRACE_SINGLESTEP }, \
-		{ PTRACE_SYSEMU, PTRACE_SYSEMU_SINGLESTEP, PTRACE_SYSEMU_SINGLESTEP }}) \
+		{ PTRACE_SYSEMU, PTRACE_SYSEMU_SINGLESTEP, \
+		  PTRACE_SYSEMU_SINGLESTEP } }) \
 		[sysemu_mode][singlestep_mode])
 
 #endif
diff --git a/arch/um/include/registers.h b/arch/um/include/registers.h
index 0e27406..9ea1ae3 100644
--- a/arch/um/include/registers.h
+++ b/arch/um/include/registers.h
@@ -9,14 +9,13 @@
 #include "sysdep/ptrace.h"
 #include "sysdep/archsetjmp.h"
 
-extern void init_thread_registers(struct uml_pt_regs *to);
 extern int save_fp_registers(int pid, unsigned long *fp_regs);
 extern int restore_fp_registers(int pid, unsigned long *fp_regs);
 extern int save_fpx_registers(int pid, unsigned long *fp_regs);
 extern int restore_fpx_registers(int pid, unsigned long *fp_regs);
-extern void save_registers(int pid, struct uml_pt_regs *regs);
-extern void restore_registers(int pid, struct uml_pt_regs *regs);
-extern void init_registers(int pid);
+extern int save_registers(int pid, struct uml_pt_regs *regs);
+extern int restore_registers(int pid, struct uml_pt_regs *regs);
+extern int init_registers(int pid);
 extern void get_safe_registers(unsigned long *regs);
 extern unsigned long get_thread_reg(int reg, jmp_buf *buf);
 
diff --git a/arch/um/include/signal_kern.h b/arch/um/include/signal_kern.h
deleted file mode 100644
index aeb5d5a..0000000
--- a/arch/um/include/signal_kern.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* 
- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SIGNAL_KERN_H__
-#define __SIGNAL_KERN_H__
-
-extern int have_signals(void *t);
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/include/skas/mode-skas.h b/arch/um/include/skas/mode-skas.h
deleted file mode 100644
index e065feb..0000000
--- a/arch/um/include/skas/mode-skas.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
- */
-
-#ifndef __MODE_SKAS_H__
-#define __MODE_SKAS_H__
-
-extern void kill_off_processes_skas(void);
-
-#endif
diff --git a/arch/um/include/sysdep-i386/syscalls.h b/arch/um/include/sysdep-i386/syscalls.h
index 57bd79e..9056981 100644
--- a/arch/um/include/sysdep-i386/syscalls.h
+++ b/arch/um/include/sysdep-i386/syscalls.h
@@ -1,5 +1,5 @@
 /* 
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -18,7 +18,8 @@
 extern syscall_handler_t *sys_call_table[];
 
 #define EXECUTE_SYSCALL(syscall, regs) \
-	((long (*)(struct syscall_args)) (*sys_call_table[syscall]))(SYSCALL_ARGS(&regs->regs))
+	((long (*)(struct syscall_args)) \
+	 (*sys_call_table[syscall]))(SYSCALL_ARGS(&regs->regs))
 
 extern long sys_mmap2(unsigned long addr, unsigned long len,
 		      unsigned long prot, unsigned long flags,
diff --git a/arch/um/include/sysdep-x86_64/kernel-offsets.h b/arch/um/include/sysdep-x86_64/kernel-offsets.h
index c978b58..a307237 100644
--- a/arch/um/include/sysdep-x86_64/kernel-offsets.h
+++ b/arch/um/include/sysdep-x86_64/kernel-offsets.h
@@ -17,16 +17,7 @@
 #define OFFSET(sym, str, mem) \
 	DEFINE(sym, offsetof(struct str, mem));
 
-#define __NO_STUBS 1
-#undef __SYSCALL
-#undef _ASM_X86_64_UNISTD_H_
-#define __SYSCALL(nr, sym) [nr] = 1,
-static char syscalls[] = {
-#include <asm/arch/unistd.h>
-};
-
 void foo(void)
 {
 #include <common-offsets.h>
-DEFINE(UM_NR_syscall_max, sizeof(syscalls) - 1);
 }
diff --git a/arch/um/include/sysdep-x86_64/syscalls.h b/arch/um/include/sysdep-x86_64/syscalls.h
index cf72256..7cfb0b08 100644
--- a/arch/um/include/sysdep-x86_64/syscalls.h
+++ b/arch/um/include/sysdep-x86_64/syscalls.h
@@ -30,6 +30,4 @@
 extern syscall_handler_t sys_modify_ldt;
 extern syscall_handler_t sys_arch_prctl;
 
-#define NR_syscalls (UM_NR_syscall_max + 1)
-
 #endif
diff --git a/arch/um/include/um_mmu.h b/arch/um/include/um_mmu.h
index 8855d8d..82865fc 100644
--- a/arch/um/include/um_mmu.h
+++ b/arch/um/include/um_mmu.h
@@ -12,10 +12,6 @@
 
 typedef struct mm_context {
 	struct mm_id id;
-	unsigned long last_page_table;
-#ifdef CONFIG_3_LEVEL_PGTABLES
-	unsigned long last_pmd;
-#endif
 	struct uml_ldt ldt;
 } mm_context_t;
 
diff --git a/arch/um/include/um_uaccess.h b/arch/um/include/um_uaccess.h
index fdfc06b..2b6fc8e 100644
--- a/arch/um/include/um_uaccess.h
+++ b/arch/um/include/um_uaccess.h
@@ -6,7 +6,9 @@
 #ifndef __ARCH_UM_UACCESS_H
 #define __ARCH_UM_UACCESS_H
 
-#include "asm/fixmap.h"
+#include <asm/elf.h>
+#include <asm/fixmap.h>
+#include "sysdep/archsetjmp.h"
 
 #define __under_task_size(addr, size) \
 	(((unsigned long) (addr) < TASK_SIZE) && \
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 8196450..76a62c0 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -19,12 +19,13 @@
 void flush_thread(void)
 {
 	void *data = NULL;
-	unsigned long end = proc_mm ? task_size : STUB_START;
 	int ret;
 
 	arch_flush_thread(&current->thread.arch);
 
-	ret = unmap(&current->mm->context.id, 0, end, 1, &data);
+	ret = unmap(&current->mm->context.id, 0, STUB_START, 0, &data);
+	ret = ret ? ret : unmap(&current->mm->context.id, STUB_END,
+				TASK_SIZE - STUB_END, 1, &data);
 	if (ret) {
 		printk(KERN_ERR "flush_thread - clearing address space failed, "
 		       "err = %d\n", ret);
diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c
index c716b5a..984f80e 100644
--- a/arch/um/kernel/exitcode.c
+++ b/arch/um/kernel/exitcode.c
@@ -1,15 +1,17 @@
 /*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/kernel.h"
-#include "linux/init.h"
-#include "linux/ctype.h"
-#include "linux/proc_fs.h"
-#include "asm/uaccess.h"
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/types.h>
+#include <asm/uaccess.h>
 
-/* If read and write race, the read will still atomically read a valid
+/*
+ * If read and write race, the read will still atomically read a valid
  * value.
  */
 int uml_exitcode = 0;
@@ -19,18 +21,19 @@
 {
 	int len, val;
 
-	/* Save uml_exitcode in a local so that we don't need to guarantee
+	/*
+	 * Save uml_exitcode in a local so that we don't need to guarantee
 	 * that sprintf accesses it atomically.
 	 */
 	val = uml_exitcode;
 	len = sprintf(page, "%d\n", val);
 	len -= off;
-	if(len <= off+count)
+	if (len <= off+count)
 		*eof = 1;
 	*start = page + off;
-	if(len > count)
+	if (len > count)
 		len = count;
-	if(len < 0)
+	if (len < 0)
 		len = 0;
 	return len;
 }
@@ -41,11 +44,11 @@
 	char *end, buf[sizeof("nnnnn\0")];
 	int tmp;
 
-	if(copy_from_user(buf, buffer, count))
+	if (copy_from_user(buf, buffer, count))
 		return -EFAULT;
 
 	tmp = simple_strtol(buf, &end, 0);
-	if((*end != '\0') && !isspace(*end))
+	if ((*end != '\0') && !isspace(*end))
 		return -EINVAL;
 
 	uml_exitcode = tmp;
@@ -57,7 +60,7 @@
 	struct proc_dir_entry *ent;
 
 	ent = create_proc_entry("exitcode", 0600, &proc_root);
-	if(ent == NULL){
+	if (ent == NULL) {
 		printk(KERN_WARNING "make_proc_exitcode : Failed to register "
 		       "/proc/exitcode\n");
 		return 0;
diff --git a/arch/um/kernel/gmon_syms.c b/arch/um/kernel/gmon_syms.c
index 734f873..72eccd2 100644
--- a/arch/um/kernel/gmon_syms.c
+++ b/arch/um/kernel/gmon_syms.c
@@ -1,5 +1,5 @@
-/* 
- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -8,12 +8,13 @@
 extern void __bb_init_func(void *)  __attribute__((weak));
 EXPORT_SYMBOL(__bb_init_func);
 
-/* This is defined (and referred to in profiling stub code) only by some GCC
+/*
+ * This is defined (and referred to in profiling stub code) only by some GCC
  * versions in libgcov.
  *
  * Since SuSE backported the fix, we cannot handle it depending on GCC version.
- * So, unconditionally export it. But also give it a weak declaration, which will
- * be overridden by any other one.
+ * So, unconditionally export it. But also give it a weak declaration, which
+ * will be overridden by any other one.
  */
 
 extern void __gcov_init(void *) __attribute__((weak));
diff --git a/arch/um/kernel/gprof_syms.c b/arch/um/kernel/gprof_syms.c
index 9244f01..e2f043d 100644
--- a/arch/um/kernel/gprof_syms.c
+++ b/arch/um/kernel/gprof_syms.c
@@ -1,5 +1,5 @@
 /* 
- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -7,14 +7,3 @@
 
 extern void mcount(void);
 EXPORT_SYMBOL(mcount);
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c
index 16dc43e..fa01556 100644
--- a/arch/um/kernel/initrd.c
+++ b/arch/um/kernel/initrd.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -7,7 +7,6 @@
 #include "linux/bootmem.h"
 #include "linux/initrd.h"
 #include "asm/types.h"
-#include "kern_util.h"
 #include "initrd.h"
 #include "init.h"
 #include "os.h"
@@ -21,18 +20,27 @@
 	long long size;
 	int err;
 
-	if(initrd == NULL)
+	if (initrd == NULL)
 		return 0;
 
 	err = os_file_size(initrd, &size);
-	if(err)
+	if (err)
 		return 0;
 
+	/*
+	 * This is necessary because alloc_bootmem craps out if you
+	 * ask for no memory.
+	 */
+	if (size == 0) {
+		printk(KERN_ERR "\"%s\" is a zero-size initrd\n", initrd);
+		return 0;
+	}
+
 	area = alloc_bootmem(size);
-	if(area == NULL)
+	if (area == NULL)
 		return 0;
 
-	if(load_initrd(initrd, area, size) == -1)
+	if (load_initrd(initrd, area, size) == -1)
 		return 0;
 
 	initrd_start = (unsigned long) area;
@@ -59,13 +67,15 @@
 	int fd, n;
 
 	fd = os_open_file(filename, of_read(OPENFLAGS()), 0);
-	if(fd < 0){
-		printk("Opening '%s' failed - err = %d\n", filename, -fd);
+	if (fd < 0) {
+		printk(KERN_ERR "Opening '%s' failed - err = %d\n", filename,
+		       -fd);
 		return -1;
 	}
 	n = os_read_file(fd, buf, size);
-	if(n != size){
-		printk("Read of %d bytes from '%s' failed, err = %d\n", size,
+	if (n != size) {
+		printk(KERN_ERR "Read of %d bytes from '%s' failed, "
+		       "err = %d\n", size,
 		       filename, -n);
 		return -1;
 	}
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index ba11ccd..91587f8 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -107,10 +107,9 @@
 	struct pollfd *tmp_pfd;
 	struct irq_fd *new_fd, *irq_fd;
 	unsigned long flags;
-	int pid, events, err, n;
+	int events, err, n;
 
-	pid = os_getpid();
-	err = os_set_fd_async(fd, pid);
+	err = os_set_fd_async(fd);
 	if (err < 0)
 		goto out;
 
@@ -127,7 +126,6 @@
 				     .fd 		= fd,
 				     .type 		= type,
 				     .irq 		= irq,
-				     .pid  		= pid,
 				     .events 		= events,
 				     .current_events 	= 0 } );
 
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index 7c7142b..5311ee9 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -18,15 +18,11 @@
 EXPORT_SYMBOL(get_signals);
 EXPORT_SYMBOL(kernel_thread);
 EXPORT_SYMBOL(sys_waitpid);
-EXPORT_SYMBOL(task_size);
 EXPORT_SYMBOL(flush_tlb_range);
-EXPORT_SYMBOL(host_task_size);
 EXPORT_SYMBOL(arch_validate);
-EXPORT_SYMBOL(get_kmem_end);
 
 EXPORT_SYMBOL(high_physmem);
 EXPORT_SYMBOL(empty_zero_page);
-EXPORT_SYMBOL(um_virt_to_phys);
 EXPORT_SYMBOL(handle_page_fault);
 EXPORT_SYMBOL(find_iomem);
 
@@ -40,7 +36,6 @@
 EXPORT_SYMBOL(os_stat_fd);
 EXPORT_SYMBOL(os_stat_file);
 EXPORT_SYMBOL(os_access);
-EXPORT_SYMBOL(os_get_exec_close);
 EXPORT_SYMBOL(os_set_exec_close);
 EXPORT_SYMBOL(os_getpid);
 EXPORT_SYMBOL(os_open_file);
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 59822dee..d872fdc 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -1,49 +1,41 @@
 /*
- * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/stddef.h"
-#include "linux/kernel.h"
-#include "linux/mm.h"
-#include "linux/bootmem.h"
-#include "linux/swap.h"
-#include "linux/highmem.h"
-#include "linux/gfp.h"
-#include "asm/page.h"
-#include "asm/fixmap.h"
-#include "asm/pgalloc.h"
-#include "kern_util.h"
+#include <linux/stddef.h>
+#include <linux/bootmem.h>
+#include <linux/gfp.h>
+#include <linux/highmem.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <asm/fixmap.h>
+#include <asm/page.h>
 #include "as-layout.h"
-#include "kern.h"
-#include "mem_user.h"
-#include "um_uaccess.h"
-#include "os.h"
-#include "linux/types.h"
-#include "linux/string.h"
 #include "init.h"
-#include "kern_constants.h"
+#include "kern.h"
+#include "kern_util.h"
+#include "mem_user.h"
+#include "os.h"
 
 /* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */
 unsigned long *empty_zero_page = NULL;
 /* allocated in paging_init and unchanged thereafter */
 unsigned long *empty_bad_page = NULL;
+
+/*
+ * Initialized during boot, and readonly for initializing page tables
+ * afterwards
+ */
 pgd_t swapper_pg_dir[PTRS_PER_PGD];
+
+/* Initialized at boot time, and readonly after that */
 unsigned long long highmem;
 int kmalloc_ok = 0;
 
+/* Used during early boot */
 static unsigned long brk_end;
 
-void unmap_physmem(void)
-{
-	os_unmap_memory((void *) brk_end, uml_reserved - brk_end);
-}
-
-static void map_cb(void *unused)
-{
-	map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0);
-}
-
 #ifdef CONFIG_HIGHMEM
 static void setup_highmem(unsigned long highmem_start,
 			  unsigned long highmem_len)
@@ -53,7 +45,7 @@
 	int i;
 
 	highmem_pfn = __pa(highmem_start) >> PAGE_SHIFT;
-	for(i = 0; i < highmem_len >> PAGE_SHIFT; i++){
+	for (i = 0; i < highmem_len >> PAGE_SHIFT; i++) {
 		page = &mem_map[highmem_pfn + i];
 		ClearPageReserved(page);
 		init_page_count(page);
@@ -65,14 +57,13 @@
 void __init mem_init(void)
 {
 	/* clear the zero-page */
-	memset((void *) empty_zero_page, 0, PAGE_SIZE);
+	memset(empty_zero_page, 0, PAGE_SIZE);
 
 	/* Map in the area just after the brk now that kmalloc is about
 	 * to be turned on.
 	 */
 	brk_end = (unsigned long) UML_ROUND_UP(sbrk(0));
-	map_cb(NULL);
-	initial_thread_cb(map_cb, NULL);
+	map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0);
 	free_bootmem(__pa(brk_end), uml_reserved - brk_end);
 	uml_reserved = brk_end;
 
@@ -85,7 +76,7 @@
 #endif
 	num_physpages = totalram_pages;
 	max_pfn = totalram_pages;
-	printk(KERN_INFO "Memory: %luk available\n", 
+	printk(KERN_INFO "Memory: %luk available\n",
 	       (unsigned long) nr_free_pages() << (PAGE_SHIFT-10));
 	kmalloc_ok = 1;
 
@@ -119,7 +110,7 @@
 #endif
 }
 
-static void __init fixrange_init(unsigned long start, unsigned long end, 
+static void __init fixrange_init(unsigned long start, unsigned long end,
 				 pgd_t *pgd_base)
 {
 	pgd_t *pgd;
@@ -138,7 +129,7 @@
 		if (pud_none(*pud))
 			one_md_table_init(pud);
 		pmd = pmd_offset(pud, vaddr);
-		for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
+		for (; (j < PTRS_PER_PMD) && (vaddr < end); pmd++, j++) {
 			one_page_table_init(pmd);
 			vaddr += PMD_SIZE;
 		}
@@ -152,7 +143,7 @@
 
 #define kmap_get_fixmap_pte(vaddr)					\
 	pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)),\
- 			  (vaddr)), (vaddr))
+				     (vaddr)), (vaddr))
 
 static void __init kmap_init(void)
 {
@@ -197,21 +188,23 @@
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
-	unsigned long paddr, vaddr = FIXADDR_USER_START;
+	phys_t p;
+	unsigned long v, vaddr = FIXADDR_USER_START;
 
-	if (  ! size )
+	if (!size)
 		return;
 
 	fixrange_init( FIXADDR_USER_START, FIXADDR_USER_END, swapper_pg_dir);
-	paddr = (unsigned long)alloc_bootmem_low_pages( size);
-	memcpy( (void *)paddr, (void *)FIXADDR_USER_START, size);
-	paddr = __pa(paddr);
-	for ( ; size > 0; size-=PAGE_SIZE, vaddr+=PAGE_SIZE, paddr+=PAGE_SIZE){
+	v = (unsigned long) alloc_bootmem_low_pages(size);
+	memcpy((void *) v , (void *) FIXADDR_USER_START, size);
+	p = __pa(v);
+	for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE,
+		      p += PAGE_SIZE) {
 		pgd = swapper_pg_dir + pgd_index(vaddr);
 		pud = pud_offset(pgd, vaddr);
 		pmd = pmd_offset(pud, vaddr);
 		pte = pte_offset_kernel(pmd, vaddr);
-		pte_set_val( (*pte), paddr, PAGE_READONLY);
+		pte_set_val(*pte, p, PAGE_READONLY);
 	}
 #endif
 }
@@ -223,7 +216,7 @@
 
 	empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE);
 	empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE);
-	for(i = 0; i < ARRAY_SIZE(zones_size); i++)
+	for (i = 0; i < ARRAY_SIZE(zones_size); i++)
 		zones_size[i] = 0;
 
 	zones_size[ZONE_NORMAL] = (end_iomem >> PAGE_SHIFT) -
@@ -253,32 +246,33 @@
 	int i;
 
  again:
-	if(page == NULL)
+	if (page == NULL)
 		return page;
-	if(PageHighMem(page))
+	if (PageHighMem(page))
 		return page;
 
 	addr = (unsigned long) page_address(page);
-	for(i = 0; i < (1 << order); i++){
+	for (i = 0; i < (1 << order); i++) {
 		current->thread.fault_addr = (void *) addr;
-		if(__do_copy_to_user((void __user *) addr, &zero,
+		if (__do_copy_to_user((void __user *) addr, &zero,
 				     sizeof(zero),
 				     &current->thread.fault_addr,
-				     &current->thread.fault_catcher)){
-			if(!(mask & __GFP_WAIT))
+				     &current->thread.fault_catcher)) {
+			if (!(mask & __GFP_WAIT))
 				return NULL;
 			else break;
 		}
 		addr += PAGE_SIZE;
 	}
 
-	if(i == (1 << order))
+	if (i == (1 << order))
 		return page;
 	page = alloc_pages(mask, order);
 	goto again;
 }
 
-/* This can't do anything because nothing in the kernel image can be freed
+/*
+ * This can't do anything because nothing in the kernel image can be freed
  * since it's not in kernel physical memory.
  */
 
@@ -290,8 +284,8 @@
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
 	if (start < end)
-		printk ("Freeing initrd memory: %ldk freed\n", 
-			(end - start) >> 10);
+		printk(KERN_INFO "Freeing initrd memory: %ldk freed\n",
+		       (end - start) >> 10);
 	for (; start < end; start += PAGE_SIZE) {
 		ClearPageReserved(virt_to_page(start));
 		init_page_count(virt_to_page(start));
@@ -308,32 +302,31 @@
 	int highmem = 0;
 	struct page *page;
 
-	printk("Mem-info:\n");
+	printk(KERN_INFO "Mem-info:\n");
 	show_free_areas();
-	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+	printk(KERN_INFO "Free swap:       %6ldkB\n",
+	       nr_swap_pages<<(PAGE_SHIFT-10));
 	pfn = max_mapnr;
-	while(pfn-- > 0) {
+	while (pfn-- > 0) {
 		page = pfn_to_page(pfn);
 		total++;
-		if(PageHighMem(page))
+		if (PageHighMem(page))
 			highmem++;
-		if(PageReserved(page))
+		if (PageReserved(page))
 			reserved++;
-		else if(PageSwapCache(page))
+		else if (PageSwapCache(page))
 			cached++;
-		else if(page_count(page))
+		else if (page_count(page))
 			shared += page_count(page) - 1;
 	}
-	printk("%d pages of RAM\n", total);
-	printk("%d pages of HIGHMEM\n", highmem);
-	printk("%d reserved pages\n", reserved);
-	printk("%d pages shared\n", shared);
-	printk("%d pages swap cached\n", cached);
+	printk(KERN_INFO "%d pages of RAM\n", total);
+	printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
+	printk(KERN_INFO "%d reserved pages\n", reserved);
+	printk(KERN_INFO "%d pages shared\n", shared);
+	printk(KERN_INFO "%d pages swap cached\n", cached);
 }
 
-/*
- * Allocate and free page tables.
- */
+/* Allocate and free page tables. */
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
@@ -341,14 +334,14 @@
 
 	if (pgd) {
 		memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
-		memcpy(pgd + USER_PTRS_PER_PGD, 
-		       swapper_pg_dir + USER_PTRS_PER_PGD, 
+		memcpy(pgd + USER_PTRS_PER_PGD,
+		       swapper_pg_dir + USER_PTRS_PER_PGD,
 		       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
 	}
 	return pgd;
 }
 
-void pgd_free(pgd_t *pgd)
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	free_page((unsigned long) pgd);
 }
@@ -368,3 +361,11 @@
 	pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
 	return pte;
 }
+
+#ifdef CONFIG_3_LEVEL_PGTABLES
+/* Allocate one zero-filled page for a pmd table; NULL if allocation fails. */
+pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	return (pmd_t *) get_zeroed_page(GFP_KERNEL);
+}
+#endif
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index e66432f..9757085 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -55,16 +55,6 @@
 	return 0;
 }
 
-/* Changed during early boot */
-static unsigned long kmem_top = 0;
-
-unsigned long get_kmem_end(void)
-{
-	if (kmem_top == 0)
-		kmem_top = host_task_size - 1024 * 1024;
-	return kmem_top;
-}
-
 void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
 		int r, int w, int x)
 {
@@ -174,10 +164,10 @@
  * setup_iomem, both of which run during early boot.  Afterwards, it's
  * unchanged.
  */
-struct iomem_region *iomem_regions = NULL;
+struct iomem_region *iomem_regions;
 
-/* Initialized in parse_iomem */
-int iomem_size = 0;
+/* Initialized in parse_iomem and unchanged thereafter */
+int iomem_size;
 
 unsigned long find_iomem(char *driver, unsigned long *len_out)
 {
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 0eae00b..c07961b 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -4,19 +4,21 @@
  * Licensed under the GPL
  */
 
-#include "linux/stddef.h"
-#include "linux/err.h"
-#include "linux/hardirq.h"
-#include "linux/mm.h"
-#include "linux/personality.h"
-#include "linux/proc_fs.h"
-#include "linux/ptrace.h"
-#include "linux/random.h"
-#include "linux/sched.h"
-#include "linux/tick.h"
-#include "linux/threads.h"
-#include "asm/pgtable.h"
-#include "asm/uaccess.h"
+#include <linux/stddef.h>
+#include <linux/err.h>
+#include <linux/hardirq.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/personality.h>
+#include <linux/proc_fs.h>
+#include <linux/ptrace.h>
+#include <linux/random.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
+#include <linux/threads.h>
+#include <asm/current.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
 #include "as-layout.h"
 #include "kern_util.h"
 #include "os.h"
@@ -30,7 +32,7 @@
  */
 struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { -1, NULL } };
 
-static inline int external_pid(struct task_struct *task)
+static inline int external_pid(void)
 {
 	/* FIXME: Need to look up userspace_pid by cpu */
 	return userspace_pid[0];
@@ -40,7 +42,7 @@
 {
 	int i;
 
-	for(i = 0; i < ncpus; i++) {
+	for (i = 0; i < ncpus; i++) {
 		if (cpu_tasks[i].pid == pid)
 			return i;
 	}
@@ -60,8 +62,6 @@
 	if (atomic)
 		flags = GFP_ATOMIC;
 	page = __get_free_pages(flags, order);
-	if (page == 0)
-		return 0;
 
 	return page;
 }
@@ -80,15 +80,15 @@
 static inline void set_current(struct task_struct *task)
 {
 	cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task)
-		{ external_pid(task), task });
+		{ external_pid(), task });
 }
 
-extern void arch_switch_to(struct task_struct *from, struct task_struct *to);
+extern void arch_switch_to(struct task_struct *to);
 
 void *_switch_to(void *prev, void *next, void *last)
 {
 	struct task_struct *from = prev;
-	struct task_struct *to= next;
+	struct task_struct *to = next;
 
 	to->thread.prev_sched = from;
 	set_current(to);
@@ -99,13 +99,13 @@
 		switch_threads(&from->thread.switch_buf,
 			       &to->thread.switch_buf);
 
-		arch_switch_to(current->thread.prev_sched, current);
+		arch_switch_to(current);
 
 		if (current->thread.saved_task)
 			show_regs(&(current->thread.regs));
-		next= current->thread.saved_task;
-		prev= current;
-	} while(current->thread.saved_task);
+		to = current->thread.saved_task;
+		from = current;
+	} while (current->thread.saved_task);
 
 	return current->thread.prev_sched;
 
@@ -163,8 +163,6 @@
 void fork_handler(void)
 {
 	force_flush_all();
-	if (current->thread.prev_sched == NULL)
-		panic("blech");
 
 	schedule_tail(current->thread.prev_sched);
 
@@ -173,7 +171,7 @@
 	 * arch_switch_to isn't needed. We could want to apply this to
 	 * improve performance. -bb
 	 */
-	arch_switch_to(current->thread.prev_sched, current);
+	arch_switch_to(current);
 
 	current->thread.prev_sched = NULL;
 
@@ -204,7 +202,7 @@
 		arch_copy_thread(&current->thread.arch, &p->thread.arch);
 	}
 	else {
-		init_thread_registers(&p->thread.regs.regs);
+		get_safe_registers(p->thread.regs.regs.gp);
 		p->thread.request.u.thread = current->thread.request.u.thread;
 		handler = new_thread_handler;
 	}
@@ -237,7 +235,7 @@
 {
 	unsigned long long nsecs;
 
-	while(1) {
+	while (1) {
 		/* endless idle loop with no priority at all */
 
 		/*
@@ -256,53 +254,10 @@
 
 void cpu_idle(void)
 {
-	cpu_tasks[current_thread->cpu].pid = os_getpid();
+	cpu_tasks[current_thread_info()->cpu].pid = os_getpid();
 	default_idle();
 }
 
-void *um_virt_to_phys(struct task_struct *task, unsigned long addr,
-		      pte_t *pte_out)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	pte_t ptent;
-
-	if (task->mm == NULL)
-		return ERR_PTR(-EINVAL);
-	pgd = pgd_offset(task->mm, addr);
-	if (!pgd_present(*pgd))
-		return ERR_PTR(-EINVAL);
-
-	pud = pud_offset(pgd, addr);
-	if (!pud_present(*pud))
-		return ERR_PTR(-EINVAL);
-
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd))
-		return ERR_PTR(-EINVAL);
-
-	pte = pte_offset_kernel(pmd, addr);
-	ptent = *pte;
-	if (!pte_present(ptent))
-		return ERR_PTR(-EINVAL);
-
-	if (pte_out != NULL)
-		*pte_out = ptent;
-	return (void *) (pte_val(ptent) & PAGE_MASK) + (addr & ~PAGE_MASK);
-}
-
-char *current_cmd(void)
-{
-#if defined(CONFIG_SMP) || defined(CONFIG_HIGHMEM)
-	return "(Unknown)";
-#else
-	void *addr = um_virt_to_phys(current, current->mm->arg_start, NULL);
-	return IS_ERR(addr) ? "(Unknown)": __va((unsigned long) addr);
-#endif
-}
-
 void dump_thread(struct pt_regs *regs, struct user *u)
 {
 }
@@ -317,7 +272,7 @@
 	unsigned long stack;
 
 	stack = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER);
-	return stack != (unsigned long) current_thread;
+	return stack != (unsigned long) current_thread_info();
 }
 
 extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end;
@@ -331,7 +286,7 @@
 		(*call)();
 }
 
-char *uml_strdup(char *string)
+char *uml_strdup(const char *string)
 {
 	return kstrdup(string, GFP_KERNEL);
 }
@@ -359,7 +314,7 @@
 int smp_sigio_handler(void)
 {
 #ifdef CONFIG_SMP
-	int cpu = current_thread->cpu;
+	int cpu = current_thread_info()->cpu;
 	IPI_handler(cpu);
 	if (cpu != 0)
 		return 1;
@@ -369,7 +324,7 @@
 
 int cpu(void)
 {
-	return current_thread->cpu;
+	return current_thread_info()->cpu;
 }
 
 static atomic_t using_sysemu = ATOMIC_INIT(0);
@@ -435,7 +390,7 @@
 {
 	struct task_struct *task = t ? t : current;
 
-	if ( ! (task->ptrace & PT_DTRACE) )
+	if (!(task->ptrace & PT_DTRACE))
 		return 0;
 
 	if (task->thread.singlestep_syscall)
@@ -459,3 +414,55 @@
 	return sp & ~0xf;
 }
 #endif
+
+/*
+ * Scan the kernel stack of p upward from the stack pointer saved in its
+ * switch_buf and return the first kernel text address that follows an
+ * address inside the scheduler functions.  Returns 0 if p is NULL, current
+ * or running, if its stack looks unusable, or if no such address is found.
+ */
+unsigned long get_wchan(struct task_struct *p)
+{
+	unsigned long stack_page, sp, ip;
+	bool seen_sched = false;
+
+	if ((p == NULL) || (p == current) || (p->state == TASK_RUNNING))
+		return 0;
+
+	stack_page = (unsigned long) task_stack_page(p);
+	/* Bail if the process has no kernel stack for some reason */
+	if (stack_page == 0)
+		return 0;
+
+	sp = p->thread.switch_buf->JB_SP;
+	/*
+	 * Bail if the stack pointer is below the bottom of the kernel
+	 * stack for some reason
+	 */
+	if (sp < stack_page)
+		return 0;
+
+	while (sp < stack_page + THREAD_SIZE) {
+		ip = *((unsigned long *) sp);
+		if (in_sched_functions(ip))
+			/* Ignore everything until we're above the scheduler */
+			seen_sched = true;
+		else if (kernel_text_address(ip) && seen_sched)
+			return ip;
+
+		sp += sizeof(unsigned long);
+	}
+
+	return 0;
+}
+
+/*
+ * Save the floating point registers of the process whose pid is
+ * userspace_pid[] for the current CPU into fpu.  The t argument is unused.
+ */
+int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu)
+{
+	int cpu = current_thread_info()->cpu;
+
+	return save_fp_registers(userspace_pid[cpu], (unsigned long *) fpu);
+}
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index 04cebcf..00197d3 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -4,6 +4,7 @@
  */
 
 #include "linux/sched.h"
+#include "kern_util.h"
 #include "os.h"
 #include "skas.h"
 
@@ -11,7 +12,7 @@
 
 static void kill_off_processes(void)
 {
-	if(proc_mm)
+	if (proc_mm)
 		/*
 		 * FIXME: need to loop over userspace_pids
 		 */
@@ -21,8 +22,8 @@
 		int pid, me;
 
 		me = os_getpid();
-		for_each_process(p){
-			if(p->mm == NULL)
+		for_each_process(p) {
+			if (p->mm == NULL)
 				continue;
 
 			pid = p->mm->context.id.u.pid;
diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c
index 89f9866..2b272b6 100644
--- a/arch/um/kernel/sigio.c
+++ b/arch/um/kernel/sigio.c
@@ -1,18 +1,12 @@
 /*
- * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
  * Licensed under the GPL
  */
 
-#include "linux/kernel.h"
-#include "linux/list.h"
-#include "linux/slab.h"
-#include "linux/signal.h"
-#include "linux/interrupt.h"
-#include "init.h"
-#include "sigio.h"
-#include "irq_user.h"
+#include <linux/interrupt.h>
 #include "irq_kern.h"
 #include "os.h"
+#include "sigio.h"
 
 /* Protected by sigio_lock() called from write_sigio_workaround */
 static int sigio_irq_fd = -1;
@@ -33,9 +27,9 @@
 	err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt,
 			     IRQF_DISABLED|IRQF_SAMPLE_RANDOM, "write sigio",
 			     NULL);
-	if(err){
-		printk("write_sigio_irq : um_request_irq failed, err = %d\n",
-		       err);
+	if (err) {
+		printk(KERN_ERR "write_sigio_irq : um_request_irq failed, "
+		       "err = %d\n", err);
 		return -1;
 	}
 	sigio_irq_fd = fd;
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 19cb977..b0fce72 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -3,12 +3,12 @@
  * Licensed under the GPL
  */
 
-#include "linux/module.h"
-#include "linux/ptrace.h"
-#include "linux/sched.h"
-#include "asm/siginfo.h"
-#include "asm/signal.h"
-#include "asm/unistd.h"
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <asm/siginfo.h>
+#include <asm/signal.h>
+#include <asm/unistd.h>
 #include "frame_kern.h"
 #include "kern_util.h"
 #include "sigcontext.h"
@@ -36,7 +36,7 @@
 	/* Did we come from a system call? */
 	if (PT_REGS_SYSCALL_NR(regs) >= 0) {
 		/* If so, check system call restarting.. */
-		switch(PT_REGS_SYSCALL_RET(regs)) {
+		switch (PT_REGS_SYSCALL_RET(regs)) {
 		case -ERESTART_RESTARTBLOCK:
 		case -ERESTARTNOHAND:
 			PT_REGS_SYSCALL_RET(regs) = -EINTR;
@@ -116,7 +116,7 @@
 	/* Did we come from a system call? */
 	if (!handled_sig && (PT_REGS_SYSCALL_NR(regs) >= 0)) {
 		/* Restart the system call - no handlers present */
-		switch(PT_REGS_SYSCALL_RET(regs)) {
+		switch (PT_REGS_SYSCALL_RET(regs)) {
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c
index 8d07a7a..2c8583c 100644
--- a/arch/um/kernel/skas/clone.c
+++ b/arch/um/kernel/skas/clone.c
@@ -1,17 +1,20 @@
-#include <sched.h>
-#include <signal.h>
-#include <sys/mman.h>
-#include <sys/time.h>
-#include <asm/unistd.h>
-#include "as-layout.h"
-#include "ptrace_user.h"
-#include "skas.h"
-#include "stub-data.h"
-#include "uml-config.h"
-#include "sysdep/stub.h"
-#include "kern_constants.h"
+/*
+ * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
 
-/* This is in a separate file because it needs to be compiled with any
+#include <signal.h>
+#include <sched.h>
+#include <asm/unistd.h>
+#include <sys/time.h>
+#include "as-layout.h"
+#include "kern_constants.h"
+#include "ptrace_user.h"
+#include "stub-data.h"
+#include "sysdep/stub.h"
+
+/*
+ * This is in a separate file because it needs to be compiled with any
  * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled
  *
  * Use UM_KERN_PAGE_SIZE instead of PAGE_SIZE because that calls getpagesize
@@ -26,25 +29,26 @@
 
 	err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
 			    STUB_DATA + UM_KERN_PAGE_SIZE / 2 - sizeof(void *));
-	if(err != 0)
+	if (err != 0)
 		goto out;
 
 	err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
-	if(err)
+	if (err)
 		goto out;
 
-	err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL, 
+	err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL,
 			    (long) &data->timer, 0);
-	if(err)
+	if (err)
 		goto out;
 
 	remap_stack(data->fd, data->offset);
 	goto done;
 
  out:
-	/* save current result. 
-	 * Parent: pid; 
-	 * child: retcode of mmap already saved and it jumps around this 
+	/*
+	 * save current result.
+	 * Parent: pid;
+	 * child: retcode of mmap already saved and it jumps around this
 	 * assignment
 	 */
 	data->err = err;
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index f859ec3..78b3e9f 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -34,33 +34,14 @@
 	if (!pte)
 		goto out_pte;
 
-	/*
-	 * There's an interaction between the skas0 stub pages, stack
-	 * randomization, and the BUG at the end of exit_mmap.  exit_mmap
-	 * checks that the number of page tables freed is the same as had
-	 * been allocated.  If the stack is on the last page table page,
-	 * then the stack pte page will be freed, and if not, it won't.  To
-	 * avoid having to know where the stack is, or if the process mapped
-	 * something at the top of its address space for some other reason,
-	 * we set TASK_SIZE to end at the start of the last page table.
-	 * This keeps exit_mmap off the last page, but introduces a leak
-	 * of that page.  So, we hang onto it here and free it in
-	 * destroy_context_skas.
-	 */
-
-	mm->context.last_page_table = pmd_page_vaddr(*pmd);
-#ifdef CONFIG_3_LEVEL_PGTABLES
-	mm->context.last_pmd = (unsigned long) __va(pud_val(*pud));
-#endif
-
 	*pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT));
 	*pte = pte_mkread(*pte);
 	return 0;
 
  out_pmd:
-	pud_free(pud);
+	pud_free(mm, pud);
  out_pte:
-	pmd_free(pmd);
+	pmd_free(mm, pmd);
  out:
 	return -ENOMEM;
 }
@@ -76,24 +57,6 @@
 		stack = get_zeroed_page(GFP_KERNEL);
 		if (stack == 0)
 			goto out;
-
-		/*
-		 * This zeros the entry that pgd_alloc didn't, needed since
-		 * we are about to reinitialize it, and want mm.nr_ptes to
-		 * be accurate.
-		 */
-		mm->pgd[USER_PTRS_PER_PGD] = __pgd(0);
-
-		ret = init_stub_pte(mm, STUB_CODE,
-				    (unsigned long) &__syscall_stub_start);
-		if (ret)
-			goto out_free;
-
-		ret = init_stub_pte(mm, STUB_DATA, stack);
-		if (ret)
-			goto out_free;
-
-		mm->nr_ptes--;
 	}
 
 	to_mm->id.stack = stack;
@@ -114,6 +77,11 @@
 			to_mm->id.u.pid = copy_context_skas0(stack,
 							     from_mm->id.u.pid);
 		else to_mm->id.u.pid = start_userspace(stack);
+
+		if (to_mm->id.u.pid < 0) {
+			ret = to_mm->id.u.pid;
+			goto out_free;
+		}
 	}
 
 	ret = init_new_ldt(to_mm, from_mm);
@@ -132,24 +100,87 @@
 	return ret;
 }
 
+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+{
+	struct page **pages;
+	int err, ret;
+
+	if (!skas_needs_stub)
+		return;
+
+	ret = init_stub_pte(mm, STUB_CODE,
+			    (unsigned long) &__syscall_stub_start);
+	if (ret)
+		goto out;
+
+	ret = init_stub_pte(mm, STUB_DATA, mm->context.id.stack);
+	if (ret)
+		goto out;
+
+	pages = kmalloc(2 * sizeof(struct page *), GFP_KERNEL);
+	if (pages == NULL) {
+		printk(KERN_ERR "arch_dup_mmap failed to allocate 2 page "
+		       "pointers\n");
+		goto out;
+	}
+
+	pages[0] = virt_to_page(&__syscall_stub_start);
+	pages[1] = virt_to_page(mm->context.id.stack);
+
+	/* dup_mmap already holds mmap_sem */
+	err = install_special_mapping(mm, STUB_START, STUB_END - STUB_START,
+				      VM_READ | VM_MAYREAD | VM_EXEC |
+				      VM_MAYEXEC | VM_DONTCOPY, pages);
+	if (err) {
+		printk(KERN_ERR "install_special_mapping returned %d\n", err);
+		goto out_free;
+	}
+	return;
+
+out_free:
+	kfree(pages);
+out:
+	force_sigsegv(SIGSEGV, current);
+}
+
+void arch_exit_mmap(struct mm_struct *mm)
+{
+	pte_t *pte;
+
+	pte = virt_to_pte(mm, STUB_CODE);
+	if (pte != NULL)
+		pte_clear(mm, STUB_CODE, pte);
+
+	pte = virt_to_pte(mm, STUB_DATA);
+	if (pte == NULL)
+		return;
+
+	pte_clear(mm, STUB_DATA, pte);
+}
+
 void destroy_context(struct mm_struct *mm)
 {
 	struct mm_context *mmu = &mm->context;
 
 	if (proc_mm)
 		os_close_file(mmu->id.u.mm_fd);
-	else
+	else {
+		/*
+		 * If init_new_context wasn't called, this will be
+		 * zero, resulting in a kill(0), which will result in the
+		 * whole UML suddenly dying.  Also, cover negative and
+		 * 1 cases, since they shouldn't happen either.
+		 */
+		if (mmu->id.u.pid < 2) {
+			printk(KERN_ERR "corrupt mm_context - pid = %d\n",
+			       mmu->id.u.pid);
+			return;
+		}
 		os_kill_ptraced_process(mmu->id.u.pid, 1);
-
-	if (!proc_mm || !ptrace_faultinfo) {
-		free_page(mmu->id.stack);
-		pte_lock_deinit(virt_to_page(mmu->last_page_table));
-		pte_free_kernel((pte_t *) mmu->last_page_table);
-		dec_zone_page_state(virt_to_page(mmu->last_page_table), NR_PAGETABLE);
-#ifdef CONFIG_3_LEVEL_PGTABLES
-		pmd_free((pmd_t *) mmu->last_pmd);
-#endif
 	}
 
+	if (skas_needs_stub)
+		free_page(mmu->id.stack);
+
 	free_ldt(mmu);
 }
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index fce389c..2e9852c 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -6,19 +6,25 @@
 #include "linux/init.h"
 #include "linux/sched.h"
 #include "as-layout.h"
+#include "kern.h"
 #include "os.h"
 #include "skas.h"
 
 int new_mm(unsigned long stack)
 {
-	int fd;
+	int fd, err;
 
 	fd = os_open_file("/proc/mm", of_cloexec(of_write(OPENFLAGS())), 0);
 	if (fd < 0)
 		return fd;
 
-	if (skas_needs_stub)
-		map_stub_pages(fd, STUB_CODE, STUB_DATA, stack);
+	if (skas_needs_stub) {
+		err = map_stub_pages(fd, STUB_CODE, STUB_DATA, stack);
+		if (err) {
+			os_close_file(fd);
+			return err;
+		}
+	}
 
 	return fd;
 }
@@ -49,8 +55,14 @@
 {
 	stack_protections((unsigned long) &cpu0_irqstack);
 	set_sigstack(cpu0_irqstack, THREAD_SIZE);
-	if (proc_mm)
+	if (proc_mm) {
 		userspace_pid[0] = start_userspace(0);
+		if (userspace_pid[0] < 0) {
+			printf("start_uml - start_userspace returned %d\n",
+			       userspace_pid[0]);
+			exit(1);
+		}
+	}
 
 	init_new_thread_signals();
 
diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
index 50b476f..4e3b820 100644
--- a/arch/um/kernel/skas/syscall.c
+++ b/arch/um/kernel/skas/syscall.c
@@ -9,6 +9,9 @@
 #include "sysdep/ptrace.h"
 #include "sysdep/syscalls.h"
 
+extern int syscall_table_size;
+#define NR_syscalls (syscall_table_size / sizeof(void *))
+
 void handle_syscall(struct uml_pt_regs *r)
 {
 	struct pt_regs *regs = container_of(r, struct pt_regs, regs);
@@ -17,9 +20,6 @@
 
 	syscall_trace(r, 0);
 
-	current->thread.nsyscalls++;
-	nsyscalls++;
-
 	/*
 	 * This should go in the declaration of syscall, but when I do that,
 	 * strace -f -c bash -c 'ls ; ls' breaks, sometimes not tracing
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index 1d8b119..e22c969 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -3,128 +3,130 @@
  * Licensed under the GPL
  */
 
-#include "linux/err.h"
-#include "linux/highmem.h"
-#include "linux/mm.h"
-#include "asm/current.h"
-#include "asm/page.h"
-#include "asm/pgtable.h"
+#include <linux/err.h>
+#include <linux/highmem.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <asm/current.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
 #include "kern_util.h"
 #include "os.h"
 
-extern void *um_virt_to_phys(struct task_struct *task, unsigned long addr,
-			     pte_t *pte_out);
-
-static unsigned long maybe_map(unsigned long virt, int is_write)
+pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
 {
-	pte_t pte;
-	int err;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
 
-	void *phys = um_virt_to_phys(current, virt, &pte);
-	int dummy_code;
+	if (mm == NULL)
+		return NULL;
 
-	if (IS_ERR(phys) || (is_write && !pte_write(pte))) {
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		return NULL;
+
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		return NULL;
+
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd))
+		return NULL;
+
+	return pte_offset_kernel(pmd, addr);
+}
+
+static pte_t *maybe_map(unsigned long virt, int is_write)
+{
+	pte_t *pte = virt_to_pte(current->mm, virt);
+	int err, dummy_code;
+
+	if ((pte == NULL) || !pte_present(*pte) ||
+	    (is_write && !pte_write(*pte))) {
 		err = handle_page_fault(virt, 0, is_write, 1, &dummy_code);
 		if (err)
-			return -1UL;
-		phys = um_virt_to_phys(current, virt, NULL);
+			return NULL;
+		pte = virt_to_pte(current->mm, virt);
 	}
-	if (IS_ERR(phys))
-		phys = (void *) -1;
+	if ((pte != NULL) && !pte_present(*pte))
+		pte = NULL;
 
-	return (unsigned long) phys;
+	return pte;
 }
 
 static int do_op_one_page(unsigned long addr, int len, int is_write,
 		 int (*op)(unsigned long addr, int len, void *arg), void *arg)
 {
+	jmp_buf buf;
 	struct page *page;
-	int n;
+	pte_t *pte;
+	int n, faulted;
 
-	addr = maybe_map(addr, is_write);
-	if (addr == -1UL)
+	pte = maybe_map(addr, is_write);
+	if (pte == NULL)
 		return -1;
 
-	page = phys_to_page(addr);
+	page = pte_page(*pte);
 	addr = (unsigned long) kmap_atomic(page, KM_UML_USERCOPY) +
 		(addr & ~PAGE_MASK);
 
-	n = (*op)(addr, len, arg);
+	current->thread.fault_catcher = &buf;
+
+	faulted = UML_SETJMP(&buf);
+	if (faulted == 0)
+		n = (*op)(addr, len, arg);
+	else
+		n = -1;
+
+	current->thread.fault_catcher = NULL;
 
 	kunmap_atomic(page, KM_UML_USERCOPY);
 
 	return n;
 }
 
-static void do_buffer_op(void *jmpbuf, void *arg_ptr)
+static int buffer_op(unsigned long addr, int len, int is_write,
+		     int (*op)(unsigned long, int, void *), void *arg)
 {
-	va_list args;
-	unsigned long addr;
-	int len, is_write, size, remain, n;
-	int (*op)(unsigned long, int, void *);
-	void *arg;
-	int *res;
+	int size, remain, n;
 
-	va_copy(args, *(va_list *)arg_ptr);
-	addr = va_arg(args, unsigned long);
-	len = va_arg(args, int);
-	is_write = va_arg(args, int);
-	op = va_arg(args, void *);
-	arg = va_arg(args, void *);
-	res = va_arg(args, int *);
-	va_end(args);
 	size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len);
 	remain = len;
 
-	current->thread.fault_catcher = jmpbuf;
 	n = do_op_one_page(addr, size, is_write, op, arg);
 	if (n != 0) {
-		*res = (n < 0 ? remain : 0);
+		remain = (n < 0 ? remain : 0);
 		goto out;
 	}
 
 	addr += size;
 	remain -= size;
-	if (remain == 0) {
-		*res = 0;
+	if (remain == 0)
 		goto out;
-	}
 
-	while(addr < ((addr + remain) & PAGE_MASK)) {
+	while (addr < ((addr + remain) & PAGE_MASK)) {
 		n = do_op_one_page(addr, PAGE_SIZE, is_write, op, arg);
 		if (n != 0) {
-			*res = (n < 0 ? remain : 0);
+			remain = (n < 0 ? remain : 0);
 			goto out;
 		}
 
 		addr += PAGE_SIZE;
 		remain -= PAGE_SIZE;
 	}
-	if (remain == 0) {
-		*res = 0;
+	if (remain == 0)
+		goto out;
+
+	n = do_op_one_page(addr, remain, is_write, op, arg);
+	if (n != 0) {
+		remain = (n < 0 ? remain : 0);
 		goto out;
 	}
 
-	n = do_op_one_page(addr, remain, is_write, op, arg);
-	if (n != 0)
-		*res = (n < 0 ? remain : 0);
-	else *res = 0;
+	return 0;
  out:
-	current->thread.fault_catcher = NULL;
-}
-
-static int buffer_op(unsigned long addr, int len, int is_write,
-		     int (*op)(unsigned long addr, int len, void *arg),
-		     void *arg)
-{
-	int faulted, res;
-
-	faulted = setjmp_wrapper(do_buffer_op, addr, len, is_write, op, arg,
-				 &res);
-	if (!faulted)
-		return res;
-
-	return addr + len - (unsigned long) current->thread.fault_addr;
+	return remain;
 }
 
 static int copy_chunk_from_user(unsigned long from, int len, void *arg)
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index 36d89cf..e1062ec 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -21,7 +21,6 @@
 #include "asm/smp.h"
 #include "asm/processor.h"
 #include "asm/spinlock.h"
-#include "kern_util.h"
 #include "kern.h"
 #include "irq_user.h"
 #include "os.h"
@@ -61,7 +60,7 @@
 			continue;
 		os_write_file(cpu_data[i].ipi_pipe[1], "S", 1);
 	}
-	printk(KERN_INFO "done\n");
+	printk(KERN_CONT "done\n");
 }
 
 static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
@@ -75,8 +74,7 @@
 	if (err < 0)
 		panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err);
 
-	os_set_fd_async(cpu_data[cpu].ipi_pipe[0],
-		     current->thread.mode.tt.extern_pid);
+	os_set_fd_async(cpu_data[cpu].ipi_pipe[0]);
 
 	wmb();
 	if (cpu_test_and_set(cpu, cpu_callin_map)) {
@@ -129,8 +127,7 @@
 	if (err < 0)
 		panic("CPU#0 failed to create IPI pipe, errno = %d", -err);
 
-	os_set_fd_async(cpu_data[me].ipi_pipe[0],
-		     current->thread.mode.tt.extern_pid);
+	os_set_fd_async(cpu_data[me].ipi_pipe[0]);
 
 	for (cpu = 1; cpu < ncpus; cpu++) {
 		printk(KERN_INFO "Booting processor %d...\n", cpu);
@@ -143,9 +140,8 @@
 		while (waittime-- && !cpu_isset(cpu, cpu_callin_map))
 			cpu_relax();
 
-		if (cpu_isset(cpu, cpu_callin_map))
-			printk(KERN_INFO "done\n");
-		else printk(KERN_INFO "failed\n");
+		printk(KERN_INFO "%s\n",
+		       cpu_isset(cpu, cpu_callin_map) ? "done" : "failed");
 	}
 }
 
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index b9d92b2..9cffc62 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -13,9 +13,6 @@
 #include "asm/uaccess.h"
 #include "asm/unistd.h"
 
-/*  Unlocked, I don't care if this is a bit off */
-int nsyscalls = 0;
-
 long sys_fork(void)
 {
 	long ret;
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index 9326357..56d43d0 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -1,38 +1,37 @@
-/* 
- * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/sched.h"
-#include "linux/kernel.h"
-#include "linux/module.h"
-#include "linux/kallsyms.h"
-#include "asm/page.h"
-#include "asm/processor.h"
+#include <linux/kallsyms.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
 #include "sysrq.h"
 
 /* Catch non-i386 SUBARCH's. */
 #if !defined(CONFIG_UML_X86) || defined(CONFIG_64BIT)
 void show_trace(struct task_struct *task, unsigned long * stack)
 {
-        unsigned long addr;
+	unsigned long addr;
 
-        if (!stack) {
+	if (!stack) {
 		stack = (unsigned long*) &stack;
 		WARN_ON(1);
 	}
 
-        printk("Call Trace: \n");
-        while (((long) stack & (THREAD_SIZE-1)) != 0) {
-                addr = *stack;
+	printk(KERN_INFO "Call Trace: \n");
+	while (((long) stack & (THREAD_SIZE-1)) != 0) {
+		addr = *stack;
 		if (__kernel_text_address(addr)) {
-			printk("%08lx:  [<%08lx>]", (unsigned long) stack, addr);
-			print_symbol(" %s", addr);
-			printk("\n");
-                }
-                stack++;
-        }
-        printk("\n");
+			printk(KERN_INFO "%08lx:  [<%08lx>]",
+			       (unsigned long) stack, addr);
+			print_symbol(KERN_CONT " %s", addr);
+			printk(KERN_CONT "\n");
+		}
+		stack++;
+	}
+	printk(KERN_INFO "\n");
 }
 #endif
 
@@ -67,14 +66,13 @@
 	}
 
 	stack = esp;
-	for(i = 0; i < kstack_depth_to_print; i++) {
+	for (i = 0; i < kstack_depth_to_print; i++) {
 		if (kstack_end(stack))
 			break;
 		if (i && ((i % 8) == 0))
-			printk("\n       ");
+			printk("\n" KERN_INFO "       ");
 		printk("%08lx ", *stack++);
 	}
 
-	printk("Call Trace: \n");
 	show_trace(task, esp);
 }
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 1ac746a..e066e84 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -3,12 +3,12 @@
  * Licensed under the GPL
  */
 
-#include "linux/clockchips.h"
-#include "linux/interrupt.h"
-#include "linux/jiffies.h"
-#include "linux/threads.h"
-#include "asm/irq.h"
-#include "asm/param.h"
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/jiffies.h>
+#include <linux/threads.h>
+#include <asm/irq.h>
+#include <asm/param.h>
 #include "kern_util.h"
 #include "os.h"
 
@@ -32,7 +32,7 @@
 static void itimer_set_mode(enum clock_event_mode mode,
 			    struct clock_event_device *evt)
 {
-	switch(mode) {
+	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
 		set_interval();
 		break;
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index f4a0e40..d175d05 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -3,9 +3,10 @@
  * Licensed under the GPL
  */
 
-#include "linux/mm.h"
-#include "asm/pgtable.h"
-#include "asm/tlbflush.h"
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
 #include "as-layout.h"
 #include "mem_user.h"
 #include "os.h"
@@ -56,7 +57,7 @@
 
 	for (i = 0; i < end && !ret; i++) {
 		op = &hvc->ops[i];
-		switch(op->type) {
+		switch (op->type) {
 		case MMAP:
 			ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len,
 				  op->u.mmap.prot, op->u.mmap.fd,
@@ -183,27 +184,30 @@
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
+		if ((addr >= STUB_START) && (addr < STUB_END))
+			continue;
+
 		r = pte_read(*pte);
 		w = pte_write(*pte);
 		x = pte_exec(*pte);
 		if (!pte_young(*pte)) {
 			r = 0;
 			w = 0;
-		} else if (!pte_dirty(*pte)) {
+		} else if (!pte_dirty(*pte))
 			w = 0;
-		}
+
 		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
 			(x ? UM_PROT_EXEC : 0));
 		if (hvc->force || pte_newpage(*pte)) {
 			if (pte_present(*pte))
 				ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
 					       PAGE_SIZE, prot, hvc);
-			else ret = add_munmap(addr, PAGE_SIZE, hvc);
-		}
-		else if (pte_newprot(*pte))
+			else
+				ret = add_munmap(addr, PAGE_SIZE, hvc);
+		} else if (pte_newprot(*pte))
 			ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
 		*pte = pte_mkuptodate(*pte);
-	} while (pte++, addr += PAGE_SIZE, ((addr != end) && !ret));
+	} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
 	return ret;
 }
 
@@ -225,7 +229,7 @@
 			}
 		}
 		else ret = update_pte_range(pmd, addr, next, hvc);
-	} while (pmd++, addr = next, ((addr != end) && !ret));
+	} while (pmd++, addr = next, ((addr < end) && !ret));
 	return ret;
 }
 
@@ -247,7 +251,7 @@
 			}
 		}
 		else ret = update_pmd_range(pud, addr, next, hvc);
-	} while (pud++, addr = next, ((addr != end) && !ret));
+	} while (pud++, addr = next, ((addr < end) && !ret));
 	return ret;
 }
 
@@ -270,7 +274,7 @@
 			}
 		}
 		else ret = update_pud_range(pgd, addr, next, &hvc);
-	} while (pgd++, addr = next, ((addr != end_addr) && !ret));
+	} while (pgd++, addr = next, ((addr < end_addr) && !ret));
 
 	if (!ret)
 		ret = do_ops(&hvc, hvc.index, 1);
@@ -485,9 +489,6 @@
 static void fix_range(struct mm_struct *mm, unsigned long start_addr,
 		      unsigned long end_addr, int force)
 {
-	if (!proc_mm && (end_addr > STUB_START))
-		end_addr = STUB_START;
-
 	fix_range_common(mm, start_addr, end_addr, force);
 }
 
@@ -499,10 +500,9 @@
 	else fix_range(vma->vm_mm, start, end, 0);
 }
 
-void flush_tlb_mm(struct mm_struct *mm)
+void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+			unsigned long end)
 {
-	unsigned long end;
-
 	/*
 	 * Don't bother flushing if this address space is about to be
 	 * destroyed.
@@ -510,8 +510,17 @@
 	if (atomic_read(&mm->mm_users) == 0)
 		return;
 
-	end = proc_mm ? task_size : STUB_START;
-	fix_range(mm, 0, end, 0);
+	fix_range(mm, start, end, 0);
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma = mm->mmap;
+
+	while (vma != NULL) {
+		fix_range(mm, vma->vm_start, vma->vm_end, 0);
+		vma = vma->vm_next;
+	}
 }
 
 void force_flush_all(void)
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index cb3321f..44e4904 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -13,6 +13,7 @@
 #include "as-layout.h"
 #include "kern_util.h"
 #include "os.h"
+#include "skas.h"
 #include "sysdep/sigcontext.h"
 
 /*
@@ -128,7 +129,19 @@
 	force_sig_info(SIGSEGV, &si, current);
 }
 
-static void segv_handler(int sig, struct uml_pt_regs *regs)
+void fatal_sigsegv(void)
+{
+	force_sigsegv(SIGSEGV, current);
+	do_signal();
+	/*
+	 * This is to tell gcc that we're not returning - do_signal
+	 * can, in general, return, but in this case it won't, since
+	 * we have just queued a fatal SIGSEGV.
+	 */
+	os_dump_core();
+}
+
+void segv_handler(int sig, struct uml_pt_regs *regs)
 {
 	struct faultinfo * fi = UPT_FAULTINFO(regs);
 
@@ -216,9 +229,6 @@
 
 void relay_signal(int sig, struct uml_pt_regs *regs)
 {
-	if (arch_handle_signal(sig, regs))
-		return;
-
 	if (!UPT_IS_USER(regs)) {
 		if (sig == SIGBUS)
 			printk(KERN_ERR "Bus error - the host /dev/shm or /tmp "
@@ -226,31 +236,24 @@
 		panic("Kernel mode signal %d", sig);
 	}
 
+	arch_examine_signal(sig, regs);
+
 	current->thread.arch.faultinfo = *UPT_FAULTINFO(regs);
 	force_sig(sig, current);
 }
 
-static void bus_handler(int sig, struct uml_pt_regs *regs)
+void bus_handler(int sig, struct uml_pt_regs *regs)
 {
 	if (current->thread.fault_catcher != NULL)
 		UML_LONGJMP(current->thread.fault_catcher, 1);
 	else relay_signal(sig, regs);
 }
 
-static void winch(int sig, struct uml_pt_regs *regs)
+void winch(int sig, struct uml_pt_regs *regs)
 {
 	do_IRQ(WINCH_IRQ, regs);
 }
 
-const struct kern_handlers handlinfo_kern = {
-	.relay_signal = relay_signal,
-	.winch = winch,
-	.bus_handler = bus_handler,
-	.page_fault = segv_handler,
-	.sigio_handler = sigio_handler,
-	.timer_handler = timer_handler
-};
-
 void trap_init(void)
 {
 }
diff --git a/arch/um/kernel/uaccess.c b/arch/um/kernel/uaccess.c
index d7436aa..f0f4b04 100644
--- a/arch/um/kernel/uaccess.c
+++ b/arch/um/kernel/uaccess.c
@@ -1,10 +1,11 @@
 /*
  * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk)
- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-/* These are here rather than tt/uaccess.c because skas mode needs them in
+/*
+ * These are here rather than tt/uaccess.c because skas mode needs them in
  * order to do SIGBUS recovery when a tmpfs mount runs out of room.
  */
 
@@ -25,6 +26,8 @@
 
 	fault = __do_user_copy(to, from, n, fault_addr, fault_catcher,
 			       __do_copy, &faulted);
-	if(!faulted) return(0);
-	else return(n - (fault - (unsigned long) to));
+	if (!faulted)
+		return 0;
+	else
+		return n - (fault - (unsigned long) to);
 }
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index f1c7139..468aba9 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -3,22 +3,23 @@
  * Licensed under the GPL
  */
 
-#include "linux/delay.h"
-#include "linux/mm.h"
-#include "linux/module.h"
-#include "linux/seq_file.h"
-#include "linux/string.h"
-#include "linux/utsname.h"
-#include "asm/pgtable.h"
-#include "asm/processor.h"
-#include "asm/setup.h"
-#include "arch.h"
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/utsname.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/setup.h>
 #include "as-layout.h"
+#include "arch.h"
 #include "init.h"
 #include "kern.h"
+#include "kern_util.h"
 #include "mem_user.h"
 #include "os.h"
-#include "skas.h"
 
 #define DEFAULT_COMMAND_LINE "root=98:0"
 
@@ -100,8 +101,6 @@
 };
 
 /* Set in linux_main */
-unsigned long host_task_size;
-unsigned long task_size;
 unsigned long uml_physmem;
 unsigned long uml_reserved; /* Also modified in mem_init */
 unsigned long start_vm;
@@ -197,20 +196,19 @@
 "    Prints this message.\n\n"
 );
 
-static int __init uml_checksetup(char *line, int *add)
+static void __init uml_checksetup(char *line, int *add)
 {
 	struct uml_param *p;
 
 	p = &__uml_setup_start;
-	while(p < &__uml_setup_end) {
+	while (p < &__uml_setup_end) {
 		int n;
 
 		n = strlen(p->str);
 		if (!strncmp(line, p->str, n) && p->setup_func(line + n, add))
-			return 1;
+			return;
 		p++;
 	}
-	return 0;
 }
 
 static void __init uml_postsetup(void)
@@ -218,13 +216,30 @@
 	initcall_t *p;
 
 	p = &__uml_postsetup_start;
-	while(p < &__uml_postsetup_end) {
+	while (p < &__uml_postsetup_end) {
 		(*p)();
 		p++;
 	}
 	return;
 }
 
+static int panic_exit(struct notifier_block *self, unsigned long unused1,
+		      void *unused2)
+{
+	bust_spinlocks(1);
+	show_regs(&(current->thread.regs));
+	bust_spinlocks(0);
+	uml_exitcode = 1;
+	os_dump_core();
+	return 0;
+}
+
+static struct notifier_block panic_exit_notifier = {
+	.notifier_call 		= panic_exit,
+	.next 			= NULL,
+	.priority 		= 0
+};
+
 /* Set during early boot */
 unsigned long brk_start;
 unsigned long end_iomem;
@@ -234,20 +249,6 @@
 
 extern char __binary_start;
 
-static unsigned long set_task_sizes_skas(unsigned long *task_size_out)
-{
-	/* Round up to the nearest 4M */
-	unsigned long host_task_size = ROUND_4M((unsigned long)
-						&host_task_size);
-
-	if (!skas_needs_stub)
-		*task_size_out = host_task_size;
-	else
-		*task_size_out = STUB_START & PGDIR_MASK;
-
-	return host_task_size;
-}
-
 int __init linux_main(int argc, char **argv)
 {
 	unsigned long avail, diff;
@@ -278,13 +279,6 @@
 
 	printf("UML running in %s mode\n", mode);
 
-	host_task_size = set_task_sizes_skas(&task_size);
-
-	/*
-	 * Setting up handlers to 'sig_info' struct
-	 */
-	os_fill_handlinfo(handlinfo_kern);
-
 	brk_start = (unsigned long) sbrk(0);
 
 	/*
@@ -309,7 +303,7 @@
 
 	highmem = 0;
 	iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK;
-	max_physmem = get_kmem_end() - uml_physmem - iomem_size - MIN_VMALLOC;
+	max_physmem = CONFIG_TOP_ADDR - uml_physmem - iomem_size - MIN_VMALLOC;
 
 	/*
 	 * Zones have to begin on a 1 << MAX_ORDER page boundary,
@@ -341,7 +335,7 @@
 	}
 
 	virtmem_size = physmem_size;
-	avail = get_kmem_end() - start_vm;
+	avail = CONFIG_TOP_ADDR - start_vm;
 	if (physmem_size > avail)
 		virtmem_size = avail;
 	end_vm = start_vm + virtmem_size;
@@ -350,6 +344,9 @@
 		printf("Kernel virtual memory size shrunk to %lu bytes\n",
 		       virtmem_size);
 
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &panic_exit_notifier);
+
 	uml_postsetup();
 
 	stack_protections((unsigned long) &init_thread_info);
@@ -358,29 +355,8 @@
 	return start_uml();
 }
 
-extern int uml_exitcode;
-
-static int panic_exit(struct notifier_block *self, unsigned long unused1,
-		      void *unused2)
-{
-	bust_spinlocks(1);
-	show_regs(&(current->thread.regs));
-	bust_spinlocks(0);
-	uml_exitcode = 1;
-	os_dump_core();
-	return 0;
-}
-
-static struct notifier_block panic_exit_notifier = {
-	.notifier_call 		= panic_exit,
-	.next 			= NULL,
-	.priority 		= 0
-};
-
 void __init setup_arch(char **cmdline_p)
 {
-	atomic_notifier_chain_register(&panic_notifier_list,
-			&panic_exit_notifier);
 	paging_init();
 	strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
diff --git a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c
index 039e16e..81e07e2b 100644
--- a/arch/um/kernel/umid.c
+++ b/arch/um/kernel/umid.c
@@ -1,13 +1,12 @@
-/* 
- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "asm/errno.h"
+#include <asm/errno.h>
 #include "init.h"
-#include "os.h"
 #include "kern.h"
-#include "linux/kernel.h"
+#include "os.h"
 
 /* Changed by set_umid_arg */
 static int umid_inited = 0;
@@ -16,16 +15,16 @@
 {
 	int err;
 
-	if(umid_inited){
+	if (umid_inited) {
 		printf("umid already set\n");
 		return 0;
 	}
 
 	*add = 0;
 	err = set_umid(name);
-	if(err == -EEXIST)
+	if (err == -EEXIST)
 		printf("umid '%s' already in use\n", name);
-	else if(!err)
+	else if (!err)
 		umid_inited = 1;
 
 	return 0;
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index 8e129af..8a48d6a 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -4,7 +4,7 @@
 #
 
 obj-y = aio.o elf_aux.o execvp.o file.o helper.o irq.o main.o mem.o process.o \
-	registers.o sigio.o signal.o start_up.o time.o trap.o tty.o uaccess.o \
+	registers.o sigio.o signal.o start_up.o time.o tty.o uaccess.o \
 	umid.o tls.o user_syms.o util.o drivers/ sys-$(SUBARCH)/ skas/
 
 obj-$(CONFIG_TTY_LOG) += tty_log.o
@@ -12,7 +12,7 @@
 
 USER_OBJS := $(user-objs-y) aio.o elf_aux.o execvp.o file.o helper.o irq.o \
 	main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \
-	trap.o tty.o tls.o uaccess.o umid.o util.o
+	tty.o tls.o uaccess.o umid.o util.o
 
 CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH)
 
diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c
index 93dc0c8..b8d8c9c 100644
--- a/arch/um/os-Linux/aio.c
+++ b/arch/um/os-Linux/aio.c
@@ -12,6 +12,7 @@
 #include "aio.h"
 #include "init.h"
 #include "kern_constants.h"
+#include "kern_util.h"
 #include "os.h"
 #include "user.h"
 
diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c
index 07ca0cb..6fb0b17 100644
--- a/arch/um/os-Linux/drivers/ethertap_user.c
+++ b/arch/um/os-Linux/drivers/ethertap_user.c
@@ -131,7 +131,7 @@
 	}
 	if (c != 1) {
 		printk(UM_KERN_ERR "etap_tramp : uml_net failed\n");
-		err = helper_wait(pid, 0, "uml_net");
+		err = helper_wait(pid);
 	}
 	return err;
 }
diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c
index 1037a3b6..2448be0 100644
--- a/arch/um/os-Linux/drivers/tuntap_user.c
+++ b/arch/um/os-Linux/drivers/tuntap_user.c
@@ -14,6 +14,7 @@
 #include <sys/wait.h>
 #include <sys/uio.h>
 #include "kern_constants.h"
+#include "kern_util.h"
 #include "os.h"
 #include "tuntap.h"
 #include "user.h"
@@ -107,7 +108,7 @@
 		       "errno = %d\n", errno);
 		return err;
 	}
-	helper_wait(pid, 0, "tuntap_open_tramp");
+	helper_wait(pid);
 
 	cmsg = CMSG_FIRSTHDR(&msg);
 	if (cmsg == NULL) {
@@ -148,7 +149,7 @@
 		memset(&ifr, 0, sizeof(ifr));
 		ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
 		strlcpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name));
-		if (ioctl(pri->fd, TUNSETIFF, (void *) &ifr) < 0) {
+		if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) {
 			err = -errno;
 			printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n",
 			       errno);
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index f834627..b5afcfd 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -8,18 +8,16 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <signal.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/un.h>
 #include <sys/ioctl.h>
 #include <sys/mount.h>
-#include <sys/uio.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/un.h>
+#include "kern_constants.h"
 #include "os.h"
 #include "user.h"
-#include "kern_util.h"
 
-static void copy_stat(struct uml_stat *dst, struct stat64 *src)
+static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
 {
 	*dst = ((struct uml_stat) {
 		.ust_dev     = src->st_dev,     /* device */
@@ -43,10 +41,10 @@
 	int err;
 
 	CATCH_EINTR(err = fstat64(fd, &sbuf));
-	if(err < 0)
+	if (err < 0)
 		return -errno;
 
-	if(ubuf != NULL)
+	if (ubuf != NULL)
 		copy_stat(ubuf, &sbuf);
 	return err;
 }
@@ -56,27 +54,26 @@
 	struct stat64 sbuf;
 	int err;
 
-	do {
-		err = stat64(file_name, &sbuf);
-	} while((err < 0) && (errno == EINTR)) ;
-
-	if(err < 0)
+	CATCH_EINTR(err = stat64(file_name, &sbuf));
+	if (err < 0)
 		return -errno;
 
-	if(ubuf != NULL)
+	if (ubuf != NULL)
 		copy_stat(ubuf, &sbuf);
 	return err;
 }
 
-int os_access(const char* file, int mode)
+int os_access(const char *file, int mode)
 {
 	int amode, err;
 
-	amode=(mode&OS_ACC_R_OK ? R_OK : 0) | (mode&OS_ACC_W_OK ? W_OK : 0) |
-	      (mode&OS_ACC_X_OK ? X_OK : 0) | (mode&OS_ACC_F_OK ? F_OK : 0) ;
+	amode = (mode & OS_ACC_R_OK ? R_OK : 0) |
+		(mode & OS_ACC_W_OK ? W_OK : 0) |
+		(mode & OS_ACC_X_OK ? X_OK : 0) |
+		(mode & OS_ACC_F_OK ? F_OK : 0);
 
 	err = access(file, amode);
-	if(err < 0)
+	if (err < 0)
 		return -errno;
 
 	return 0;
@@ -88,7 +85,7 @@
 	int err;
 
 	err = ioctl(fd, cmd, arg);
-	if(err < 0)
+	if (err < 0)
 		return -errno;
 
 	return err;
@@ -97,7 +94,7 @@
 /* FIXME: ensure namebuf in os_get_if_name is big enough */
 int os_get_ifname(int fd, char* namebuf)
 {
-	if(ioctl(fd, SIOCGIFNAME, namebuf) < 0)
+	if (ioctl(fd, SIOCGIFNAME, namebuf) < 0)
 		return -errno;
 
 	return 0;
@@ -108,37 +105,22 @@
 	int disc, sencap;
 
 	disc = N_SLIP;
-	if(ioctl(fd, TIOCSETD, &disc) < 0)
+	if (ioctl(fd, TIOCSETD, &disc) < 0)
 		return -errno;
 
 	sencap = 0;
-	if(ioctl(fd, SIOCSIFENCAP, &sencap) < 0)
+	if (ioctl(fd, SIOCSIFENCAP, &sencap) < 0)
 		return -errno;
 
 	return 0;
 }
 
-int os_set_owner(int fd, int pid)
-{
-	if(fcntl(fd, F_SETOWN, pid) < 0){
-		int save_errno = errno;
-
-		if(fcntl(fd, F_GETOWN, 0) != pid)
-			return -save_errno;
-	}
-
-	return 0;
-}
-
 int os_mode_fd(int fd, int mode)
 {
 	int err;
 
-	do {
-		err = fchmod(fd, mode);
-	} while((err < 0) && (errno==EINTR)) ;
-
-	if(err < 0)
+	CATCH_EINTR(err = fchmod(fd, mode));
+	if (err < 0)
 		return -errno;
 
 	return 0;
@@ -150,64 +132,73 @@
 	int err;
 
 	err = os_stat_file(file, &buf);
-	if(err < 0)
+	if (err < 0)
 		return err;
 
-	if(S_ISDIR(buf.ust_mode))
+	if (S_ISDIR(buf.ust_mode))
 		return OS_TYPE_DIR;
-	else if(S_ISLNK(buf.ust_mode))
+	else if (S_ISLNK(buf.ust_mode))
 		return OS_TYPE_SYMLINK;
-	else if(S_ISCHR(buf.ust_mode))
+	else if (S_ISCHR(buf.ust_mode))
 		return OS_TYPE_CHARDEV;
-	else if(S_ISBLK(buf.ust_mode))
+	else if (S_ISBLK(buf.ust_mode))
 		return OS_TYPE_BLOCKDEV;
-	else if(S_ISFIFO(buf.ust_mode))
+	else if (S_ISFIFO(buf.ust_mode))
 		return OS_TYPE_FIFO;
-	else if(S_ISSOCK(buf.ust_mode))
+	else if (S_ISSOCK(buf.ust_mode))
 		return OS_TYPE_SOCK;
 	else return OS_TYPE_FILE;
 }
 
-int os_file_mode(char *file, struct openflags *mode_out)
+int os_file_mode(const char *file, struct openflags *mode_out)
 {
 	int err;
 
 	*mode_out = OPENFLAGS();
 
 	err = access(file, W_OK);
-	if(err && (errno != EACCES))
+	if (err && (errno != EACCES))
 		return -errno;
-	else if(!err)
+	else if (!err)
 		*mode_out = of_write(*mode_out);
 
 	err = access(file, R_OK);
-	if(err && (errno != EACCES))
+	if (err && (errno != EACCES))
 		return -errno;
-	else if(!err)
+	else if (!err)
 		*mode_out = of_read(*mode_out);
 
 	return err;
 }
 
-int os_open_file(char *file, struct openflags flags, int mode)
+int os_open_file(const char *file, struct openflags flags, int mode)
 {
 	int fd, err, f = 0;
 
-	if(flags.r && flags.w) f = O_RDWR;
-	else if(flags.r) f = O_RDONLY;
-	else if(flags.w) f = O_WRONLY;
+	if (flags.r && flags.w)
+		f = O_RDWR;
+	else if (flags.r)
+		f = O_RDONLY;
+	else if (flags.w)
+		f = O_WRONLY;
 	else f = 0;
 
-	if(flags.s) f |= O_SYNC;
-	if(flags.c) f |= O_CREAT;
-	if(flags.t) f |= O_TRUNC;
-	if(flags.e) f |= O_EXCL;
+	if (flags.s)
+		f |= O_SYNC;
+	if (flags.c)
+		f |= O_CREAT;
+	if (flags.t)
+		f |= O_TRUNC;
+	if (flags.e)
+		f |= O_EXCL;
+	if (flags.a)
+		f |= O_APPEND;
 
 	fd = open64(file, f, mode);
-	if(fd < 0)
+	if (fd < 0)
 		return -errno;
 
-	if(flags.cl && fcntl(fd, F_SETFD, 1)){
+	if (flags.cl && fcntl(fd, F_SETFD, 1)) {
 		err = -errno;
 		close(fd);
 		return err;
@@ -216,7 +207,7 @@
 	return fd;
 }
 
-int os_connect_socket(char *name)
+int os_connect_socket(const char *name)
 {
 	struct sockaddr_un sock;
 	int fd, err;
@@ -225,13 +216,13 @@
 	snprintf(sock.sun_path, sizeof(sock.sun_path), "%s", name);
 
 	fd = socket(AF_UNIX, SOCK_STREAM, 0);
-	if(fd < 0) {
+	if (fd < 0) {
 		err = -errno;
 		goto out;
 	}
 
 	err = connect(fd, (struct sockaddr *) &sock, sizeof(sock));
-	if(err) {
+	if (err) {
 		err = -errno;
 		goto out_close;
 	}
@@ -254,7 +245,7 @@
 	unsigned long long actual;
 
 	actual = lseek64(fd, offset, SEEK_SET);
-	if(actual != offset)
+	if (actual != offset)
 		return -errno;
 	return 0;
 }
@@ -263,7 +254,7 @@
 {
 	int n = read(fd, buf, len);
 
-	if(n < 0)
+	if (n < 0)
 		return -errno;
 	return n;
 }
@@ -272,37 +263,38 @@
 {
 	int n = write(fd, (void *) buf, len);
 
-	if(n < 0)
+	if (n < 0)
 		return -errno;
 	return n;
 }
 
-int os_file_size(char *file, unsigned long long *size_out)
+int os_file_size(const char *file, unsigned long long *size_out)
 {
 	struct uml_stat buf;
 	int err;
 
 	err = os_stat_file(file, &buf);
-	if(err < 0){
-		printk("Couldn't stat \"%s\" : err = %d\n", file, -err);
+	if (err < 0) {
+		printk(UM_KERN_ERR "Couldn't stat \"%s\" : err = %d\n", file,
+		       -err);
 		return err;
 	}
 
-	if(S_ISBLK(buf.ust_mode)){
+	if (S_ISBLK(buf.ust_mode)) {
 		int fd;
 		long blocks;
 
 		fd = open(file, O_RDONLY, 0);
-		if(fd < 0) {
+		if (fd < 0) {
 			err = -errno;
-			printk("Couldn't open \"%s\", errno = %d\n", file,
-			       errno);
+			printk(UM_KERN_ERR "Couldn't open \"%s\", "
+			       "errno = %d\n", file, errno);
 			return err;
 		}
-		if(ioctl(fd, BLKGETSIZE, &blocks) < 0){
+		if (ioctl(fd, BLKGETSIZE, &blocks) < 0) {
 			err = -errno;
-			printk("Couldn't get the block size of \"%s\", "
-			       "errno = %d\n", file, errno);
+			printk(UM_KERN_ERR "Couldn't get the block size of "
+			       "\"%s\", errno = %d\n", file, errno);
 			close(fd);
 			return err;
 		}
@@ -314,14 +306,15 @@
 	return 0;
 }
 
-int os_file_modtime(char *file, unsigned long *modtime)
+int os_file_modtime(const char *file, unsigned long *modtime)
 {
 	struct uml_stat buf;
 	int err;
 
 	err = os_stat_file(file, &buf);
-	if(err < 0){
-		printk("Couldn't stat \"%s\" : err = %d\n", file, -err);
+	if (err < 0) {
+		printk(UM_KERN_ERR "Couldn't stat \"%s\" : err = %d\n", file,
+		       -err);
 		return err;
 	}
 
@@ -329,26 +322,13 @@
 	return 0;
 }
 
-int os_get_exec_close(int fd, int *close_on_exec)
-{
-	int ret;
-
-	CATCH_EINTR(ret = fcntl(fd, F_GETFD));
-
-	if(ret < 0)
-		return -errno;
-
-	*close_on_exec = (ret & FD_CLOEXEC) ? 1 : 0;
-	return ret;
-}
-
 int os_set_exec_close(int fd)
 {
 	int err;
 
 	CATCH_EINTR(err = fcntl(fd, F_SETFD, FD_CLOEXEC));
 
-	if(err < 0)
+	if (err < 0)
 		return -errno;
 	return err;
 }
@@ -358,53 +338,51 @@
 	int err, type = stream ? SOCK_STREAM : SOCK_DGRAM;
 
 	err = socketpair(AF_UNIX, type, 0, fds);
-	if(err < 0)
+	if (err < 0)
 		return -errno;
 
-	if(!close_on_exec)
+	if (!close_on_exec)
 		return 0;
 
 	err = os_set_exec_close(fds[0]);
-	if(err < 0)
+	if (err < 0)
 		goto error;
 
 	err = os_set_exec_close(fds[1]);
-	if(err < 0)
+	if (err < 0)
 		goto error;
 
 	return 0;
 
  error:
-	printk("os_pipe : Setting FD_CLOEXEC failed, err = %d\n", -err);
+	printk(UM_KERN_ERR "os_pipe : Setting FD_CLOEXEC failed, err = %d\n",
+	       -err);
 	close(fds[1]);
 	close(fds[0]);
 	return err;
 }
 
-int os_set_fd_async(int fd, int owner)
+int os_set_fd_async(int fd)
 {
-	int err;
+	int err, flags;
 
-	/* XXX This should do F_GETFL first */
-	if(fcntl(fd, F_SETFL, O_ASYNC | O_NONBLOCK) < 0){
+	flags = fcntl(fd, F_GETFL);
+	if (flags < 0)
+		return -errno;
+
+	flags |= O_ASYNC | O_NONBLOCK;
+	if (fcntl(fd, F_SETFL, flags) < 0) {
 		err = -errno;
-		printk("os_set_fd_async : failed to set O_ASYNC and "
-		       "O_NONBLOCK on fd # %d, errno = %d\n", fd, errno);
+		printk(UM_KERN_ERR "os_set_fd_async : failed to set O_ASYNC "
+		       "and O_NONBLOCK on fd # %d, errno = %d\n", fd, errno);
 		return err;
 	}
-#ifdef notdef
-	if(fcntl(fd, F_SETFD, 1) < 0){
-		printk("os_set_fd_async : Setting FD_CLOEXEC failed, "
-		       "errno = %d\n", errno);
-	}
-#endif
 
-	if((fcntl(fd, F_SETSIG, SIGIO) < 0) ||
-	   (fcntl(fd, F_SETOWN, owner) < 0)){
+	if ((fcntl(fd, F_SETSIG, SIGIO) < 0) ||
+	    (fcntl(fd, F_SETOWN, os_getpid()) < 0)) {
 		err = -errno;
-		printk("os_set_fd_async : Failed to fcntl F_SETOWN "
-		       "(or F_SETSIG) fd %d to pid %d, errno = %d\n", fd,
-		       owner, errno);
+		printk(UM_KERN_ERR "os_set_fd_async : Failed to fcntl F_SETOWN "
+		       "(or F_SETSIG) fd %d, errno = %d\n", fd, errno);
 		return err;
 	}
 
@@ -413,10 +391,14 @@
 
 int os_clear_fd_async(int fd)
 {
-	int flags = fcntl(fd, F_GETFL);
+	int flags;
+
+	flags = fcntl(fd, F_GETFL);
+	if (flags < 0)
+		return -errno;
 
 	flags &= ~(O_ASYNC | O_NONBLOCK);
-	if(fcntl(fd, F_SETFL, flags) < 0)
+	if (fcntl(fd, F_SETFL, flags) < 0)
 		return -errno;
 	return 0;
 }
@@ -426,11 +408,15 @@
 	int flags;
 
 	flags = fcntl(fd, F_GETFL);
+	if (flags < 0)
+		return -errno;
 
-	if(blocking) flags &= ~O_NONBLOCK;
-	else flags |= O_NONBLOCK;
+	if (blocking)
+		flags &= ~O_NONBLOCK;
+	else
+		flags |= O_NONBLOCK;
 
-	if(fcntl(fd, F_SETFL, flags) < 0)
+	if (fcntl(fd, F_SETFL, flags) < 0)
 		return -errno;
 
 	return 0;
@@ -441,7 +427,7 @@
 	int new;
 
 	new = accept(fd, NULL, 0);
-	if(new < 0)
+	if (new < 0)
 		return -errno;
 	return new;
 }
@@ -462,15 +448,17 @@
 {
 	int what, err;
 
-	if(r && w) what = SHUT_RDWR;
-	else if(r) what = SHUT_RD;
-	else if(w) what = SHUT_WR;
-	else {
-		printk("os_shutdown_socket : neither r or w was set\n");
+	if (r && w)
+		what = SHUT_RDWR;
+	else if (r)
+		what = SHUT_RD;
+	else if (w)
+		what = SHUT_WR;
+	else
 		return -EINVAL;
-	}
+
 	err = shutdown(fd, what);
-	if(err < 0)
+	if (err < 0)
 		return -errno;
 	return 0;
 }
@@ -494,19 +482,20 @@
 	msg.msg_flags = 0;
 
 	n = recvmsg(fd, &msg, 0);
-	if(n < 0)
+	if (n < 0)
 		return -errno;
-	else if(n != iov.iov_len)
+	else if (n != iov.iov_len)
 		*helper_pid_out = -1;
 
 	cmsg = CMSG_FIRSTHDR(&msg);
-	if(cmsg == NULL){
-		printk("rcv_fd didn't receive anything, error = %d\n", errno);
+	if (cmsg == NULL) {
+		printk(UM_KERN_ERR "rcv_fd didn't receive anything, "
+		       "error = %d\n", errno);
 		return -1;
 	}
-	if((cmsg->cmsg_level != SOL_SOCKET) ||
-	   (cmsg->cmsg_type != SCM_RIGHTS)){
-		printk("rcv_fd didn't receive a descriptor\n");
+	if ((cmsg->cmsg_level != SOL_SOCKET) ||
+	    (cmsg->cmsg_type != SCM_RIGHTS)) {
+		printk(UM_KERN_ERR "rcv_fd didn't receive a descriptor\n");
 		return -1;
 	}
 
@@ -514,29 +503,28 @@
 	return new;
 }
 
-int os_create_unix_socket(char *file, int len, int close_on_exec)
+int os_create_unix_socket(const char *file, int len, int close_on_exec)
 {
 	struct sockaddr_un addr;
 	int sock, err;
 
 	sock = socket(PF_UNIX, SOCK_DGRAM, 0);
-	if(sock < 0)
+	if (sock < 0)
 		return -errno;
 
-	if(close_on_exec) {
+	if (close_on_exec) {
 		err = os_set_exec_close(sock);
-		if(err < 0)
-			printk("create_unix_socket : close_on_exec failed, "
-		       "err = %d", -err);
+		if (err < 0)
+			printk(UM_KERN_ERR "create_unix_socket : "
+			       "close_on_exec failed, err = %d\n", -err);
 	}
 
 	addr.sun_family = AF_UNIX;
 
-	/* XXX Be more careful about overflow */
 	snprintf(addr.sun_path, len, "%s", file);
 
 	err = bind(sock, (struct sockaddr *) &addr, sizeof(addr));
-	if(err < 0)
+	if (err < 0)
 		return -errno;
 
 	return sock;
@@ -557,17 +545,18 @@
 	int err, save;
 
 	err = fcntl(fd, F_SETLK, &lock);
-	if(!err)
+	if (!err)
 		goto out;
 
 	save = -errno;
 	err = fcntl(fd, F_GETLK, &lock);
-	if(err){
+	if (err) {
 		err = -errno;
 		goto out;
 	}
 
-	printk("F_SETLK failed, file already locked by pid %d\n", lock.l_pid);
+	printk(UM_KERN_ERR "F_SETLK failed, file already locked by pid %d\n",
+	       lock.l_pid);
 	err = save;
  out:
 	return err;
diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c
index fba3f0f..f4bd349 100644
--- a/arch/um/os-Linux/helper.c
+++ b/arch/um/os-Linux/helper.c
@@ -1,22 +1,19 @@
 /*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <errno.h>
 #include <sched.h>
-#include <limits.h>
-#include <sys/signal.h>
-#include <sys/wait.h>
 #include <sys/socket.h>
-#include "user.h"
+#include <sys/wait.h>
+#include "kern_constants.h"
 #include "kern_util.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "kern_constants.h"
+#include "user.h"
 
 struct helper_data {
 	void (*pre_exec)(void*);
@@ -30,21 +27,19 @@
 {
 	struct helper_data *data = arg;
 	char **argv = data->argv;
-	int errval;
+	int err;
 
 	if (data->pre_exec != NULL)
 		(*data->pre_exec)(data->pre_data);
-	errval = execvp_noalloc(data->buf, argv[0], argv);
-	printk("helper_child - execvp of '%s' failed - errno = %d\n", argv[0],
-	       -errval);
-	write(data->fd, &errval, sizeof(errval));
-	kill(os_getpid(), SIGKILL);
+	err = execvp_noalloc(data->buf, argv[0], argv);
+
+	/* If the exec succeeds, we don't get here */
+	write(data->fd, &err, sizeof(err));
+
 	return 0;
 }
 
-/* Returns either the pid of the child process we run or -E* on failure.
- * XXX The alloc_stack here breaks if this is called in the tracing thread, so
- * we need to receive a preallocated stack (a local buffer is ok). */
+/* Returns either the pid of the child process we run or -E* on failure. */
 int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv)
 {
 	struct helper_data data;
@@ -58,14 +53,15 @@
 	ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
 	if (ret < 0) {
 		ret = -errno;
-		printk("run_helper : pipe failed, errno = %d\n", errno);
+		printk(UM_KERN_ERR "run_helper : pipe failed, errno = %d\n",
+		       errno);
 		goto out_free;
 	}
 
 	ret = os_set_exec_close(fds[1]);
 	if (ret < 0) {
-		printk("run_helper : setting FD_CLOEXEC failed, ret = %d\n",
-		       -ret);
+		printk(UM_KERN_ERR "run_helper : setting FD_CLOEXEC failed, "
+		       "ret = %d\n", -ret);
 		goto out_close;
 	}
 
@@ -79,7 +75,8 @@
 	pid = clone(helper_child, (void *) sp, CLONE_VM, &data);
 	if (pid < 0) {
 		ret = -errno;
-		printk("run_helper : clone failed, errno = %d\n", errno);
+		printk(UM_KERN_ERR "run_helper : clone failed, errno = %d\n",
+		       errno);
 		goto out_free2;
 	}
 
@@ -96,10 +93,9 @@
 	} else {
 		if (n < 0) {
 			n = -errno;
-			printk("run_helper : read on pipe failed, ret = %d\n",
-			       -n);
+			printk(UM_KERN_ERR "run_helper : read on pipe failed, "
+			       "ret = %d\n", -n);
 			ret = n;
-			kill(pid, SIGKILL);
 		}
 		CATCH_EINTR(waitpid(pid, NULL, __WCLONE));
 	}
@@ -129,50 +125,40 @@
 	pid = clone(proc, (void *) sp, flags, arg);
 	if (pid < 0) {
 		err = -errno;
-		printk("run_helper_thread : clone failed, errno = %d\n",
-		       errno);
+		printk(UM_KERN_ERR "run_helper_thread : clone failed, "
+		       "errno = %d\n", errno);
 		return err;
 	}
 	if (stack_out == NULL) {
 		CATCH_EINTR(pid = waitpid(pid, &status, __WCLONE));
 		if (pid < 0) {
 			err = -errno;
-			printk("run_helper_thread - wait failed, errno = %d\n",
-			       errno);
+			printk(UM_KERN_ERR "run_helper_thread - wait failed, "
+			       "errno = %d\n", errno);
 			pid = err;
 		}
 		if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0))
-			printk("run_helper_thread - thread returned status "
-			       "0x%x\n", status);
+			printk(UM_KERN_ERR "run_helper_thread - thread "
+			       "returned status 0x%x\n", status);
 		free_stack(stack, 0);
 	} else
 		*stack_out = stack;
 	return pid;
 }
 
-int helper_wait(int pid, int nohang, char *pname)
+int helper_wait(int pid)
 {
 	int ret, status;
 	int wflags = __WCLONE;
 
-	if (nohang)
-		wflags |= WNOHANG;
-
-	if (!pname)
-		pname = "helper_wait";
-
 	CATCH_EINTR(ret = waitpid(pid, &status, wflags));
 	if (ret < 0) {
-		printk(UM_KERN_ERR "%s : waitpid process %d failed, "
-		       "errno = %d\n", pname, pid, errno);
+		printk(UM_KERN_ERR "helper_wait : waitpid process %d failed, "
+		       "errno = %d\n", pid, errno);
 		return -errno;
-	} else if (nohang && ret == 0) {
-		printk(UM_KERN_ERR "%s : process %d has not exited\n",
-		       pname, pid);
-		return -ECHILD;
 	} else if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
-		printk(UM_KERN_ERR "%s : process %d didn't exit with "
-		       "status 0\n", pname, pid);
+		printk(UM_KERN_ERR "helper_wait : process %d exited with "
+		       "status 0x%x\n", pid, status);
 		return -ECHILD;
 	} else
 		return 0;
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index 6aa6f95..0348b97 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -1,23 +1,19 @@
 /*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #include <stdlib.h>
-#include <unistd.h>
 #include <errno.h>
+#include <poll.h>
 #include <signal.h>
 #include <string.h>
-#include <sys/poll.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include "kern_util.h"
-#include "user.h"
-#include "process.h"
-#include "sigio.h"
 #include "irq_user.h"
+#include "kern_constants.h"
 #include "os.h"
+#include "process.h"
 #include "um_malloc.h"
+#include "user.h"
 
 /*
  * Locked by irq_lock in arch/um/kernel/irq.c.  Changed by os_create_pollfd
@@ -36,7 +32,7 @@
 	if (n < 0) {
 		err = -errno;
 		if (errno != EINTR)
-			printk("sigio_handler: os_waiting_for_events:"
+			printk(UM_KERN_ERR "os_waiting_for_events:"
 			       " poll returned %d, errno = %d\n", n, errno);
 		return err;
 	}
@@ -95,24 +91,26 @@
 			struct irq_fd *old_fd = *prev;
 			if ((pollfds[i].fd != -1) &&
 			    (pollfds[i].fd != (*prev)->fd)) {
-				printk("os_free_irq_by_cb - mismatch between "
-				       "active_fds and pollfds, fd %d vs %d\n",
+				printk(UM_KERN_ERR "os_free_irq_by_cb - "
+				       "mismatch between active_fds and "
+				       "pollfds, fd %d vs %d\n",
 				       (*prev)->fd, pollfds[i].fd);
 				goto out;
 			}
 
 			pollfds_num--;
 
-			/* This moves the *whole* array after pollfds[i]
+			/*
+			 * This moves the *whole* array after pollfds[i]
 			 * (though it doesn't spot as such)!
 			 */
 			memmove(&pollfds[i], &pollfds[i + 1],
 			       (pollfds_num - i) * sizeof(pollfds[0]));
-			if(*last_irq_ptr2 == &old_fd->next)
+			if (*last_irq_ptr2 == &old_fd->next)
 				*last_irq_ptr2 = prev;
 
 			*prev = (*prev)->next;
-			if(old_fd->type == IRQ_WRITE)
+			if (old_fd->type == IRQ_WRITE)
 				ignore_sigio_fd(old_fd->fd);
 			kfree(old_fd);
 			continue;
@@ -138,14 +136,3 @@
 {
 	signal(SIGIO, SIG_IGN);
 }
-
-void init_irq_signals(int on_sigstack)
-{
-	int flags;
-
-	flags = on_sigstack ? SA_ONSTACK : 0;
-
-	set_handler(SIGIO, (__sighandler_t) sig_handler, flags | SA_RESTART,
-		    SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
-	signal(SIGWINCH, SIG_IGN);
-}
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 82c3778..abb9b0f 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -73,7 +73,7 @@
 	action.sa_handler = last_ditch_exit;
 	if (sigaction(sig, &action, NULL) < 0) {
 		printf("failed to install handler for signal %d - errno = %d\n",
-		       errno);
+		       sig, errno);
 		exit(1);
 	}
 }
@@ -92,7 +92,8 @@
 	 * just use the default + /usr/lib/uml
 	 */
 	if (!old_path || (path_len = strlen(old_path)) == 0) {
-		putenv("PATH=:/bin:/usr/bin/" UML_LIB_PATH);
+		if (putenv("PATH=:/bin:/usr/bin/" UML_LIB_PATH))
+			perror("couldn't putenv");
 		return;
 	}
 
@@ -100,15 +101,16 @@
 	path_len += strlen("PATH=" UML_LIB_PATH) + 1;
 	new_path = malloc(path_len);
 	if (!new_path) {
-		perror("coudn't malloc to set a new PATH");
+		perror("couldn't malloc to set a new PATH");
 		return;
 	}
 	snprintf(new_path, path_len, "PATH=%s" UML_LIB_PATH, old_path);
-	putenv(new_path);
+	if (putenv(new_path)) {
+		perror("couldn't putenv to set a new PATH");
+		free(new_path);
+	}
 }
 
-extern int uml_exitcode;
-
 extern void scan_elf_aux( char **envp);
 
 int __init main(int argc, char **argv, char **envp)
diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c
index 436f8d2..eedc2d8 100644
--- a/arch/um/os-Linux/mem.c
+++ b/arch/um/os-Linux/mem.c
@@ -9,7 +9,6 @@
 #include <sys/types.h>
 #include <sys/mman.h>
 #include <sys/statfs.h>
-#include "kern_util.h"
 #include "user.h"
 #include "mem_user.h"
 #include "init.h"
@@ -30,7 +29,7 @@
 
 static void __init find_tempdir(void)
 {
-	char *dirs[] = { "TMP", "TEMP", "TMPDIR", NULL };
+	const char *dirs[] = { "TMP", "TEMP", "TMPDIR", NULL };
 	int i;
 	char *dir = NULL;
 
@@ -59,9 +58,10 @@
  * read the file as needed.  If there's an error, -errno is returned;
  * if the end of the file is reached, 0 is returned.
  */
-static int next(int fd, char *buf, int size, char c)
+static int next(int fd, char *buf, size_t size, char c)
 {
-	int n, len;
+	ssize_t n;
+	size_t len;
 	char *ptr;
 
 	while((ptr = strchr(buf, c)) == NULL){
@@ -172,13 +172,15 @@
 
 	which_tmpdir();
 	tempname = malloc(MAXPATHLEN);
+	if (!tempname)
+		goto out;
 
 	find_tempdir();
 	if (template[0] != '/')
 		strcpy(tempname, tempdir);
 	else
 		tempname[0] = '\0';
-	strcat(tempname, template);
+	strncat(tempname, template, MAXPATHLEN-1-strlen(tempname));
 	fd = mkstemp(tempname);
 	if(fd < 0){
 		fprintf(stderr, "open - cannot create %s: %s\n", tempname,
@@ -268,6 +270,7 @@
 	if(addr == MAP_FAILED){
 		err = errno;
 		perror("failed");
+		close(fd);
 		if(err == EPERM)
 			printf("%s must be not mounted noexec\n",tempdir);
 		exit(1);
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index bda5c31..abf6bea 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -249,7 +249,10 @@
 		    SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
 	signal(SIGHUP, SIG_IGN);
 
-	init_irq_signals(1);
+	set_handler(SIGIO, (__sighandler_t) sig_handler,
+		    SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGALRM,
+		    SIGVTALRM, -1);
+	signal(SIGWINCH, SIG_IGN);
 }
 
 int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr)
diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c
index a32ba6a..830fe6a 100644
--- a/arch/um/os-Linux/registers.c
+++ b/arch/um/os-Linux/registers.c
@@ -8,47 +8,41 @@
 #include <string.h>
 #include <sys/ptrace.h>
 #include "sysdep/ptrace.h"
-#include "user.h"
 
-/* This is set once at boot time and not changed thereafter */
-
-static unsigned long exec_regs[MAX_REG_NR];
-
-void init_thread_registers(struct uml_pt_regs *to)
-{
-	memcpy(to->gp, exec_regs, sizeof(to->gp));
-}
-
-void save_registers(int pid, struct uml_pt_regs *regs)
+int save_registers(int pid, struct uml_pt_regs *regs)
 {
 	int err;
 
 	err = ptrace(PTRACE_GETREGS, pid, 0, regs->gp);
 	if (err < 0)
-		panic("save_registers - saving registers failed, errno = %d\n",
-		      errno);
+		return -errno;
+	return 0;
 }
 
-void restore_registers(int pid, struct uml_pt_regs *regs)
+int restore_registers(int pid, struct uml_pt_regs *regs)
 {
 	int err;
 
 	err = ptrace(PTRACE_SETREGS, pid, 0, regs->gp);
 	if (err < 0)
-		panic("restore_registers - saving registers failed, "
-		      "errno = %d\n", errno);
+		return -errno;
+	return 0;
 }
 
-void init_registers(int pid)
+/* This is set once at boot time and not changed thereafter */
+
+static unsigned long exec_regs[MAX_REG_NR];
+
+int init_registers(int pid)
 {
 	int err;
 
 	err = ptrace(PTRACE_GETREGS, pid, 0, exec_regs);
-	if (err)
-		panic("check_ptrace : PTRACE_GETREGS failed, errno = %d",
-		      errno);
+	if (err < 0)
+		return -errno;
 
 	arch_init_registers(pid);
+	return 0;
 }
 
 void get_safe_registers(unsigned long *regs)
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index dc03e9c..abf47a7c 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -1,34 +1,33 @@
 /*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #include <unistd.h>
-#include <stdlib.h>
-#include <termios.h>
-#include <pty.h>
-#include <signal.h>
-#include <fcntl.h>
 #include <errno.h>
-#include <string.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <pty.h>
 #include <sched.h>
-#include <sys/socket.h>
-#include <sys/poll.h>
-#include "init.h"
-#include "user.h"
+#include <signal.h>
+#include <string.h>
+#include "kern_constants.h"
 #include "kern_util.h"
-#include "sigio.h"
-#include "os.h"
-#include "um_malloc.h"
 #include "init.h"
+#include "os.h"
+#include "sigio.h"
+#include "um_malloc.h"
+#include "user.h"
 
-/* Protected by sigio_lock(), also used by sigio_cleanup, which is an
+/*
+ * Protected by sigio_lock(), also used by sigio_cleanup, which is an
  * exitcall.
  */
 static int write_sigio_pid = -1;
 static unsigned long write_sigio_stack;
 
-/* These arrays are initialized before the sigio thread is started, and
+/*
+ * These arrays are initialized before the sigio thread is started, and
  * the descriptors closed after it is killed.  So, it can't see them change.
  * On the UML side, they are changed under the sigio_lock.
  */
@@ -43,7 +42,8 @@
 	int used;
 };
 
-/* Protected by sigio_lock().  Used by the sigio thread, but the UML thread
+/*
+ * Protected by sigio_lock().  Used by the sigio thread, but the UML thread
  * synchronizes with it.
  */
 static struct pollfds current_poll;
@@ -57,23 +57,26 @@
 	int i, n, respond_fd;
 	char c;
 
-        signal(SIGWINCH, SIG_IGN);
+	signal(SIGWINCH, SIG_IGN);
 	fds = &current_poll;
-	while(1){
+	while (1) {
 		n = poll(fds->poll, fds->used, -1);
-		if(n < 0){
-			if(errno == EINTR) continue;
-			printk("write_sigio_thread : poll returned %d, "
-			       "errno = %d\n", n, errno);
+		if (n < 0) {
+			if (errno == EINTR)
+				continue;
+			printk(UM_KERN_ERR "write_sigio_thread : poll returned "
+			       "%d, errno = %d\n", n, errno);
 		}
-		for(i = 0; i < fds->used; i++){
+		for (i = 0; i < fds->used; i++) {
 			p = &fds->poll[i];
-			if(p->revents == 0) continue;
-			if(p->fd == sigio_private[1]){
+			if (p->revents == 0)
+				continue;
+			if (p->fd == sigio_private[1]) {
 				CATCH_EINTR(n = read(sigio_private[1], &c,
 						     sizeof(c)));
-				if(n != sizeof(c))
-					printk("write_sigio_thread : "
+				if (n != sizeof(c))
+					printk(UM_KERN_ERR
+					       "write_sigio_thread : "
 					       "read on socket failed, "
 					       "err = %d\n", errno);
 				tmp = current_poll;
@@ -89,9 +92,10 @@
 			}
 
 			CATCH_EINTR(n = write(respond_fd, &c, sizeof(c)));
-			if(n != sizeof(c))
-				printk("write_sigio_thread : write on socket "
-				       "failed, err = %d\n", errno);
+			if (n != sizeof(c))
+				printk(UM_KERN_ERR "write_sigio_thread : "
+				       "write on socket failed, err = %d\n",
+				       errno);
 		}
 	}
 
@@ -102,12 +106,13 @@
 {
 	struct pollfd *new;
 
-	if(n <= polls->size)
+	if (n <= polls->size)
 		return 0;
 
 	new = kmalloc(n * sizeof(struct pollfd), UM_GFP_ATOMIC);
-	if(new == NULL){
-		printk("need_poll : failed to allocate new pollfds\n");
+	if (new == NULL) {
+		printk(UM_KERN_ERR "need_poll : failed to allocate new "
+		       "pollfds\n");
 		return -ENOMEM;
 	}
 
@@ -119,7 +124,8 @@
 	return 0;
 }
 
-/* Must be called with sigio_lock held, because it's needed by the marked
+/*
+ * Must be called with sigio_lock held, because it's needed by the marked
  * critical section.
  */
 static void update_thread(void)
@@ -129,15 +135,17 @@
 	char c;
 
 	flags = set_signals(0);
-	n = write(sigio_private[0], &c, sizeof(c));
-	if(n != sizeof(c)){
-		printk("update_thread : write failed, err = %d\n", errno);
+	CATCH_EINTR(n = write(sigio_private[0], &c, sizeof(c)));
+	if (n != sizeof(c)) {
+		printk(UM_KERN_ERR "update_thread : write failed, err = %d\n",
+		       errno);
 		goto fail;
 	}
 
 	CATCH_EINTR(n = read(sigio_private[0], &c, sizeof(c)));
-	if(n != sizeof(c)){
-		printk("update_thread : read failed, err = %d\n", errno);
+	if (n != sizeof(c)) {
+		printk(UM_KERN_ERR "update_thread : read failed, err = %d\n",
+		       errno);
 		goto fail;
 	}
 
@@ -164,23 +172,23 @@
 	int err = 0, i, n;
 
 	sigio_lock();
-	for(i = 0; i < all_sigio_fds.used; i++){
-		if(all_sigio_fds.poll[i].fd == fd)
+	for (i = 0; i < all_sigio_fds.used; i++) {
+		if (all_sigio_fds.poll[i].fd == fd)
 			break;
 	}
-	if(i == all_sigio_fds.used)
+	if (i == all_sigio_fds.used)
 		goto out;
 
 	p = &all_sigio_fds.poll[i];
 
-	for(i = 0; i < current_poll.used; i++){
-		if(current_poll.poll[i].fd == fd)
+	for (i = 0; i < current_poll.used; i++) {
+		if (current_poll.poll[i].fd == fd)
 			goto out;
 	}
 
 	n = current_poll.used;
 	err = need_poll(&next_poll, n + 1);
-	if(err)
+	if (err)
 		goto out;
 
 	memcpy(next_poll.poll, current_poll.poll,
@@ -198,27 +206,29 @@
 	struct pollfd *p;
 	int err = 0, i, n = 0;
 
-	/* This is called from exitcalls elsewhere in UML - if
+	/*
+	 * This is called from exitcalls elsewhere in UML - if
 	 * sigio_cleanup has already run, then update_thread will hang
 	 * or fail because the thread is no longer running.
 	 */
-	if(write_sigio_pid == -1)
+	if (write_sigio_pid == -1)
 		return -EIO;
 
 	sigio_lock();
-	for(i = 0; i < current_poll.used; i++){
-		if(current_poll.poll[i].fd == fd) break;
+	for (i = 0; i < current_poll.used; i++) {
+		if (current_poll.poll[i].fd == fd)
+			break;
 	}
-	if(i == current_poll.used)
+	if (i == current_poll.used)
 		goto out;
 
 	err = need_poll(&next_poll, current_poll.used - 1);
-	if(err)
+	if (err)
 		goto out;
 
-	for(i = 0; i < current_poll.used; i++){
+	for (i = 0; i < current_poll.used; i++) {
 		p = &current_poll.poll[i];
-		if(p->fd != fd)
+		if (p->fd != fd)
 			next_poll.poll[n++] = *p;
 	}
 	next_poll.used = current_poll.used - 1;
@@ -235,7 +245,8 @@
 
 	p = kmalloc(sizeof(struct pollfd), UM_GFP_KERNEL);
 	if (p == NULL) {
-		printk("setup_initial_poll : failed to allocate poll\n");
+		printk(UM_KERN_ERR "setup_initial_poll : failed to allocate "
+		       "poll\n");
 		return NULL;
 	}
 	*p = ((struct pollfd) { .fd		= fd,
@@ -261,27 +272,29 @@
 		return;
 
 	err = os_pipe(l_write_sigio_fds, 1, 1);
-	if(err < 0){
-		printk("write_sigio_workaround - os_pipe 1 failed, "
+	if (err < 0) {
+		printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 1 failed, "
 		       "err = %d\n", -err);
 		return;
 	}
 	err = os_pipe(l_sigio_private, 1, 1);
-	if(err < 0){
-		printk("write_sigio_workaround - os_pipe 2 failed, "
+	if (err < 0) {
+		printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 2 failed, "
 		       "err = %d\n", -err);
 		goto out_close1;
 	}
 
 	p = setup_initial_poll(l_sigio_private[1]);
-	if(!p)
+	if (!p)
 		goto out_close2;
 
 	sigio_lock();
 
-	/* Did we race? Don't try to optimize this, please, it's not so likely
-	 * to happen, and no more than once at the boot. */
-	if(write_sigio_pid != -1)
+	/*
+	 * Did we race? Don't try to optimize this, please, it's not so likely
+	 * to happen, and no more than once at boot.
+	 */
+	if (write_sigio_pid != -1)
 		goto out_free;
 
 	current_poll = ((struct pollfds) { .poll 	= p,
@@ -333,19 +346,19 @@
 {
 	int err;
 
-	if(!isatty(fd))
+	if (!isatty(fd))
 		return;
 
-	if((read || pty_output_sigio) && (!read || pty_close_sigio))
+	if ((read || pty_output_sigio) && (!read || pty_close_sigio))
 		return;
 
 	write_sigio_workaround();
 
 	sigio_lock();
 	err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1);
-	if(err){
-		printk("maybe_sigio_broken - failed to add pollfd for "
-		       "descriptor %d\n", fd);
+	if (err) {
+		printk(UM_KERN_ERR "maybe_sigio_broken - failed to add pollfd "
+		       "for descriptor %d\n", fd);
 		goto out;
 	}
 
@@ -388,7 +401,7 @@
 	struct openpty_arg *info = arg;
 
 	info->err = 0;
-	if(openpty(&info->master, &info->slave, NULL, NULL, NULL))
+	if (openpty(&info->master, &info->slave, NULL, NULL, NULL))
 		info->err = -errno;
 }
 
@@ -397,17 +410,17 @@
 	int flags;
 
 	flags = fcntl(master, F_GETFL);
-	if(flags < 0)
+	if (flags < 0)
 		return -errno;
 
-	if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) ||
-	   (fcntl(master, F_SETOWN, os_getpid()) < 0))
+	if ((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) ||
+	    (fcntl(master, F_SETOWN, os_getpid()) < 0))
 		return -errno;
 
-	if((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0))
+	if ((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0))
 		return -errno;
 
-	return(0);
+	return 0;
 }
 
 static void __init check_one_sigio(void (*proc)(int, int))
@@ -417,34 +430,49 @@
 	int master, slave, err;
 
 	initial_thread_cb(openpty_cb, &pty);
-	if(pty.err){
-		printk("openpty failed, errno = %d\n", -pty.err);
+	if (pty.err) {
+		printk(UM_KERN_ERR "check_one_sigio failed, errno = %d\n",
+		       -pty.err);
 		return;
 	}
 
 	master = pty.master;
 	slave = pty.slave;
 
-	if((master == -1) || (slave == -1)){
-		printk("openpty failed to allocate a pty\n");
+	if ((master == -1) || (slave == -1)) {
+		printk(UM_KERN_ERR "check_one_sigio failed to allocate a "
+		       "pty\n");
 		return;
 	}
 
 	/* Not now, but complain so we now where we failed. */
 	err = raw(master);
-	if (err < 0)
-		panic("check_sigio : __raw failed, errno = %d\n", -err);
+	if (err < 0) {
+		printk(UM_KERN_ERR "check_one_sigio : raw failed, errno = %d\n",
+		      -err);
+		return;
+	}
 
 	err = async_pty(master, slave);
-	if(err < 0)
-		panic("tty_fds : sigio_async failed, err = %d\n", -err);
+	if (err < 0) {
+		printk(UM_KERN_ERR "check_one_sigio : sigio_async failed, "
+		       "err = %d\n", -err);
+		return;
+	}
 
-	if(sigaction(SIGIO, NULL, &old) < 0)
-		panic("check_sigio : sigaction 1 failed, errno = %d\n", errno);
+	if (sigaction(SIGIO, NULL, &old) < 0) {
+		printk(UM_KERN_ERR "check_one_sigio : sigaction 1 failed, "
+		       "errno = %d\n", errno);
+		return;
+	}
+
 	new = old;
 	new.sa_handler = handler;
-	if(sigaction(SIGIO, &new, NULL) < 0)
-		panic("check_sigio : sigaction 2 failed, errno = %d\n", errno);
+	if (sigaction(SIGIO, &new, NULL) < 0) {
+		printk(UM_KERN_ERR "check_one_sigio : sigaction 2 failed, "
+		       "errno = %d\n", errno);
+		return;
+	}
 
 	got_sigio = 0;
 	(*proc)(master, slave);
@@ -452,8 +480,9 @@
 	close(master);
 	close(slave);
 
-	if(sigaction(SIGIO, &old, NULL) < 0)
-		panic("check_sigio : sigaction 3 failed, errno = %d\n", errno);
+	if (sigaction(SIGIO, &old, NULL) < 0)
+		printk(UM_KERN_ERR "check_one_sigio : sigaction 3 failed, "
+		       "errno = %d\n", errno);
 }
 
 static void tty_output(int master, int slave)
@@ -461,42 +490,45 @@
 	int n;
 	char buf[512];
 
-	printk("Checking that host ptys support output SIGIO...");
+	printk(UM_KERN_INFO "Checking that host ptys support output SIGIO...");
 
 	memset(buf, 0, sizeof(buf));
 
-	while(write(master, buf, sizeof(buf)) > 0) ;
-	if(errno != EAGAIN)
-		panic("tty_output : write failed, errno = %d\n", errno);
-	while(((n = read(slave, buf, sizeof(buf))) > 0) && !got_sigio) ;
+	while (write(master, buf, sizeof(buf)) > 0) ;
+	if (errno != EAGAIN)
+		printk(UM_KERN_ERR "tty_output : write failed, errno = %d\n",
+		       errno);
+	while (((n = read(slave, buf, sizeof(buf))) > 0) && !got_sigio)
+		;
 
-	if(got_sigio){
-		printk("Yes\n");
+	if (got_sigio) {
+		printk(UM_KERN_CONT "Yes\n");
 		pty_output_sigio = 1;
-	}
-	else if(n == -EAGAIN)
-		printk("No, enabling workaround\n");
-	else panic("tty_output : read failed, err = %d\n", n);
+	} else if (n == -EAGAIN)
+		printk(UM_KERN_CONT "No, enabling workaround\n");
+	else
+		printk(UM_KERN_CONT "tty_output : read failed, err = %d\n", n);
 }
 
 static void tty_close(int master, int slave)
 {
-	printk("Checking that host ptys support SIGIO on close...");
+	printk(UM_KERN_INFO "Checking that host ptys support SIGIO on "
+	       "close...");
 
 	close(slave);
-	if(got_sigio){
-		printk("Yes\n");
+	if (got_sigio) {
+		printk(UM_KERN_CONT "Yes\n");
 		pty_close_sigio = 1;
-	}
-	else printk("No, enabling workaround\n");
+	} else
+		printk(UM_KERN_CONT "No, enabling workaround\n");
 }
 
 void __init check_sigio(void)
 {
-	if((os_access("/dev/ptmx", OS_ACC_R_OK) < 0) &&
-	   (os_access("/dev/ptyp0", OS_ACC_R_OK) < 0)){
-		printk("No pseudo-terminals available - skipping pty SIGIO "
-		       "check\n");
+	if ((access("/dev/ptmx", R_OK) < 0) &&
+	    (access("/dev/ptyp0", R_OK) < 0)) {
+		printk(UM_KERN_WARNING "No pseudo-terminals available - "
+		       "skipping pty SIGIO check\n");
 		return;
 	}
 	check_one_sigio(tty_output);
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index e9800b0..0fb0cc8 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -9,11 +9,47 @@
 #include <errno.h>
 #include <signal.h>
 #include <strings.h>
+#include "as-layout.h"
+#include "kern_util.h"
 #include "os.h"
 #include "sysdep/barrier.h"
 #include "sysdep/sigcontext.h"
 #include "user.h"
 
+/* Copied from linux/compiler-gcc.h since we can't include it directly */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+void (*sig_info[NSIG])(int, struct uml_pt_regs *) = {
+	[SIGTRAP]	= relay_signal,
+	[SIGFPE]	= relay_signal,
+	[SIGILL]	= relay_signal,
+	[SIGWINCH]	= winch,
+	[SIGBUS]	= bus_handler,
+	[SIGSEGV]	= segv_handler,
+	[SIGIO]		= sigio_handler,
+	[SIGVTALRM]	= timer_handler };
+
+static void sig_handler_common(int sig, struct sigcontext *sc)
+{
+	struct uml_pt_regs r;
+	int save_errno = errno;
+
+	r.is_user = 0;
+	if (sig == SIGSEGV) {
+		/* For segfaults, we want the data from the sigcontext. */
+		copy_sc(&r, sc);
+		GET_FAULTINFO_FROM_SC(r.faultinfo, sc);
+	}
+
+	/* Enable signals unless sig is one of the IRQ signals */
+	if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGVTALRM))
+		unblock_signals();
+
+	(*sig_info[sig])(sig, &r);
+
+	errno = save_errno;
+}
+
 /*
  * These are the asynchronous signals.  SIGPROF is excluded because we want to
  * be able to profile all of UML, not just the non-critical sections.  If
@@ -26,13 +62,8 @@
 #define SIGVTALRM_BIT 1
 #define SIGVTALRM_MASK (1 << SIGVTALRM_BIT)
 
-/*
- * These are used by both the signal handlers and
- * block/unblock_signals.  I don't want modifications cached in a
- * register - they must go straight to memory.
- */
-static volatile int signals_enabled = 1;
-static volatile int pending = 0;
+static int signals_enabled;
+static unsigned int signals_pending;
 
 void sig_handler(int sig, struct sigcontext *sc)
 {
@@ -40,13 +71,13 @@
 
 	enabled = signals_enabled;
 	if (!enabled && (sig == SIGIO)) {
-		pending |= SIGIO_MASK;
+		signals_pending |= SIGIO_MASK;
 		return;
 	}
 
 	block_signals();
 
-	sig_handler_common_skas(sig, sc);
+	sig_handler_common(sig, sc);
 
 	set_signals(enabled);
 }
@@ -68,7 +99,7 @@
 
 	enabled = signals_enabled;
 	if (!signals_enabled) {
-		pending |= SIGVTALRM_MASK;
+		signals_pending |= SIGVTALRM_MASK;
 		return;
 	}
 
@@ -94,16 +125,6 @@
 		panic("enabling signal stack failed, errno = %d\n", errno);
 }
 
-void remove_sigstack(void)
-{
-	stack_t stack = ((stack_t) { .ss_flags	= SS_DISABLE,
-				     .ss_sp	= NULL,
-				     .ss_size	= 0 });
-
-	if (sigaltstack(&stack, NULL) != 0)
-		panic("disabling signal stack failed, errno = %d\n", errno);
-}
-
 void (*handlers[_NSIG])(int sig, struct sigcontext *sc);
 
 void handle_signal(int sig, struct sigcontext *sc)
@@ -166,6 +187,9 @@
 		sigaddset(&action.sa_mask, mask);
 	va_end(ap);
 
+	if (sig == SIGSEGV)
+		flags |= SA_NODEFER;
+
 	action.sa_flags = flags;
 	action.sa_restorer = NULL;
 	if (sigaction(sig, &action, NULL) < 0)
@@ -179,12 +203,14 @@
 
 int change_sig(int signal, int on)
 {
-	sigset_t sigset, old;
+	sigset_t sigset;
 
 	sigemptyset(&sigset);
 	sigaddset(&sigset, signal);
-	sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, &old);
-	return !sigismember(&old, signal);
+	if (sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, NULL) < 0)
+		return -errno;
+
+	return 0;
 }
 
 void block_signals(void)
@@ -196,7 +222,7 @@
 	 * This might matter if gcc figures out how to inline this and
 	 * decides to shuffle this code into the caller.
 	 */
-	mb();
+	barrier();
 }
 
 void unblock_signals(void)
@@ -209,36 +235,26 @@
 	/*
 	 * We loop because the IRQ handler returns with interrupts off.  So,
 	 * interrupts may have arrived and we need to re-enable them and
-	 * recheck pending.
+	 * recheck signals_pending.
 	 */
 	while(1) {
 		/*
 		 * Save and reset save_pending after enabling signals.  This
-		 * way, pending won't be changed while we're reading it.
+		 * way, signals_pending won't be changed while we're reading it.
 		 */
 		signals_enabled = 1;
 
 		/*
-		 * Setting signals_enabled and reading pending must
+		 * Setting signals_enabled and reading signals_pending must
 		 * happen in this order.
 		 */
-		mb();
+		barrier();
 
-		save_pending = pending;
-		if (save_pending == 0) {
-			/*
-			 * This must return with signals enabled, so
-			 * this barrier ensures that writes are
-			 * flushed out before the return.  This might
-			 * matter if gcc figures out how to inline
-			 * this (unlikely, given its size) and decides
-			 * to shuffle this code into the caller.
-			 */
-			mb();
+		save_pending = signals_pending;
+		if (save_pending == 0)
 			return;
-		}
 
-		pending = 0;
+		signals_pending = 0;
 
 		/*
 		 * We have pending interrupts, so disable signals, as the
@@ -254,7 +270,7 @@
 		 * back here.
 		 */
 		if (save_pending & SIGIO_MASK)
-			sig_handler_common_skas(SIGIO, NULL);
+			sig_handler_common(SIGIO, NULL);
 
 		if (save_pending & SIGVTALRM_MASK)
 			real_alarm_handler(NULL);
diff --git a/arch/um/os-Linux/skas/Makefile b/arch/um/os-Linux/skas/Makefile
index 5fd8d4d..d2ea340 100644
--- a/arch/um/os-Linux/skas/Makefile
+++ b/arch/um/os-Linux/skas/Makefile
@@ -1,10 +1,10 @@
 #
-# Copyright (C) 2002 - 2004 Jeff Dike (jdike@addtoit.com)
+# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
 # Licensed under the GPL
 #
 
-obj-y := mem.o process.o trap.o
+obj-y := mem.o process.o
 
-USER_OBJS := mem.o process.o trap.o
+USER_OBJS := $(obj-y)
 
 include arch/um/scripts/Makefile.rules
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index e8b7a97..d36c89c 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -15,6 +15,7 @@
 #include "as-layout.h"
 #include "chan_user.h"
 #include "kern_constants.h"
+#include "kern_util.h"
 #include "mem.h"
 #include "os.h"
 #include "process.h"
@@ -37,27 +38,27 @@
 
 static int ptrace_dump_regs(int pid)
 {
-        unsigned long regs[MAX_REG_NR];
-        int i;
+	unsigned long regs[MAX_REG_NR];
+	int i;
 
-        if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0)
-                return -errno;
+	if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0)
+		return -errno;
 
 	printk(UM_KERN_ERR "Stub registers -\n");
 	for (i = 0; i < ARRAY_SIZE(regs); i++)
 		printk(UM_KERN_ERR "\t%d - %lx\n", i, regs[i]);
 
-        return 0;
+	return 0;
 }
 
 /*
  * Signals that are OK to receive in the stub - we'll just continue it.
  * SIGWINCH will happen when UML is inside a detached screen.
  */
-#define STUB_SIG_MASK ((1 << SIGVTALRM) | (1 << SIGWINCH))
+#define STUB_SIG_MASK (1 << SIGVTALRM)
 
 /* Signals that the stub will finish with - anything else is an error */
-#define STUB_DONE_MASK ((1 << SIGUSR1) | (1 << SIGTRAP))
+#define STUB_DONE_MASK (1 << SIGTRAP)
 
 void wait_stub_done(int pid)
 {
@@ -72,9 +73,11 @@
 			break;
 
 		err = ptrace(PTRACE_CONT, pid, 0, 0);
-		if (err)
-			panic("wait_stub_done : continue failed, errno = %d\n",
-			      errno);
+		if (err) {
+			printk(UM_KERN_ERR "wait_stub_done : continue failed, "
+			       "errno = %d\n", errno);
+			fatal_sigsegv();
+		}
 	}
 
 	if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0)
@@ -85,8 +88,10 @@
 	if (err)
 		printk(UM_KERN_ERR "Failed to get registers from stub, "
 		       "errno = %d\n", -err);
-	panic("wait_stub_done : failed to wait for SIGUSR1/SIGTRAP, pid = %d, "
-	      "n = %d, errno = %d, status = 0x%x\n", pid, n, errno, status);
+	printk(UM_KERN_ERR "wait_stub_done : failed to wait for SIGTRAP, "
+	       "pid = %d, n = %d, errno = %d, status = 0x%x\n", pid, n, errno,
+	       status);
+	fatal_sigsegv();
 }
 
 extern unsigned long current_stub_stack(void);
@@ -97,9 +102,11 @@
 
 	if (ptrace_faultinfo) {
 		err = ptrace(PTRACE_FAULTINFO, pid, 0, fi);
-		if (err)
-			panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, "
-			      "errno = %d\n", errno);
+		if (err) {
+			printk(UM_KERN_ERR "get_skas_faultinfo - "
+			       "PTRACE_FAULTINFO failed, errno = %d\n", errno);
+			fatal_sigsegv();
+		}
 
 		/* Special handling for i386, which has different structs */
 		if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo))
@@ -109,9 +116,11 @@
 	}
 	else {
 		err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV);
-		if (err)
-			panic("Failed to continue stub, pid = %d, errno = %d\n",
-			      pid, errno);
+		if (err) {
+			printk(UM_KERN_ERR "Failed to continue stub, pid = %d, "
+			       "errno = %d\n", pid, errno);
+			fatal_sigsegv();
+		}
 		wait_stub_done(pid);
 
 		/*
@@ -137,6 +146,9 @@
 {
 	int err, status;
 
+	if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END))
+		fatal_sigsegv();
+
 	/* Mark this as a syscall */
 	UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->gp);
 
@@ -144,25 +156,31 @@
 	{
 		err = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET,
 			     __NR_getpid);
-		if (err < 0)
-			panic("handle_trap - nullifying syscall failed, "
-			      "errno = %d\n", errno);
+		if (err < 0) {
+			printk(UM_KERN_ERR "handle_trap - nullifying syscall "
+			       "failed, errno = %d\n", errno);
+			fatal_sigsegv();
+		}
 
 		err = ptrace(PTRACE_SYSCALL, pid, 0, 0);
-		if (err < 0)
-			panic("handle_trap - continuing to end of syscall "
-			      "failed, errno = %d\n", errno);
+		if (err < 0) {
+			printk(UM_KERN_ERR "handle_trap - continuing to end of "
+			       "syscall failed, errno = %d\n", errno);
+			fatal_sigsegv();
+		}
 
 		CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
 		if ((err < 0) || !WIFSTOPPED(status) ||
-		   (WSTOPSIG(status) != SIGTRAP + 0x80)) {
-                        err = ptrace_dump_regs(pid);
-                        if (err)
-                                printk(UM_KERN_ERR "Failed to get registers "
+		    (WSTOPSIG(status) != SIGTRAP + 0x80)) {
+			err = ptrace_dump_regs(pid);
+			if (err)
+				printk(UM_KERN_ERR "Failed to get registers "
 				       "from process, errno = %d\n", -err);
-			panic("handle_trap - failed to wait at end of syscall, "
-			      "errno = %d, status = %d\n", errno, status);
-                }
+			printk(UM_KERN_ERR "handle_trap - failed to wait at "
+			       "end of syscall, errno = %d, status = %d\n",
+			       errno, status);
+			fatal_sigsegv();
+		}
 	}
 
 	handle_syscall(regs);
@@ -178,10 +196,13 @@
 	ptrace(PTRACE_TRACEME, 0, 0, 0);
 
 	signal(SIGTERM, SIG_DFL);
+	signal(SIGWINCH, SIG_IGN);
 	err = set_interval();
-	if (err)
-		panic("userspace_tramp - setting timer failed, errno = %d\n",
-		      err);
+	if (err) {
+		printk(UM_KERN_ERR "userspace_tramp - setting timer failed, "
+		       "errno = %d\n", err);
+		exit(1);
+	}
 
 	if (!proc_mm) {
 		/*
@@ -221,16 +242,14 @@
 
 		set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE);
 		sigemptyset(&sa.sa_mask);
-		sigaddset(&sa.sa_mask, SIGIO);
-		sigaddset(&sa.sa_mask, SIGWINCH);
-		sigaddset(&sa.sa_mask, SIGVTALRM);
-		sigaddset(&sa.sa_mask, SIGUSR1);
-		sa.sa_flags = SA_ONSTACK;
+		sa.sa_flags = SA_ONSTACK | SA_NODEFER;
 		sa.sa_handler = (void *) v;
 		sa.sa_restorer = NULL;
-		if (sigaction(SIGSEGV, &sa, NULL) < 0)
-			panic("userspace_tramp - setting SIGSEGV handler "
-			      "failed - errno = %d\n", errno);
+		if (sigaction(SIGSEGV, &sa, NULL) < 0) {
+			printk(UM_KERN_ERR "userspace_tramp - setting SIGSEGV "
+			       "handler failed - errno = %d\n", errno);
+			exit(1);
+		}
 	}
 
 	kill(os_getpid(), SIGSTOP);
@@ -246,13 +265,18 @@
 {
 	void *stack;
 	unsigned long sp;
-	int pid, status, n, flags;
+	int pid, status, n, flags, err;
 
 	stack = mmap(NULL, UM_KERN_PAGE_SIZE,
 		     PROT_READ | PROT_WRITE | PROT_EXEC,
 		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (stack == MAP_FAILED)
-		panic("start_userspace : mmap failed, errno = %d", errno);
+	if (stack == MAP_FAILED) {
+		err = -errno;
+		printk(UM_KERN_ERR "start_userspace : mmap failed, "
+		       "errno = %d\n", errno);
+		return err;
+	}
+
 	sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *);
 
 	flags = CLONE_FILES;
@@ -262,29 +286,50 @@
 		flags |= SIGCHLD;
 
 	pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack);
-	if (pid < 0)
-		panic("start_userspace : clone failed, errno = %d", errno);
+	if (pid < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "start_userspace : clone failed, "
+		       "errno = %d\n", errno);
+		return err;
+	}
 
 	do {
 		CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
-		if (n < 0)
-			panic("start_userspace : wait failed, errno = %d",
-			      errno);
+		if (n < 0) {
+			err = -errno;
+			printk(UM_KERN_ERR "start_userspace : wait failed, "
+			       "errno = %d\n", errno);
+			goto out_kill;
+		}
 	} while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM));
 
-	if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP))
-		panic("start_userspace : expected SIGSTOP, got status = %d",
-		      status);
+	if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
+		err = -EINVAL;
+		printk(UM_KERN_ERR "start_userspace : expected SIGSTOP, got "
+		       "status = %d\n", status);
+		goto out_kill;
+	}
 
 	if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
-		   (void *) PTRACE_O_TRACESYSGOOD) < 0)
-		panic("start_userspace : PTRACE_OLDSETOPTIONS failed, "
-		      "errno = %d\n", errno);
+		   (void *) PTRACE_O_TRACESYSGOOD) < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "start_userspace : PTRACE_OLDSETOPTIONS "
+		       "failed, errno = %d\n", errno);
+		goto out_kill;
+	}
 
-	if (munmap(stack, UM_KERN_PAGE_SIZE) < 0)
-		panic("start_userspace : munmap failed, errno = %d\n", errno);
+	if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "start_userspace : munmap failed, "
+		       "errno = %d\n", errno);
+		goto out_kill;
+	}
 
 	return pid;
+
+ out_kill:
+	os_kill_ptraced_process(pid, 1);
+	return err;
 }
 
 void userspace(struct uml_pt_regs *regs)
@@ -302,7 +347,16 @@
 	nsecs += os_nsecs();
 
 	while (1) {
-		restore_registers(pid, regs);
+		/*
+		 * This can legitimately fail if the process loads a
+		 * bogus value into a segment register.  It will
+		 * segfault and PTRACE_GETREGS will read that value
+		 * out of the process.  However, PTRACE_SETREGS will
+		 * fail.  In this case, there is nothing to do but
+		 * just kill the process.
+		 */
+		if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp))
+			fatal_sigsegv();
 
 		/* Now we set local_using_sysemu to be used for one loop */
 		local_using_sysemu = get_using_sysemu();
@@ -310,19 +364,26 @@
 		op = SELECT_PTRACE_OPERATION(local_using_sysemu,
 					     singlestepping(NULL));
 
-		err = ptrace(op, pid, 0, 0);
-		if (err)
-			panic("userspace - could not resume userspace process, "
-			      "pid=%d, ptrace operation = %d, errno = %d\n",
-			      pid, op, errno);
+		if (ptrace(op, pid, 0, 0)) {
+			printk(UM_KERN_ERR "userspace - ptrace continue "
+			       "failed, op = %d, errno = %d\n", op, errno);
+			fatal_sigsegv();
+		}
 
 		CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
-		if (err < 0)
-			panic("userspace - waitpid failed, errno = %d\n",
-			      errno);
+		if (err < 0) {
+			printk(UM_KERN_ERR "userspace - wait failed, "
+			       "errno = %d\n", errno);
+			fatal_sigsegv();
+		}
 
 		regs->is_user = 1;
-		save_registers(pid, regs);
+		if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
+			printk(UM_KERN_ERR "userspace - PTRACE_GETREGS failed, "
+			       "errno = %d\n", errno);
+			fatal_sigsegv();
+		}
+
 		UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
 
 		if (WIFSTOPPED(status)) {
@@ -345,7 +406,7 @@
 				break;
 			case SIGVTALRM:
 				now = os_nsecs();
-				if(now < nsecs)
+				if (now < nsecs)
 					break;
 				block_signals();
 				(*sig_info[sig])(sig, regs);
@@ -368,6 +429,7 @@
 			default:
 			        printk(UM_KERN_ERR "userspace - child stopped "
 				       "with signal %d\n", sig);
+				fatal_sigsegv();
 			}
 			pid = userspace_pid[0];
 			interrupt_end();
@@ -419,9 +481,12 @@
 						     .it_interval = tv }) });
 
 	err = ptrace_setregs(pid, thread_regs);
-	if (err < 0)
-		panic("copy_context_skas0 : PTRACE_SETREGS failed, "
-		      "pid = %d, errno = %d\n", pid, -err);
+	if (err < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_SETREGS "
+		       "failed, pid = %d, errno = %d\n", pid, -err);
+		return err;
+	}
 
 	/* set a well known return code for detection of child write failure */
 	child_data->err = 12345678;
@@ -431,31 +496,47 @@
 	 * parent's stack, and check, if bad result.
 	 */
 	err = ptrace(PTRACE_CONT, pid, 0, 0);
-	if (err)
-		panic("Failed to continue new process, pid = %d, "
-		      "errno = %d\n", pid, errno);
+	if (err) {
+		err = -errno;
+		printk(UM_KERN_ERR "Failed to continue new process, pid = %d, "
+		       "errno = %d\n", pid, errno);
+		return err;
+	}
+
 	wait_stub_done(pid);
 
 	pid = data->err;
-	if (pid < 0)
-		panic("copy_context_skas0 - stub-parent reports error %d\n",
-		      -pid);
+	if (pid < 0) {
+		printk(UM_KERN_ERR "copy_context_skas0 - stub-parent reports "
+		       "error %d\n", -pid);
+		return pid;
+	}
 
 	/*
 	 * Wait, until child has finished too: read child's result from
 	 * child's stack and check it.
 	 */
 	wait_stub_done(pid);
-	if (child_data->err != STUB_DATA)
-		panic("copy_context_skas0 - stub-child reports error %ld\n",
-		      child_data->err);
+	if (child_data->err != STUB_DATA) {
+		printk(UM_KERN_ERR "copy_context_skas0 - stub-child reports "
+		       "error %ld\n", child_data->err);
+		err = child_data->err;
+		goto out_kill;
+	}
 
 	if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
-		   (void *)PTRACE_O_TRACESYSGOOD) < 0)
-		panic("copy_context_skas0 : PTRACE_OLDSETOPTIONS failed, "
-		      "errno = %d\n", errno);
+		   (void *)PTRACE_O_TRACESYSGOOD) < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_OLDSETOPTIONS "
+		       "failed, errno = %d\n", errno);
+		goto out_kill;
+	}
 
 	return pid;
+
+ out_kill:
+	os_kill_ptraced_process(pid, 1);
+	return err;
 }
 
 /*
@@ -463,8 +544,8 @@
  * available. Opening /proc/mm creates a new mm_context, which lacks
  * the stub-pages. Thus, we map them using /proc/mm-fd
  */
-void map_stub_pages(int fd, unsigned long code,
-		    unsigned long data, unsigned long stack)
+int map_stub_pages(int fd, unsigned long code, unsigned long data,
+		   unsigned long stack)
 {
 	struct proc_mm_op mmop;
 	int n;
@@ -488,8 +569,9 @@
 		printk(UM_KERN_ERR "mmap args - addr = 0x%lx, fd = %d, "
 		       "offset = %llx\n", code, code_fd,
 		       (unsigned long long) code_offset);
-		panic("map_stub_pages : /proc/mm map for code failed, "
-		      "err = %d\n", n);
+		printk(UM_KERN_ERR "map_stub_pages : /proc/mm map for code "
+		       "failed, err = %d\n", n);
+		return -n;
 	}
 
 	if (stack) {
@@ -507,10 +589,15 @@
 				      .offset  = map_offset
 		} } });
 		CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop)));
-		if (n != sizeof(mmop))
-			panic("map_stub_pages : /proc/mm map for data failed, "
-			      "err = %d\n", errno);
+		if (n != sizeof(mmop)) {
+			n = errno;
+			printk(UM_KERN_ERR "map_stub_pages : /proc/mm map for "
+			       "data failed, err = %d\n", n);
+			return -n;
+		}
 	}
+
+	return 0;
 }
 
 void new_thread(void *stack, jmp_buf *buf, void (*handler)(void))
@@ -571,7 +658,9 @@
 		kmalloc_ok = 0;
 		return 1;
 	default:
-		panic("Bad sigsetjmp return in start_idle_thread - %d\n", n);
+		printk(UM_KERN_ERR "Bad sigsetjmp return in "
+		       "start_idle_thread - %d\n", n);
+		fatal_sigsegv();
 	}
 	longjmp(*switch_buf, 1);
 }
@@ -614,9 +703,11 @@
 	if (proc_mm) {
 		err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0,
 			     mm_idp->u.mm_fd);
-		if (err)
-			panic("__switch_mm - PTRACE_SWITCH_MM failed, "
-			      "errno = %d\n", errno);
+		if (err) {
+			printk(UM_KERN_ERR "__switch_mm - PTRACE_SWITCH_MM "
+			       "failed, errno = %d\n", errno);
+			fatal_sigsegv();
+		}
 	}
 	else userspace_pid[0] = mm_idp->u.pid;
 }
diff --git a/arch/um/os-Linux/skas/trap.c b/arch/um/os-Linux/skas/trap.c
deleted file mode 100644
index 3b1b924..0000000
--- a/arch/um/os-Linux/skas/trap.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#if 0
-#include "kern_util.h"
-#include "skas.h"
-#include "ptrace_user.h"
-#include "sysdep/ptrace_user.h"
-#endif
-
-#include <errno.h>
-#include <signal.h>
-#include "sysdep/ptrace.h"
-#include "kern_constants.h"
-#include "as-layout.h"
-#include "os.h"
-#include "sigcontext.h"
-#include "task.h"
-
-static struct uml_pt_regs ksig_regs[UM_NR_CPUS];
-
-void sig_handler_common_skas(int sig, void *sc_ptr)
-{
-	struct sigcontext *sc = sc_ptr;
-	struct uml_pt_regs *r;
-	void (*handler)(int, struct uml_pt_regs *);
-	int save_user, save_errno = errno;
-
-	/*
-	 * This is done because to allow SIGSEGV to be delivered inside a SEGV
-	 * handler.  This can happen in copy_user, and if SEGV is disabled,
-	 * the process will die.
-	 * XXX Figure out why this is better than SA_NODEFER
-	 */
-	if (sig == SIGSEGV) {
-		change_sig(SIGSEGV, 1);
-		/*
-		 * For segfaults, we want the data from the
-		 * sigcontext.  In this case, we don't want to mangle
-		 * the process registers, so use a static set of
-		 * registers.  For other signals, the process
-		 * registers are OK.
-		 */
-		r = &ksig_regs[cpu()];
-		copy_sc(r, sc_ptr);
-	}
-	else r = TASK_REGS(get_current());
-
-	save_user = r->is_user;
-	r->is_user = 0;
-	if ((sig == SIGFPE) || (sig == SIGSEGV) || (sig == SIGBUS) ||
-	    (sig == SIGILL) || (sig == SIGTRAP))
-		GET_FAULTINFO_FROM_SC(r->faultinfo, sc);
-
-	change_sig(SIGUSR1, 1);
-
-	handler = sig_info[sig];
-
-	/* unblock SIGVTALRM, SIGIO if sig isn't IRQ signal */
-	if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGVTALRM))
-		unblock_signals();
-
-	handler(sig, r);
-
-	errno = save_errno;
-	r->is_user = save_user;
-}
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 7b81f6c..b616e15 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -60,10 +60,11 @@
 		 * the UML code itself.
 		 */
 		ret = 2;
-	_exit(ret);
+
+	exit(ret);
 }
 
-static void fatal_perror(char *str)
+static void fatal_perror(const char *str)
 {
 	perror(str);
 	exit(1);
@@ -341,6 +342,8 @@
 
 void __init os_early_checks(void)
 {
+	int pid;
+
 	/* Print out the core dump limits early */
 	check_coredump_limit();
 
@@ -350,6 +353,11 @@
 	 * kernel is running.
 	 */
 	check_tmpexec();
+
+	pid = start_ptraced_child();
+	if (init_registers(pid))
+		fatal("Failed to initialize default registers");
+	stop_ptraced_child(pid, 1, 1);
 }
 
 static int __init noprocmm_cmd_param(char *str, int* add)
@@ -411,7 +419,6 @@
 			non_fatal("found\n");
 	}
 
-	init_registers(pid);
 	stop_ptraced_child(pid, 1, 1);
 }
 
@@ -466,7 +473,7 @@
 	else non_fatal("found\n");
 }
 
-int can_do_skas(void)
+void can_do_skas(void)
 {
 	non_fatal("Checking for the skas3 patch in the host:\n");
 
@@ -476,8 +483,6 @@
 
 	if (!proc_mm || !ptrace_faultinfo || !ptrace_ldt)
 		skas_needs_stub = 1;
-
-	return 1;
 }
 
 int __init parse_iomem(char *str, int *add)
diff --git a/arch/um/os-Linux/trap.c b/arch/um/os-Linux/trap.c
deleted file mode 100644
index 2a1c984..0000000
--- a/arch/um/os-Linux/trap.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <signal.h>
-#include "os.h"
-#include "sysdep/ptrace.h"
-
-/* Initialized from linux_main() */
-void (*sig_info[NSIG])(int, struct uml_pt_regs *);
-
-void os_fill_handlinfo(struct kern_handlers h)
-{
-	sig_info[SIGTRAP] = h.relay_signal;
-	sig_info[SIGFPE] = h.relay_signal;
-	sig_info[SIGILL] = h.relay_signal;
-	sig_info[SIGWINCH] = h.winch;
-	sig_info[SIGBUS] = h.bus_handler;
-	sig_info[SIGSEGV] = h.page_fault;
-	sig_info[SIGIO] = h.sigio_handler;
-	sig_info[SIGVTALRM] = h.timer_handler;
-}
diff --git a/arch/um/os-Linux/tty.c b/arch/um/os-Linux/tty.c
index 4cfdd18..b09ff66 100644
--- a/arch/um/os-Linux/tty.c
+++ b/arch/um/os-Linux/tty.c
@@ -1,13 +1,16 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+/*
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #include <stdlib.h>
+#include <unistd.h>
 #include <errno.h>
+#include <fcntl.h>
+#include "kern_constants.h"
+#include "kern_util.h"
 #include "os.h"
 #include "user.h"
-#include "kern_util.h"
 
 struct grantpt_info {
 	int fd;
@@ -26,36 +29,34 @@
 int get_pty(void)
 {
 	struct grantpt_info info;
-	int fd;
+	int fd, err;
 
-	fd = os_open_file("/dev/ptmx", of_rdwr(OPENFLAGS()), 0);
-	if(fd < 0){
-		printk("get_pty : Couldn't open /dev/ptmx - err = %d\n", -fd);
-		return(fd);
+	fd = open("/dev/ptmx", O_RDWR);
+	if (fd < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "get_pty : Couldn't open /dev/ptmx - "
+		       "err = %d\n", errno);
+		return err;
 	}
 
 	info.fd = fd;
 	initial_thread_cb(grantpt_cb, &info);
 
-	if(info.res < 0){
-		printk("get_pty : Couldn't grant pty - errno = %d\n", 
-		       -info.err);
-		return(-1);
+	if (info.res < 0) {
+		err = -info.err;
+		printk(UM_KERN_ERR "get_pty : Couldn't grant pty - "
+		       "errno = %d\n", -info.err);
+		goto out;
 	}
-	if(unlockpt(fd) < 0){
-		printk("get_pty : Couldn't unlock pty - errno = %d\n", errno);
-		return(-1);
-	}
-	return(fd);
-}
 
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
+	if (unlockpt(fd) < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "get_pty : Couldn't unlock pty - "
+		       "errno = %d\n", errno);
+		goto out;
+	}
+	return fd;
+out:
+	close(fd);
+	return err;
+}
diff --git a/arch/um/os-Linux/tty_log.c b/arch/um/os-Linux/tty_log.c
index d11a55b..cc648e6 100644
--- a/arch/um/os-Linux/tty_log.c
+++ b/arch/um/os-Linux/tty_log.c
@@ -12,7 +12,6 @@
 #include <sys/time.h>
 #include "init.h"
 #include "user.h"
-#include "kern_util.h"
 #include "os.h"
 
 #define TTY_LOG_DIR "./"
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 3e058ce..a6f31d4 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -88,21 +88,6 @@
 		 host.release, host.version, host.machine);
 }
 
-int setjmp_wrapper(void (*proc)(void *, void *), ...)
-{
-	va_list args;
-	jmp_buf buf;
-	int n;
-
-	n = UML_SETJMP(&buf);
-	if(n == 0){
-		va_start(args, proc);
-		(*proc)(&buf, &args);
-	}
-	va_end(args);
-	return n;
-}
-
 void os_dump_core(void)
 {
 	int pid;
diff --git a/arch/um/sys-i386/bug.c b/arch/um/sys-i386/bug.c
index a4360b5..8d4f273 100644
--- a/arch/um/sys-i386/bug.c
+++ b/arch/um/sys-i386/bug.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/uaccess.h>
+#include <asm/errno.h>
 
 /* Mostly copied from i386/x86_86 - eliminated the eip < PAGE_OFFSET because
  * that's not relevant in skas mode.
diff --git a/arch/um/sys-i386/bugs.c b/arch/um/sys-i386/bugs.c
index 806895d..a74442d 100644
--- a/arch/um/sys-i386/bugs.c
+++ b/arch/um/sys-i386/bugs.c
@@ -3,171 +3,47 @@
  * Licensed under the GPL
  */
 
-#include <errno.h>
 #include <signal.h>
-#include <string.h>
 #include "kern_constants.h"
-#include "os.h"
+#include "kern_util.h"
+#include "longjmp.h"
 #include "task.h"
 #include "user.h"
-
-#define MAXTOKEN 64
+#include "sysdep/ptrace.h"
 
 /* Set during early boot */
 int host_has_cmov = 1;
-int host_has_xmm = 0;
+static jmp_buf cmov_test_return;
 
-static char token(int fd, char *buf, int len, char stop)
+static void cmov_sigill_test_handler(int sig)
 {
-	int n;
-	char *ptr, *end, c;
-
-	ptr = buf;
-	end = &buf[len];
-	do {
-		n = os_read_file(fd, ptr, sizeof(*ptr));
-		c = *ptr++;
-		if (n != sizeof(*ptr)) {
-			if (n == 0)
-				return 0;
-			printk(UM_KERN_ERR "Reading /proc/cpuinfo failed, "
-			       "err = %d\n", -n);
-			if (n < 0)
-				return n;
-			else return -EIO;
-		}
-	} while ((c != '\n') && (c != stop) && (ptr < end));
-
-	if (ptr == end) {
-		printk(UM_KERN_ERR "Failed to find '%c' in /proc/cpuinfo\n",
-		       stop);
-		return -1;
-	}
-	*(ptr - 1) = '\0';
-	return c;
-}
-
-static int find_cpuinfo_line(int fd, char *key, char *scratch, int len)
-{
-	int n;
-	char c;
-
-	scratch[len - 1] = '\0';
-	while (1) {
-		c = token(fd, scratch, len - 1, ':');
-		if (c <= 0)
-			return 0;
-		else if (c != ':') {
-			printk(UM_KERN_ERR "Failed to find ':' in "
-			       "/proc/cpuinfo\n");
-			return 0;
-		}
-
-		if (!strncmp(scratch, key, strlen(key)))
-			return 1;
-
-		do {
-			n = os_read_file(fd, &c, sizeof(c));
-			if (n != sizeof(c)) {
-				printk(UM_KERN_ERR "Failed to find newline in "
-				       "/proc/cpuinfo, err = %d\n", -n);
-				return 0;
-			}
-		} while (c != '\n');
-	}
-	return 0;
-}
-
-static int check_cpu_flag(char *feature, int *have_it)
-{
-	char buf[MAXTOKEN], c;
-	int fd, len = ARRAY_SIZE(buf);
-
-	printk(UM_KERN_INFO "Checking for host processor %s support...",
-	       feature);
-	fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0);
-	if (fd < 0) {
-		printk(UM_KERN_ERR "Couldn't open /proc/cpuinfo, err = %d\n",
-		       -fd);
-		return 0;
-	}
-
-	*have_it = 0;
-	if (!find_cpuinfo_line(fd, "flags", buf, ARRAY_SIZE(buf)))
-		goto out;
-
-	c = token(fd, buf, len - 1, ' ');
-	if (c < 0)
-		goto out;
-	else if (c != ' ') {
-		printk(UM_KERN_ERR "Failed to find ' ' in /proc/cpuinfo\n");
-		goto out;
-	}
-
-	while (1) {
-		c = token(fd, buf, len - 1, ' ');
-		if (c < 0)
-			goto out;
-		else if (c == '\n')
-			break;
-
-		if (!strcmp(buf, feature)) {
-			*have_it = 1;
-			goto out;
-		}
-	}
- out:
-	if (*have_it == 0)
-		printk("No\n");
-	else if (*have_it == 1)
-		printk("Yes\n");
-	os_close_file(fd);
-	return 1;
-}
-
-#if 0 /*
-       * This doesn't work in tt mode, plus it's causing compilation problems
-       * for some people.
-       */
-static void disable_lcall(void)
-{
-	struct modify_ldt_ldt_s ldt;
-	int err;
-
-	bzero(&ldt, sizeof(ldt));
-	ldt.entry_number = 7;
-	ldt.base_addr = 0;
-	ldt.limit = 0;
-	err = modify_ldt(1, &ldt, sizeof(ldt));
-	if (err)
-		printk(UM_KERN_ERR "Failed to disable lcall7 - errno = %d\n",
-		       errno);
-}
-#endif
-
-void arch_init_thread(void)
-{
-#if 0
-	disable_lcall();
-#endif
+	host_has_cmov = 0;
+	longjmp(cmov_test_return, 1);
 }
 
 void arch_check_bugs(void)
 {
-	int have_it;
+	struct sigaction old, new;
 
-	if (os_access("/proc/cpuinfo", OS_ACC_R_OK) < 0) {
-		printk(UM_KERN_ERR "/proc/cpuinfo not available - skipping CPU "
-		       "capability checks\n");
-		return;
-	}
-	if (check_cpu_flag("cmov", &have_it))
-		host_has_cmov = have_it;
-	if (check_cpu_flag("xmm", &have_it))
-		host_has_xmm = have_it;
+	printk(UM_KERN_INFO "Checking for host processor cmov support...");
+	new.sa_handler = cmov_sigill_test_handler;
+
+	/* Make sure that SIGILL is enabled after the handler longjmps back */
+	new.sa_flags = SA_NODEFER;
+	sigemptyset(&new.sa_mask);
+	sigaction(SIGILL, &new, &old);
+
+	if (setjmp(cmov_test_return) == 0) {
+		unsigned long foo = 0;
+		__asm__ __volatile__("cmovz %0, %1" : "=r" (foo) : "0" (foo));
+		printk(UM_KERN_CONT "Yes\n");
+	} else
+		printk(UM_KERN_CONT "No\n");
+
+	sigaction(SIGILL, &old, &new);
 }
 
-int arch_handle_signal(int sig, struct uml_pt_regs *regs)
+void arch_examine_signal(int sig, struct uml_pt_regs *regs)
 {
 	unsigned char tmp[2];
 
@@ -176,24 +52,25 @@
 	 * SIGILL in init.
 	 */
 	if ((sig != SIGILL) || (TASK_PID(get_current()) != 1))
-		return 0;
+		return;
 
-	if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2))
-		panic("SIGILL in init, could not read instructions!\n");
+	if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) {
+		printk(UM_KERN_ERR "SIGILL in init, could not read "
+		       "instructions!\n");
+		return;
+	}
+
 	if ((tmp[0] != 0x0f) || ((tmp[1] & 0xf0) != 0x40))
-		return 0;
+		return;
 
 	if (host_has_cmov == 0)
-		panic("SIGILL caused by cmov, which this processor doesn't "
-		      "implement, boot a filesystem compiled for older "
-		      "processors");
+		printk(UM_KERN_ERR "SIGILL caused by cmov, which this "
+		       "processor doesn't implement.  Boot a filesystem "
+		       "compiled for older processors");
 	else if (host_has_cmov == 1)
-		panic("SIGILL caused by cmov, which this processor claims to "
-		      "implement");
-	else if (host_has_cmov == -1)
-		panic("SIGILL caused by cmov, couldn't tell if this processor "
-		      "implements it, boot a filesystem compiled for older "
-		      "processors");
-	else panic("Bad value for host_has_cmov (%d)", host_has_cmov);
-	return 0;
+		printk(UM_KERN_ERR "SIGILL caused by cmov, which this "
+		       "processor claims to implement");
+	else
+		printk(UM_KERN_ERR "Bad value for host_has_cmov (%d)",
+			host_has_cmov);
 }
diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c
index 67c0958..a34263e 100644
--- a/arch/um/sys-i386/ldt.c
+++ b/arch/um/sys-i386/ldt.c
@@ -3,8 +3,9 @@
  * Licensed under the GPL
  */
 
-#include "linux/mm.h"
-#include "asm/unistd.h"
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <asm/unistd.h>
 #include "os.h"
 #include "proc_mm.h"
 #include "skas.h"
@@ -146,7 +147,7 @@
 	if (ptrace_ldt)
 		return read_ldt_from_host(ptr, bytecount);
 
-	down(&ldt->semaphore);
+	mutex_lock(&ldt->lock);
 	if (ldt->entry_count <= LDT_DIRECT_ENTRIES) {
 		size = LDT_ENTRY_SIZE*LDT_DIRECT_ENTRIES;
 		if (size > bytecount)
@@ -170,7 +171,7 @@
 			ptr += size;
 		}
 	}
-	up(&ldt->semaphore);
+	mutex_unlock(&ldt->lock);
 
 	if (bytecount == 0 || err == -EFAULT)
 		goto out;
@@ -228,7 +229,7 @@
 	}
 
 	if (!ptrace_ldt)
-		down(&ldt->semaphore);
+		mutex_lock(&ldt->lock);
 
 	err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1);
 	if (err)
@@ -288,7 +289,7 @@
 	err = 0;
 
 out_unlock:
-	up(&ldt->semaphore);
+	mutex_unlock(&ldt->lock);
 out:
 	return err;
 }
@@ -395,7 +396,7 @@
 
 
 	if (!ptrace_ldt)
-		init_MUTEX(&new_mm->ldt.semaphore);
+		mutex_init(&new_mm->ldt.lock);
 
 	if (!from_mm) {
 		memset(&desc, 0, sizeof(desc));
@@ -455,7 +456,7 @@
 		 * i.e., we have to use the stub for modify_ldt, which
 		 * can't handle the big read buffer of up to 64kB.
 		 */
-		down(&from_mm->ldt.semaphore);
+		mutex_lock(&from_mm->ldt.lock);
 		if (from_mm->ldt.entry_count <= LDT_DIRECT_ENTRIES)
 			memcpy(new_mm->ldt.u.entries, from_mm->ldt.u.entries,
 			       sizeof(new_mm->ldt.u.entries));
@@ -474,7 +475,7 @@
 			}
 		}
 		new_mm->ldt.entry_count = from_mm->ldt.entry_count;
-		up(&from_mm->ldt.semaphore);
+		mutex_unlock(&from_mm->ldt.lock);
 	}
 
     out:
diff --git a/arch/um/sys-i386/ptrace.c b/arch/um/sys-i386/ptrace.c
index bd3da8a..6b44999 100644
--- a/arch/um/sys-i386/ptrace.c
+++ b/arch/um/sys-i386/ptrace.c
@@ -8,11 +8,11 @@
 #include "asm/uaccess.h"
 #include "skas.h"
 
-extern int arch_switch_tls(struct task_struct *from, struct task_struct *to);
+extern int arch_switch_tls(struct task_struct *to);
 
-void arch_switch_to(struct task_struct *from, struct task_struct *to)
+void arch_switch_to(struct task_struct *to)
 {
-	int err = arch_switch_tls(from, to);
+	int err = arch_switch_tls(to);
 	if (!err)
 		return;
 
diff --git a/arch/um/sys-i386/ptrace_user.c b/arch/um/sys-i386/ptrace_user.c
index 5cf97bc..0b10c3e 100644
--- a/arch/um/sys-i386/ptrace_user.c
+++ b/arch/um/sys-i386/ptrace_user.c
@@ -19,17 +19,3 @@
 		return -errno;
 	return 0;
 }
-
-int ptrace_getfpregs(long pid, unsigned long *regs)
-{
-	if (ptrace(PTRACE_GETFPREGS, pid, 0, regs) < 0)
-		return -errno;
-	return 0;
-}
-
-int ptrace_setfpregs(long pid, unsigned long *regs)
-{
-	if (ptrace(PTRACE_SETFPREGS, pid, 0, regs) < 0)
-		return -errno;
-	return 0;
-}
diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c
index 19053d4..fd0c25a 100644
--- a/arch/um/sys-i386/signal.c
+++ b/arch/um/sys-i386/signal.c
@@ -168,12 +168,13 @@
 			     struct sigcontext __user *from)
 {
 	struct sigcontext sc;
-	int err;
+	int err, pid;
 
 	err = copy_from_user(&sc, from, sizeof(sc));
 	if (err)
 		return err;
 
+	pid = userspace_pid[current_thread_info()->cpu];
 	copy_sc(&regs->regs, &sc);
 	if (have_fpx_regs) {
 		struct user_fxsr_struct fpx;
@@ -187,8 +188,7 @@
 		if (err)
 			return 1;
 
-		err = restore_fpx_registers(userspace_pid[current_thread->cpu],
-					    (unsigned long *) &fpx);
+		err = restore_fpx_registers(pid, (unsigned long *) &fpx);
 		if (err < 0) {
 			printk(KERN_ERR "copy_sc_from_user - "
 			       "restore_fpx_registers failed, errno = %d\n",
@@ -204,8 +204,7 @@
 		if (err)
 			return 1;
 
-		err = restore_fp_registers(userspace_pid[current_thread->cpu],
-					   (unsigned long *) &fp);
+		err = restore_fp_registers(pid, (unsigned long *) &fp);
 		if (err < 0) {
 			printk(KERN_ERR "copy_sc_from_user - "
 			       "restore_fp_registers failed, errno = %d\n",
@@ -223,7 +222,7 @@
 {
 	struct sigcontext sc;
 	struct faultinfo * fi = &current->thread.arch.faultinfo;
-	int err;
+	int err, pid;
 
 	sc.gs = REGS_GS(regs->regs.gp);
 	sc.fs = REGS_FS(regs->regs.gp);
@@ -249,11 +248,11 @@
 	to_fp = (to_fp ? to_fp : (struct _fpstate __user *) (to + 1));
 	sc.fpstate = to_fp;
 
+	pid = userspace_pid[current_thread_info()->cpu];
 	if (have_fpx_regs) {
 		struct user_fxsr_struct fpx;
 
-		err = save_fpx_registers(userspace_pid[current_thread->cpu],
-					 (unsigned long *) &fpx);
+		err = save_fpx_registers(pid, (unsigned long *) &fpx);
 		if (err < 0){
 			printk(KERN_ERR "copy_sc_to_user - save_fpx_registers "
 			       "failed, errno = %d\n", err);
@@ -276,8 +275,7 @@
 	else {
 		struct user_i387_struct fp;
 
-		err = save_fp_registers(userspace_pid[current_thread->cpu],
-					(unsigned long *) &fp);
+		err = save_fp_registers(pid, (unsigned long *) &fp);
 		if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct)))
 			return 1;
 	}
diff --git a/arch/um/sys-i386/stub.S b/arch/um/sys-i386/stub.S
index e730772..7699e89 100644
--- a/arch/um/sys-i386/stub.S
+++ b/arch/um/sys-i386/stub.S
@@ -7,7 +7,7 @@
 	.globl batch_syscall_stub
 batch_syscall_stub:
 	/* load pointer to first operation */
-	mov	$(ASM_STUB_DATA+8), %esp
+	mov	$(STUB_DATA+8), %esp
 
 again:
 	/* load length of additional data */
@@ -15,12 +15,12 @@
 
 	/* if(length == 0) : end of list */
 	/* write possible 0 to header */
-	mov	%eax, ASM_STUB_DATA+4
+	mov	%eax, STUB_DATA+4
 	cmpl	$0, %eax
 	jz	done
 
 	/* save current pointer */
-	mov	%esp, ASM_STUB_DATA+4
+	mov	%esp, STUB_DATA+4
 
 	/* skip additional data */
 	add	%eax, %esp
@@ -46,7 +46,7 @@
 
 done:
 	/* save return value */
-	mov	%eax, ASM_STUB_DATA
+	mov	%eax, STUB_DATA
 
 	/* stop */
 	int3
diff --git a/arch/um/sys-i386/stub_segv.c b/arch/um/sys-i386/stub_segv.c
index b3999cb..28ccf73 100644
--- a/arch/um/sys-i386/stub_segv.c
+++ b/arch/um/sys-i386/stub_segv.c
@@ -1,32 +1,17 @@
 /*
- * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <signal.h>
-#include <sys/select.h> /* The only way I can see to get sigset_t */
-#include <asm/unistd.h>
-#include "as-layout.h"
-#include "uml-config.h"
 #include "sysdep/stub.h"
 #include "sysdep/sigcontext.h"
-#include "sysdep/faultinfo.h"
 
 void __attribute__ ((__section__ (".__syscall_stub")))
 stub_segv_handler(int sig)
 {
 	struct sigcontext *sc = (struct sigcontext *) (&sig + 1);
-	int pid;
 
 	GET_FAULTINFO_FROM_SC(*((struct faultinfo *) STUB_DATA), sc);
 
-	pid = stub_syscall0(__NR_getpid);
-	stub_syscall2(__NR_kill, pid, SIGUSR1);
-
-	/* Load pointer to sigcontext into esp, since we need to leave
-	 * the stack in its original form when we do the sigreturn here, by
-	 * hand.
-	 */
-	__asm__ __volatile__("mov %0,%%esp ; movl %1, %%eax ; "
-			     "int $0x80" : : "a" (sc), "g" (__NR_sigreturn));
+	trap_myself();
 }
diff --git a/arch/um/sys-i386/sys_call_table.S b/arch/um/sys-i386/sys_call_table.S
index 12d4148..00e5f520 100644
--- a/arch/um/sys-i386/sys_call_table.S
+++ b/arch/um/sys-i386/sys_call_table.S
@@ -9,4 +9,9 @@
 
 #define old_mmap old_mmap_i386
 
+.section .rodata,"a"
+
 #include "../../x86/kernel/syscall_table_32.S"
+
+ENTRY(syscall_table_size)
+.long .-sys_call_table
diff --git a/arch/um/sys-i386/tls.c b/arch/um/sys-i386/tls.c
index fcaff86..c6c7131 100644
--- a/arch/um/sys-i386/tls.c
+++ b/arch/um/sys-i386/tls.c
@@ -26,6 +26,11 @@
 	cpu = get_cpu();
 	ret = os_set_thread_area(info, userspace_pid[cpu]);
 	put_cpu();
+
+	if (ret)
+		printk(KERN_ERR "PTRACE_SET_THREAD_AREA failed, err = %d, "
+		       "index = %d\n", ret, info->entry_number);
+
 	return ret;
 }
 
@@ -37,6 +42,11 @@
 	cpu = get_cpu();
 	ret = os_get_thread_area(info, userspace_pid[cpu]);
 	put_cpu();
+
+	if (ret)
+		printk(KERN_ERR "PTRACE_GET_THREAD_AREA failed, err = %d, "
+		       "index = %d\n", ret, info->entry_number);
+
 	return ret;
 }
 
@@ -172,7 +182,7 @@
  * SKAS patch.
  */
 
-int arch_switch_tls(struct task_struct *from, struct task_struct *to)
+int arch_switch_tls(struct task_struct *to)
 {
 	if (!host_supports_tls)
 		return 0;
@@ -225,7 +235,8 @@
 }
 
 /* XXX: use do_get_thread_area to read the host value? I'm not at all sure! */
-static int get_tls_entry(struct task_struct* task, struct user_desc *info, int idx)
+static int get_tls_entry(struct task_struct *task, struct user_desc *info,
+			 int idx)
 {
 	struct thread_struct *t = &task->thread;
 
@@ -263,7 +274,7 @@
 	goto out;
 }
 
-asmlinkage int sys_set_thread_area(struct user_desc __user *user_desc)
+int sys_set_thread_area(struct user_desc __user *user_desc)
 {
 	struct user_desc info;
 	int idx, ret;
@@ -298,7 +309,7 @@
  * i386. However the only possible error are caused by bugs.
  */
 int ptrace_set_thread_area(struct task_struct *child, int idx,
-		struct user_desc __user *user_desc)
+			   struct user_desc __user *user_desc)
 {
 	struct user_desc info;
 
@@ -311,7 +322,7 @@
 	return set_tls_entry(child, &info, idx, 0);
 }
 
-asmlinkage int sys_get_thread_area(struct user_desc __user *user_desc)
+int sys_get_thread_area(struct user_desc __user *user_desc)
 {
 	struct user_desc info;
 	int idx, ret;
@@ -355,10 +366,9 @@
 	return ret;
 }
 
-
 /*
- * XXX: This part is probably common to i386 and x86-64. Don't create a common
- * file for now, do that when implementing x86-64 support.
+ * This code is really i386-only, but it detects and logs x86_64 GDT indexes
+ * if a 32-bit UML is running on a 64-bit host.
  */
 static int __init __setup_host_supports_tls(void)
 {
@@ -367,13 +377,16 @@
 		printk(KERN_INFO "Host TLS support detected\n");
 		printk(KERN_INFO "Detected host type: ");
 		switch (host_gdt_entry_tls_min) {
-			case GDT_ENTRY_TLS_MIN_I386:
-				printk("i386\n");
-				break;
-			case GDT_ENTRY_TLS_MIN_X86_64:
-				printk("x86_64\n");
-				break;
+		case GDT_ENTRY_TLS_MIN_I386:
+			printk(KERN_CONT "i386");
+			break;
+		case GDT_ENTRY_TLS_MIN_X86_64:
+			printk(KERN_CONT "x86_64");
+			break;
 		}
+		printk(KERN_CONT " (GDT indexes %d to %d)\n",
+		       host_gdt_entry_tls_min,
+		       host_gdt_entry_tls_min + GDT_ENTRY_TLS_ENTRIES);
 	} else
 		printk(KERN_ERR "  Host TLS support NOT detected! "
 				"TLS support inside UML will not work\n");
diff --git a/arch/um/sys-ppc/Makefile b/arch/um/sys-ppc/Makefile
index a9814a7..0890152 100644
--- a/arch/um/sys-ppc/Makefile
+++ b/arch/um/sys-ppc/Makefile
@@ -6,7 +6,7 @@
 OBJS = ptrace.o sigcontext.o semaphore.o checksum.o miscthings.o misc.o \
 	ptrace_user.o sysrq.o
 
-EXTRA_AFLAGS := -DCONFIG_PPC32 -I. -I$(TOPDIR)/arch/ppc/kernel
+EXTRA_AFLAGS := -DCONFIG_PPC32 -I. -I$(srctree)/arch/ppc/kernel
 
 all: $(OBJ)
 
@@ -22,25 +22,25 @@
 
 semaphore.c:
 	rm -f $@
-	ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@
+	ln -s $(srctree)/arch/ppc/kernel/$@ $@
 
 checksum.S:
 	rm -f $@
-	ln -s $(TOPDIR)/arch/ppc/lib/$@ $@
+	ln -s $(srctree)/arch/ppc/lib/$@ $@
 
 mk_defs.c:
 	rm -f $@
-	ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@
+	ln -s $(srctree)/arch/ppc/kernel/$@ $@
 
 ppc_defs.head:
 	rm -f $@
-	ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@
+	ln -s $(srctree)/arch/ppc/kernel/$@ $@
 
 ppc_defs.h: mk_defs.c ppc_defs.head \
-		$(TOPDIR)/include/asm-ppc/mmu.h \
-		$(TOPDIR)/include/asm-ppc/processor.h \
-		$(TOPDIR)/include/asm-ppc/pgtable.h \
-		$(TOPDIR)/include/asm-ppc/ptrace.h
+		$(srctree)/include/asm-ppc/mmu.h \
+		$(srctree)/include/asm-ppc/processor.h \
+		$(srctree)/include/asm-ppc/pgtable.h \
+		$(srctree)/include/asm-ppc/ptrace.h
 #	$(CC) $(CFLAGS) -S mk_defs.c
 	cp ppc_defs.head ppc_defs.h
 # for bk, this way we can write to the file even if it's not checked out
@@ -56,13 +56,13 @@
 
 checksum.o: checksum.S
 	rm -f asm
-	ln -s $(TOPDIR)/include/asm-ppc asm
+	ln -s $(srctree)/include/asm-ppc asm
 	$(CC) $(EXTRA_AFLAGS) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o
 	rm -f asm
 
 misc.o: misc.S ppc_defs.h
 	rm -f asm
-	ln -s $(TOPDIR)/include/asm-ppc asm
+	ln -s $(srctree)/include/asm-ppc asm
 	$(CC) $(EXTRA_AFLAGS) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o
 	rm -f asm
 
diff --git a/arch/um/sys-x86_64/bug.c b/arch/um/sys-x86_64/bug.c
index a4360b5..e8034e3 100644
--- a/arch/um/sys-x86_64/bug.c
+++ b/arch/um/sys-x86_64/bug.c
@@ -5,7 +5,8 @@
 
 #include <linux/uaccess.h>
 
-/* Mostly copied from i386/x86_86 - eliminated the eip < PAGE_OFFSET because
+/*
+ * Mostly copied from i386/x86_64 - eliminated the eip < PAGE_OFFSET because
  * that's not relevant in skas mode.
  */
 
diff --git a/arch/um/sys-x86_64/bugs.c b/arch/um/sys-x86_64/bugs.c
index 506b676..44e02ba 100644
--- a/arch/um/sys-x86_64/bugs.c
+++ b/arch/um/sys-x86_64/bugs.c
@@ -6,15 +6,10 @@
 
 #include "sysdep/ptrace.h"
 
-void arch_init_thread(void)
-{
-}
-
 void arch_check_bugs(void)
 {
 }
 
-int arch_handle_signal(int sig, struct uml_pt_regs *regs)
+void arch_examine_signal(int sig, struct uml_pt_regs *regs)
 {
-	return 0;
 }
diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/um/sys-x86_64/ptrace.c
index b7631b0..f3458d7 100644
--- a/arch/um/sys-x86_64/ptrace.c
+++ b/arch/um/sys-x86_64/ptrace.c
@@ -5,13 +5,12 @@
  * Licensed under the GPL
  */
 
-#define __FRAME_OFFSETS
-#include <asm/ptrace.h>
+#include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
-#include <linux/mm.h>
+#define __FRAME_OFFSETS
+#include <asm/ptrace.h>
 #include <asm/uaccess.h>
-#include <asm/elf.h>
 
 /*
  * determines which flags the user has access to.
@@ -24,12 +23,14 @@
 	unsigned long tmp;
 
 #ifdef TIF_IA32
-	/* Some code in the 64bit emulation may not be 64bit clean.
-	   Don't take any chances. */
+	/*
+	 * Some code in the 64bit emulation may not be 64bit clean.
+	 * Don't take any chances.
+	 */
 	if (test_tsk_thread_flag(child, TIF_IA32))
 		value &= 0xffffffff;
 #endif
-	switch (regno){
+	switch (regno) {
 	case FS:
 	case GS:
 	case DS:
@@ -66,7 +67,7 @@
 	if (addr < MAX_REG_OFFSET)
 		return putreg(child, addr, data);
 	else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
-		(addr <= offsetof(struct user, u_debugreg[7]))){
+		(addr <= offsetof(struct user, u_debugreg[7]))) {
 		addr -= offsetof(struct user, u_debugreg[0]);
 		addr = addr >> 2;
 		if ((addr == 4) || (addr == 5))
@@ -108,11 +109,10 @@
 		return -EIO;
 
 	tmp = 0;  /* Default return condition */
-	if (addr < MAX_REG_OFFSET){
+	if (addr < MAX_REG_OFFSET)
 		tmp = getreg(child, addr);
-	}
 	else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
-		(addr <= offsetof(struct user, u_debugreg[7]))){
+		(addr <= offsetof(struct user, u_debugreg[7]))) {
 		addr -= offsetof(struct user, u_debugreg[0]);
 		addr = addr >> 2;
 		tmp = child->thread.arch.debugregs[addr];
@@ -127,8 +127,9 @@
 	int n;
 
 	n = copy_from_user(&instr, (void __user *) addr, sizeof(instr));
-	if (n){
-		/* access_process_vm() grants access to vsyscall and stub,
+	if (n) {
+		/*
+		 * access_process_vm() grants access to vsyscall and stub,
 		 * while copy_from_user doesn't. Maybe access_process_vm is
 		 * slow, but that doesn't matter, since it will be called only
 		 * in case of singlestepping, if copy_from_user failed.
@@ -155,7 +156,7 @@
 		return err;
 
 	n = copy_to_user(buf, fpregs, sizeof(fpregs));
-	if(n > 0)
+	if (n > 0)
 		return -EFAULT;
 
 	return n;
diff --git a/arch/um/sys-x86_64/ptrace_user.c b/arch/um/sys-x86_64/ptrace_user.c
index b5f9c33..c57a496 100644
--- a/arch/um/sys-x86_64/ptrace_user.c
+++ b/arch/um/sys-x86_64/ptrace_user.c
@@ -4,55 +4,19 @@
  * Licensed under the GPL
  */
 
-#include <stddef.h>
 #include <errno.h>
 #include "ptrace_user.h"
-#include "user.h"
-#include "kern_constants.h"
 
 int ptrace_getregs(long pid, unsigned long *regs_out)
 {
-	if(ptrace(PTRACE_GETREGS, pid, 0, regs_out) < 0)
-		return(-errno);
-	return(0);
-}
-
-int ptrace_setregs(long pid, unsigned long *regs)
-{
-	if(ptrace(PTRACE_SETREGS, pid, 0, regs) < 0)
-		return(-errno);
-	return(0);
-}
-
-int ptrace_setfpregs(long pid, unsigned long *regs)
-{
-	if (ptrace(PTRACE_SETFPREGS, pid, 0, regs) < 0)
+	if (ptrace(PTRACE_GETREGS, pid, 0, regs_out) < 0)
 		return -errno;
-	return 0;
+	return(0);
 }
 
-void ptrace_pokeuser(unsigned long addr, unsigned long data)
+int ptrace_setregs(long pid, unsigned long *regs_out)
 {
-	panic("ptrace_pokeuser");
-}
-
-#define DS 184
-#define ES 192
-#define __USER_DS     0x2b
-
-void arch_enter_kernel(void *task, int pid)
-{
-}
-
-void arch_leave_kernel(void *task, int pid)
-{
-#ifdef UM_USER_CS
-        if(ptrace(PTRACE_POKEUSR, pid, CS, UM_USER_CS) < 0)
-                printk("POKEUSR CS failed");
-#endif
-
-        if(ptrace(PTRACE_POKEUSR, pid, DS, __USER_DS) < 0)
-                printk("POKEUSR DS failed");
-        if(ptrace(PTRACE_POKEUSR, pid, ES, __USER_DS) < 0)
-                printk("POKEUSR ES failed");
+	if (ptrace(PTRACE_SETREGS, pid, 0, regs_out) < 0)
+		return -errno;
+	return(0);
 }
diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c
index 1407018..1a899a7 100644
--- a/arch/um/sys-x86_64/signal.c
+++ b/arch/um/sys-x86_64/signal.c
@@ -81,7 +81,7 @@
 	if (err)
 		return 1;
 
-	err = restore_fp_registers(userspace_pid[current_thread->cpu],
+	err = restore_fp_registers(userspace_pid[current_thread_info()->cpu],
 				   (unsigned long *) &fp);
 	if (err < 0) {
 		printk(KERN_ERR "copy_sc_from_user - "
@@ -143,7 +143,7 @@
 	if (err)
 		return 1;
 
-	err = save_fp_registers(userspace_pid[current_thread->cpu],
+	err = save_fp_registers(userspace_pid[current_thread_info()->cpu],
 				(unsigned long *) &fp);
 	if (err < 0) {
 		printk(KERN_ERR "copy_sc_from_user - restore_fp_registers "
diff --git a/arch/um/sys-x86_64/stub.S b/arch/um/sys-x86_64/stub.S
index 4afe204..5687687 100644
--- a/arch/um/sys-x86_64/stub.S
+++ b/arch/um/sys-x86_64/stub.S
@@ -8,18 +8,18 @@
 	/* We don't have 64-bit constants, so this constructs the address
 	 * we need.
 	 */
-	movq	$(ASM_STUB_DATA >> 32), %rbx
+	movq	$(STUB_DATA >> 32), %rbx
 	salq	$32, %rbx
-	movq	$(ASM_STUB_DATA & 0xffffffff), %rcx
+	movq	$(STUB_DATA & 0xffffffff), %rcx
 	or	%rcx, %rbx
 	movq	%rax, (%rbx)
 	int3
 
 	.globl batch_syscall_stub
 batch_syscall_stub:
-	mov	$(ASM_STUB_DATA >> 32), %rbx
+	mov	$(STUB_DATA >> 32), %rbx
 	sal	$32, %rbx
-	mov	$(ASM_STUB_DATA & 0xffffffff), %rax
+	mov	$(STUB_DATA & 0xffffffff), %rax
 	or	%rax, %rbx
 	/* load pointer to first operation */
 	mov	%rbx, %rsp
diff --git a/arch/um/sys-x86_64/stub_segv.c b/arch/um/sys-x86_64/stub_segv.c
index 3afb590..ced051a 100644
--- a/arch/um/sys-x86_64/stub_segv.c
+++ b/arch/um/sys-x86_64/stub_segv.c
@@ -1,51 +1,22 @@
 /*
- * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <stddef.h>
 #include <signal.h>
-#include <asm/unistd.h>
 #include "as-layout.h"
-#include "uml-config.h"
-#include "sysdep/sigcontext.h"
-#include "sysdep/faultinfo.h"
 #include "sysdep/stub.h"
-
-/* Copied from sys-x86_64/signal.c - Can't find an equivalent definition
- * in the libc headers anywhere.
- */
-struct rt_sigframe
-{
-	char *pretcode;
-	struct ucontext uc;
-	struct siginfo info;
-};
-
-/* Copied here from <linux/kernel.h> - we're userspace. */
-#define container_of(ptr, type, member) ({                   \
-	const typeof( ((type *)0)->member ) *__mptr = (ptr); \
-	(type *)( (char *)__mptr - offsetof(type,member) );})
+#include "sysdep/faultinfo.h"
+#include "sysdep/sigcontext.h"
 
 void __attribute__ ((__section__ (".__syscall_stub")))
 stub_segv_handler(int sig)
 {
 	struct ucontext *uc;
-        int pid;
 
 	__asm__ __volatile__("movq %%rdx, %0" : "=g" (uc) :);
 	GET_FAULTINFO_FROM_SC(*((struct faultinfo *) STUB_DATA),
 			      &uc->uc_mcontext);
-
-	pid = stub_syscall0(__NR_getpid);
-	stub_syscall2(__NR_kill, pid, SIGUSR1);
-
-	/* sys_sigreturn expects that the stack pointer will be 8 bytes into
-	 * the signal frame.  So, we use the ucontext pointer, which we know
-	 * already, to get the signal frame pointer, and add 8 to that.
-	 */
-	__asm__ __volatile__("movq %0, %%rsp; movq %1, %%rax ; syscall": :
-                             "g" ((unsigned long)
-                                  container_of(uc, struct rt_sigframe, uc) + 8),
-                             "g" (__NR_rt_sigreturn));
+	trap_myself();
 }
+
diff --git a/arch/um/sys-x86_64/syscall_table.c b/arch/um/sys-x86_64/syscall_table.c
index 71b2ae4..c128eb8 100644
--- a/arch/um/sys-x86_64/syscall_table.c
+++ b/arch/um/sys-x86_64/syscall_table.c
@@ -1,5 +1,7 @@
-/* System call table for UML/x86-64, copied from arch/x86_64/kernel/syscall.c
- * with some changes for UML. */
+/*
+ * System call table for UML/x86-64, copied from arch/x86_64/kernel/syscall.c
+ * with some changes for UML.
+ */
 
 #include <linux/linkage.h>
 #include <linux/sys.h>
@@ -8,22 +10,26 @@
 
 #define __NO_STUBS
 
-/* Below you can see, in terms of #define's, the differences between the x86-64
- * and the UML syscall table. */
+/*
+ * Below you can see, in terms of #define's, the differences between the x86-64
+ * and the UML syscall table.
+ */
 
 /* Not going to be implemented by UML, since we have no hardware. */
 #define stub_iopl sys_ni_syscall
 #define sys_ioperm sys_ni_syscall
 
-/* The UML TLS problem. Note that x86_64 does not implement this, so the below
- * is needed only for the ia32 compatibility. */
-/*#define sys_set_thread_area sys_ni_syscall
-#define sys_get_thread_area sys_ni_syscall*/
+/*
+ * The UML TLS problem. Note that x86_64 does not implement this, so the below
+ * is needed only for the ia32 compatibility.
+ */
 
 /* On UML we call it this way ("old" means it's not mmap2) */
 #define sys_mmap old_mmap
-/* On x86-64 sys_uname is actually sys_newuname plus a compatibility trick.
- * See arch/x86_64/kernel/sys_x86_64.c */
+/*
+ * On x86-64 sys_uname is actually sys_newuname plus a compatibility trick.
+ * See arch/x86_64/kernel/sys_x86_64.c
+ */
 #define sys_uname sys_uname64
 
 #define stub_clone sys_clone
@@ -46,8 +52,19 @@
 
 extern void sys_ni_syscall(void);
 
-sys_call_ptr_t sys_call_table[UM_NR_syscall_max+1] __cacheline_aligned = {
-	/* Smells like a like a compiler bug -- it doesn't work when the & below is removed. */
-	[0 ... UM_NR_syscall_max] = &sys_ni_syscall,
+/*
+ * We used to have a trick here which made sure that holes in the
+ * x86_64 table were filled in with sys_ni_syscall, but a comment in
+ * unistd_64.h says that holes aren't allowed, so the trick was
+ * removed.
+ * The trick looked like this
+ *	[0 ... UM_NR_syscall_max] = &sys_ni_syscall
+ * before including unistd_64.h - the later initializations overwrote
+ * the sys_ni_syscall filler.
+ */
+
+sys_call_ptr_t sys_call_table[] __cacheline_aligned = {
 #include <asm-x86/unistd_64.h>
 };
+
+int syscall_table_size = sizeof(sys_call_table);
diff --git a/arch/um/sys-x86_64/syscalls.c b/arch/um/sys-x86_64/syscalls.c
index 86f6b18..f1199fd 100644
--- a/arch/um/sys-x86_64/syscalls.c
+++ b/arch/um/sys-x86_64/syscalls.c
@@ -48,7 +48,9 @@
 	switch (code) {
 	case ARCH_SET_FS:
 	case ARCH_SET_GS:
-		restore_registers(pid, &current->thread.regs.regs);
+		ret = restore_registers(pid, &current->thread.regs.regs);
+		if (ret)
+			return ret;
 		break;
 	case ARCH_GET_FS:
 	case ARCH_GET_GS:
@@ -70,10 +72,10 @@
 	switch (code) {
 	case ARCH_SET_FS:
 		current->thread.arch.fs = (unsigned long) ptr;
-		save_registers(pid, &current->thread.regs.regs);
+		ret = save_registers(pid, &current->thread.regs.regs);
 		break;
 	case ARCH_SET_GS:
-		save_registers(pid, &current->thread.regs.regs);
+		ret = save_registers(pid, &current->thread.regs.regs);
 		break;
 	case ARCH_GET_FS:
 		ret = put_user(tmp, addr);
@@ -105,7 +107,7 @@
 	return ret;
 }
 
-void arch_switch_to(struct task_struct *from, struct task_struct *to)
+void arch_switch_to(struct task_struct *to)
 {
 	if ((to->thread.arch.fs == 0) || (to->mm == NULL))
 		return;
diff --git a/arch/um/sys-x86_64/sysrq.c b/arch/um/sys-x86_64/sysrq.c
index 7654440..f4f82be 100644
--- a/arch/um/sys-x86_64/sysrq.c
+++ b/arch/um/sys-x86_64/sysrq.c
@@ -4,32 +4,33 @@
  * Licensed under the GPL
  */
 
-#include "linux/kernel.h"
-#include "linux/utsname.h"
-#include "linux/module.h"
-#include "asm/current.h"
-#include "asm/ptrace.h"
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/utsname.h>
+#include <asm/current.h>
+#include <asm/ptrace.h>
 #include "sysrq.h"
 
-void __show_regs(struct pt_regs * regs)
+void __show_regs(struct pt_regs *regs)
 {
 	printk("\n");
 	print_modules();
-	printk("Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current),
+	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current),
 		current->comm, print_tainted(), init_utsname()->release);
-	printk("RIP: %04lx:[<%016lx>] ", PT_REGS_CS(regs) & 0xffff,
+	printk(KERN_INFO "RIP: %04lx:[<%016lx>]\n", PT_REGS_CS(regs) & 0xffff,
 	       PT_REGS_RIP(regs));
-	printk("\nRSP: %016lx  EFLAGS: %08lx\n", PT_REGS_RSP(regs),
+	printk(KERN_INFO "RSP: %016lx  EFLAGS: %08lx\n", PT_REGS_RSP(regs),
 	       PT_REGS_EFLAGS(regs));
-	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
+	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
 	       PT_REGS_RAX(regs), PT_REGS_RBX(regs), PT_REGS_RCX(regs));
-	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
+	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
 	       PT_REGS_RDX(regs), PT_REGS_RSI(regs), PT_REGS_RDI(regs));
-	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
+	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
 	       PT_REGS_RBP(regs), PT_REGS_R8(regs), PT_REGS_R9(regs));
-	printk("R10: %016lx R11: %016lx R12: %016lx\n",
+	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
 	       PT_REGS_R10(regs), PT_REGS_R11(regs), PT_REGS_R12(regs));
-	printk("R13: %016lx R14: %016lx R15: %016lx\n",
+	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
 	       PT_REGS_R13(regs), PT_REGS_R14(regs), PT_REGS_R15(regs));
 }
 
diff --git a/arch/um/sys-x86_64/um_module.c b/arch/um/sys-x86_64/um_module.c
index 8b8eff1..3dead39 100644
--- a/arch/um/sys-x86_64/um_module.c
+++ b/arch/um/sys-x86_64/um_module.c
@@ -1,7 +1,7 @@
 #include <linux/vmalloc.h>
 #include <linux/moduleloader.h>
 
-/*Copied from i386 arch/i386/kernel/module.c */
+/* Copied from i386 arch/i386/kernel/module.c */
 void *module_alloc(unsigned long size)
 {
 	if (size == 0)
@@ -13,7 +13,9 @@
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
+	/*
+	 * FIXME: If module_region == mod->init_region, trim exception
+	 * table entries.
+	 */
 }
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 59eef1c..4348211 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -465,6 +465,9 @@
 	  Calgary anyway, pass 'iommu=calgary' on the kernel command line.
 	  If unsure, say Y.
 
+config IOMMU_HELPER
+	def_bool (CALGARY_IOMMU || GART_IOMMU)
+
 # need this always selected by IOMMU for the VIA workaround
 config SWIOTLB
 	bool
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 0db0a62..8022d3c 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -722,7 +722,9 @@
 	.quad sys_epoll_pwait
 	.quad compat_sys_utimensat	/* 320 */
 	.quad compat_sys_signalfd
-	.quad compat_sys_timerfd
+	.quad sys_timerfd_create
 	.quad sys_eventfd
 	.quad sys32_fallocate
+	.quad compat_sys_timerfd_settime	/* 325 */
+	.quad compat_sys_timerfd_gettime
 ia32_syscall_end:
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 1fe7f04..1b5464c 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -35,6 +35,7 @@
 #include <linux/pci.h>
 #include <linux/delay.h>
 #include <linux/scatterlist.h>
+#include <linux/iommu-helper.h>
 #include <asm/gart.h>
 #include <asm/calgary.h>
 #include <asm/tce.h>
@@ -260,22 +261,28 @@
 	spin_unlock_irqrestore(&tbl->it_lock, flags);
 }
 
-static unsigned long iommu_range_alloc(struct iommu_table *tbl,
-	unsigned int npages)
+static unsigned long iommu_range_alloc(struct device *dev,
+				       struct iommu_table *tbl,
+				       unsigned int npages)
 {
 	unsigned long flags;
 	unsigned long offset;
+	unsigned long boundary_size;
+
+	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+			      PAGE_SIZE) >> PAGE_SHIFT;
 
 	BUG_ON(npages == 0);
 
 	spin_lock_irqsave(&tbl->it_lock, flags);
 
-	offset = find_next_zero_string(tbl->it_map, tbl->it_hint,
-				       tbl->it_size, npages);
+	offset = iommu_area_alloc(tbl->it_map, tbl->it_size, tbl->it_hint,
+				  npages, 0, boundary_size, 0);
 	if (offset == ~0UL) {
 		tbl->chip_ops->tce_cache_blast(tbl);
-		offset = find_next_zero_string(tbl->it_map, 0,
-					       tbl->it_size, npages);
+
+		offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0,
+					  npages, 0, boundary_size, 0);
 		if (offset == ~0UL) {
 			printk(KERN_WARNING "Calgary: IOMMU full.\n");
 			spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -286,7 +293,6 @@
 		}
 	}
 
-	set_bit_string(tbl->it_map, offset, npages);
 	tbl->it_hint = offset + npages;
 	BUG_ON(tbl->it_hint > tbl->it_size);
 
@@ -295,13 +301,13 @@
 	return offset;
 }
 
-static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr,
-	unsigned int npages, int direction)
+static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
+			      void *vaddr, unsigned int npages, int direction)
 {
 	unsigned long entry;
 	dma_addr_t ret = bad_dma_address;
 
-	entry = iommu_range_alloc(tbl, npages);
+	entry = iommu_range_alloc(dev, tbl, npages);
 
 	if (unlikely(entry == bad_dma_address))
 		goto error;
@@ -354,7 +360,7 @@
 			       badbit, tbl, dma_addr, entry, npages);
 	}
 
-	__clear_bit_string(tbl->it_map, entry, npages);
+	iommu_area_free(tbl->it_map, entry, npages);
 
 	spin_unlock_irqrestore(&tbl->it_lock, flags);
 }
@@ -438,7 +444,7 @@
 		vaddr = (unsigned long) sg_virt(s);
 		npages = num_dma_pages(vaddr, s->length);
 
-		entry = iommu_range_alloc(tbl, npages);
+		entry = iommu_range_alloc(dev, tbl, npages);
 		if (entry == bad_dma_address) {
 			/* makes sure unmap knows to stop */
 			s->dma_length = 0;
@@ -476,7 +482,7 @@
 	npages = num_dma_pages(uaddr, size);
 
 	if (translation_enabled(tbl))
-		dma_handle = iommu_alloc(tbl, vaddr, npages, direction);
+		dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction);
 	else
 		dma_handle = virt_to_bus(vaddr);
 
@@ -516,7 +522,7 @@
 
 	if (translation_enabled(tbl)) {
 		/* set up tces to cover the allocated range */
-		mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL);
+		mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
 		if (mapping == bad_dma_address)
 			goto free;
 
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 845cbec..65f6acb 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -25,6 +25,7 @@
 #include <linux/bitops.h>
 #include <linux/kdebug.h>
 #include <linux/scatterlist.h>
+#include <linux/iommu-helper.h>
 #include <asm/atomic.h>
 #include <asm/io.h>
 #include <asm/mtrr.h>
@@ -82,17 +83,24 @@
 static unsigned long next_bit;  /* protected by iommu_bitmap_lock */
 static int need_flush;		/* global flush state. set for each gart wrap */
 
-static unsigned long alloc_iommu(int size)
+static unsigned long alloc_iommu(struct device *dev, int size)
 {
 	unsigned long offset, flags;
+	unsigned long boundary_size;
+	unsigned long base_index;
+
+	base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
+			   PAGE_SIZE) >> PAGE_SHIFT;
+	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+			      PAGE_SIZE) >> PAGE_SHIFT;
 
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
-	offset = find_next_zero_string(iommu_gart_bitmap, next_bit,
-					iommu_pages, size);
+	offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
+				  size, base_index, boundary_size, 0);
 	if (offset == -1) {
 		need_flush = 1;
-		offset = find_next_zero_string(iommu_gart_bitmap, 0,
-						iommu_pages, size);
+		offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
+					  size, base_index, boundary_size, 0);
 	}
 	if (offset != -1) {
 		set_bit_string(iommu_gart_bitmap, offset, size);
@@ -114,7 +122,7 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
-	__clear_bit_string(iommu_gart_bitmap, offset, size);
+	iommu_area_free(iommu_gart_bitmap, offset, size);
 	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
 }
 
@@ -235,7 +243,7 @@
 				size_t size, int dir)
 {
 	unsigned long npages = to_pages(phys_mem, size);
-	unsigned long iommu_page = alloc_iommu(npages);
+	unsigned long iommu_page = alloc_iommu(dev, npages);
 	int i;
 
 	if (iommu_page == -1) {
@@ -355,10 +363,11 @@
 }
 
 /* Map multiple scatterlist entries continuous into the first. */
-static int __dma_map_cont(struct scatterlist *start, int nelems,
-			  struct scatterlist *sout, unsigned long pages)
+static int __dma_map_cont(struct device *dev, struct scatterlist *start,
+			  int nelems, struct scatterlist *sout,
+			  unsigned long pages)
 {
-	unsigned long iommu_start = alloc_iommu(pages);
+	unsigned long iommu_start = alloc_iommu(dev, pages);
 	unsigned long iommu_page = iommu_start;
 	struct scatterlist *s;
 	int i;
@@ -394,8 +403,8 @@
 }
 
 static inline int
-dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout,
-	     unsigned long pages, int need)
+dma_map_cont(struct device *dev, struct scatterlist *start, int nelems,
+	     struct scatterlist *sout, unsigned long pages, int need)
 {
 	if (!need) {
 		BUG_ON(nelems != 1);
@@ -403,7 +412,7 @@
 		sout->dma_length = start->length;
 		return 0;
 	}
-	return __dma_map_cont(start, nelems, sout, pages);
+	return __dma_map_cont(dev, start, nelems, sout, pages);
 }
 
 /*
@@ -416,6 +425,8 @@
 	struct scatterlist *s, *ps, *start_sg, *sgmap;
 	int need = 0, nextneed, i, out, start;
 	unsigned long pages = 0;
+	unsigned int seg_size;
+	unsigned int max_seg_size;
 
 	if (nents == 0)
 		return 0;
@@ -426,6 +437,8 @@
 	out = 0;
 	start = 0;
 	start_sg = sgmap = sg;
+	seg_size = 0;
+	max_seg_size = dma_get_max_seg_size(dev);
 	ps = NULL; /* shut up gcc */
 	for_each_sg(sg, s, nents, i) {
 		dma_addr_t addr = sg_phys(s);
@@ -443,11 +456,13 @@
 			 * offset.
 			 */
 			if (!iommu_merge || !nextneed || !need || s->offset ||
+			    (s->length + seg_size > max_seg_size) ||
 			    (ps->offset + ps->length) % PAGE_SIZE) {
-				if (dma_map_cont(start_sg, i - start, sgmap,
-						  pages, need) < 0)
+				if (dma_map_cont(dev, start_sg, i - start,
+						 sgmap, pages, need) < 0)
 					goto error;
 				out++;
+				seg_size = 0;
 				sgmap = sg_next(sgmap);
 				pages = 0;
 				start = i;
@@ -455,11 +470,12 @@
 			}
 		}
 
+		seg_size += s->length;
 		need = nextneed;
 		pages += to_pages(s->offset, s->length);
 		ps = s;
 	}
-	if (dma_map_cont(start_sg, i - start, sgmap, pages, need) < 0)
+	if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0)
 		goto error;
 	out++;
 	flush_gart();
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 8344c70..adff556 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -321,6 +321,8 @@
 	.long sys_epoll_pwait
 	.long sys_utimensat		/* 320 */
 	.long sys_signalfd
-	.long sys_timerfd
+	.long sys_timerfd_create
 	.long sys_eventfd
 	.long sys_fallocate
+	.long sys_timerfd_settime	/* 325 */
+	.long sys_timerfd_gettime
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f94a0b..cf53081 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1739,7 +1739,7 @@
 	if (bytes == 8) {
 		gpa_t gpa;
 		struct page *page;
-		char *addr;
+		char *kaddr;
 		u64 val;
 
 		down_read(&current->mm->mmap_sem);
@@ -1754,9 +1754,9 @@
 
 		val = *(u64 *)new;
 		page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-		addr = kmap_atomic(page, KM_USER0);
-		set_64bit((u64 *)(addr + offset_in_page(gpa)), val);
-		kunmap_atomic(addr, KM_USER0);
+		kaddr = kmap_atomic(page, KM_USER0);
+		set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
+		kunmap_atomic(kaddr, KM_USER0);
 		kvm_release_page_dirty(page);
 	emul_write:
 		up_read(&current->mm->mmap_sem);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 4876182..25df1c1 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -21,7 +21,7 @@
 
         lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
         lib-y += thunk_64.o clear_page_64.o copy_page_64.o
-        lib-y += bitstr_64.o bitops_64.o
+        lib-y += bitops_64.o
         lib-y += memmove_64.o memset_64.o
         lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
 endif
diff --git a/arch/x86/lib/bitstr_64.c b/arch/x86/lib/bitstr_64.c
deleted file mode 100644
index 7445caf..0000000
--- a/arch/x86/lib/bitstr_64.c
+++ /dev/null
@@ -1,28 +0,0 @@
-#include <linux/module.h>
-#include <linux/bitops.h>
-
-/* Find string of zero bits in a bitmap */ 
-unsigned long 
-find_next_zero_string(unsigned long *bitmap, long start, long nbits, int len)
-{ 
-	unsigned long n, end, i; 	
-
- again:
-	n = find_next_zero_bit(bitmap, nbits, start);
-	if (n == -1) 
-		return -1;
-	
-	/* could test bitsliced, but it's hardly worth it */
-	end = n+len;
-	if (end > nbits)
-		return -1; 
-	for (i = n+1; i < end; i++) { 
-		if (test_bit(i, bitmap)) {  
-			start = i+1; 
-			goto again; 
-		} 
-	}
-	return n;
-}
-
-EXPORT_SYMBOL(find_next_zero_string);
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index c7db504..6c19146 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -272,7 +272,7 @@
  * preallocate which never got a corresponding vma will need to be
  * freed manually.
  */
-static void pgd_mop_up_pmds(pgd_t *pgdp)
+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
 {
 	int i;
 
@@ -285,7 +285,7 @@
 			pgdp[i] = native_make_pgd(0);
 
 			paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
-			pmd_free(pmd);
+			pmd_free(mm, pmd);
 		}
 	}
 }
@@ -313,7 +313,7 @@
 		pmd_t *pmd = pmd_alloc_one(mm, addr);
 
 		if (!pmd) {
-			pgd_mop_up_pmds(pgd);
+			pgd_mop_up_pmds(mm, pgd);
 			return 0;
 		}
 
@@ -333,7 +333,7 @@
 	return 1;
 }
 
-static void pgd_mop_up_pmds(pgd_t *pgd)
+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
 {
 }
 #endif	/* CONFIG_X86_PAE */
@@ -352,9 +352,9 @@
 	return pgd;
 }
 
-void pgd_free(pgd_t *pgd)
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-	pgd_mop_up_pmds(pgd);
+	pgd_mop_up_pmds(mm, pgd);
 	quicklist_free(0, pgd_dtor, pgd);
 }
 
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 42ba0e2..103b9df 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -72,7 +72,7 @@
 		}
 	}
 }
-
+EXPORT_SYMBOL(pcibios_align_resource);
 
 /*
  *  Handle resources of PCI devices.  If the world were perfect, we could
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 3f8a231..d74d9fb 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -52,6 +52,8 @@
 
 source "drivers/spi/Kconfig"
 
+source "drivers/gpio/Kconfig"
+
 source "drivers/w1/Kconfig"
 
 source "drivers/power/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 0ee9a8a..f1c11db 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -5,6 +5,7 @@
 # Rewritten to use lists instead of if-statements.
 #
 
+obj-$(CONFIG_HAVE_GPIO_LIB)	+= gpio/
 obj-$(CONFIG_PCI)		+= pci/
 obj-$(CONFIG_PARISC)		+= parisc/
 obj-$(CONFIG_RAPIDIO)		+= rapidio/
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index eb1f82f..199ea21 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -38,7 +38,7 @@
 #include <linux/dmi.h>
 #include <linux/moduleparam.h>
 #include <linux/sched.h>	/* need_resched() */
-#include <linux/latency.h>
+#include <linux/pm_qos_params.h>
 #include <linux/clockchips.h>
 #include <linux/cpuidle.h>
 
@@ -648,7 +648,8 @@
 	if (cx->promotion.state &&
 	    ((cx->promotion.state - pr->power.states) <= max_cstate)) {
 		if (sleep_ticks > cx->promotion.threshold.ticks &&
-		  cx->promotion.state->latency <= system_latency_constraint()) {
+		  cx->promotion.state->latency <=
+				pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
 			cx->promotion.count++;
 			cx->demotion.count = 0;
 			if (cx->promotion.count >=
@@ -692,7 +693,8 @@
 	 * or if the latency of the current state is unacceptable
 	 */
 	if ((pr->power.state - pr->power.states) > max_cstate ||
-		pr->power.state->latency > system_latency_constraint()) {
+		pr->power.state->latency >
+				pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
 		if (cx->demotion.state)
 			next_state = cx->demotion.state;
 	}
@@ -1200,7 +1202,7 @@
 		   "maximum allowed latency: %d usec\n",
 		   pr->power.state ? pr->power.state - pr->power.states : 0,
 		   max_cstate, (unsigned)pr->power.bm_activity,
-		   system_latency_constraint());
+		   pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY));
 
 	seq_puts(seq, "states:\n");
 
@@ -1718,8 +1720,9 @@
 			       "ACPI: processor limited to max C-state %d\n",
 			       max_cstate);
 		first_run++;
-#if !defined (CONFIG_CPU_IDLE) && defined (CONFIG_SMP)
-		register_latency_notifier(&acpi_processor_latency_notifier);
+#if !defined(CONFIG_CPU_IDLE) && defined(CONFIG_SMP)
+		pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY,
+				&acpi_processor_latency_notifier);
 #endif
 	}
 
@@ -1806,7 +1809,8 @@
 		 */
 		cpu_idle_wait();
 #ifdef CONFIG_SMP
-		unregister_latency_notifier(&acpi_processor_latency_notifier);
+		pm_qos_remove_notifier(PM_QOS_CPU_DMA_LATENCY,
+				&acpi_processor_latency_notifier);
 #endif
 	}
 #endif
diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c
index 96e614a..59e65ed 100644
--- a/drivers/ata/sata_inic162x.c
+++ b/drivers/ata/sata_inic162x.c
@@ -108,17 +108,6 @@
 	u8	cached_pirq_mask;
 };
 
-static int inic_slave_config(struct scsi_device *sdev)
-{
-	/* This controller is braindamaged.  dma_boundary is 0xffff
-	 * like others but it will lock up the whole machine HARD if
-	 * 65536 byte PRD entry is fed.  Reduce maximum segment size.
-	 */
-	blk_queue_max_segment_size(sdev->request_queue, 65536 - 512);
-
-	return ata_scsi_slave_config(sdev);
-}
-
 static struct scsi_host_template inic_sht = {
 	.module			= THIS_MODULE,
 	.name			= DRV_NAME,
@@ -132,7 +121,7 @@
 	.use_clustering		= ATA_SHT_USE_CLUSTERING,
 	.proc_name		= DRV_NAME,
 	.dma_boundary		= ATA_DMA_BOUNDARY,
-	.slave_configure	= inic_slave_config,
+	.slave_configure	= ata_scsi_slave_config,
 	.slave_destroy		= ata_scsi_slave_destroy,
 	.bios_param		= ata_std_bios_param,
 };
@@ -730,6 +719,18 @@
 		return rc;
 	}
 
+	/*
+	 * This controller is braindamaged.  dma_boundary is 0xffff
+	 * like others but it will lock up the whole machine HARD if
+	 * 65536 byte PRD entry is fed. Reduce maximum segment size.
+	 */
+	rc = pci_set_dma_max_seg_size(pdev, 65536 - 512);
+	if (rc) {
+		dev_printk(KERN_ERR, &pdev->dev,
+			   "failed to set the maximum segment size.\n");
+		return rc;
+	}
+
 	rc = init_controller(iomap[MMIO_BAR], hpriv->cached_hctl);
 	if (rc) {
 		dev_printk(KERN_ERR, &pdev->dev,
diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c
index 1375b53..3b28658 100644
--- a/drivers/bluetooth/bpa10x.c
+++ b/drivers/bluetooth/bpa10x.c
@@ -423,6 +423,7 @@
 		break;
 
 	default:
+		usb_free_urb(urb);
 		return -EILSEQ;
 	}
 
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c
index a18f9b8..7703d6e 100644
--- a/drivers/bluetooth/bt3c_cs.c
+++ b/drivers/bluetooth/bt3c_cs.c
@@ -704,7 +704,7 @@
 
 static int bt3c_config(struct pcmcia_device *link)
 {
-	static kio_addr_t base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 };
+	static unsigned int base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 };
 	bt3c_info_t *info = link->priv;
 	tuple_t tuple;
 	u_short buf[256];
diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c
index b786f61..58630cc 100644
--- a/drivers/bluetooth/btsdio.c
+++ b/drivers/bluetooth/btsdio.c
@@ -162,10 +162,8 @@
 	bt_cb(skb)->pkt_type = hdr[3];
 
 	err = hci_recv_frame(skb);
-	if (err < 0) {
-		kfree(skb);
+	if (err < 0)
 		return err;
-	}
 
 	sdio_writeb(data->func, 0x00, REG_PC_RRT, NULL);
 
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c
index dade162..68d1d25 100644
--- a/drivers/bluetooth/btuart_cs.c
+++ b/drivers/bluetooth/btuart_cs.c
@@ -634,7 +634,7 @@
 
 static int btuart_config(struct pcmcia_device *link)
 {
-	static kio_addr_t base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 };
+	static unsigned int base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 };
 	btuart_info_t *info = link->priv;
 	tuple_t tuple;
 	u_short buf[256];
diff --git a/drivers/bluetooth/hci_usb.c b/drivers/bluetooth/hci_usb.c
index 98a9cde..372c7ef6 100644
--- a/drivers/bluetooth/hci_usb.c
+++ b/drivers/bluetooth/hci_usb.c
@@ -111,6 +111,7 @@
 	{ USB_DEVICE(0x0a5c, 0x2033), .driver_info = HCI_IGNORE },
 
 	/* Broadcom BCM2035 */
+	{ USB_DEVICE(0x0a5c, 0x2035), .driver_info = HCI_RESET | HCI_WRONG_SCO_MTU },
 	{ USB_DEVICE(0x0a5c, 0x200a), .driver_info = HCI_RESET | HCI_WRONG_SCO_MTU },
 	{ USB_DEVICE(0x0a5c, 0x2009), .driver_info = HCI_BCM92035 },
 
diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h
index b83824c..c69f795 100644
--- a/drivers/char/agp/agp.h
+++ b/drivers/char/agp/agp.h
@@ -117,7 +117,8 @@
 	void (*free_by_type)(struct agp_memory *);
 	void *(*agp_alloc_page)(struct agp_bridge_data *);
 	void (*agp_destroy_page)(void *, int flags);
-        int (*agp_type_to_mask_type) (struct agp_bridge_data *, int);
+	int (*agp_type_to_mask_type) (struct agp_bridge_data *, int);
+	void (*chipset_flush)(struct agp_bridge_data *);
 };
 
 struct agp_bridge_data {
@@ -235,6 +236,9 @@
 #define I965_PGETBL_SIZE_512KB	(0 << 1)
 #define I965_PGETBL_SIZE_256KB	(1 << 1)
 #define I965_PGETBL_SIZE_128KB	(2 << 1)
+#define I965_PGETBL_SIZE_1MB	(3 << 1)
+#define I965_PGETBL_SIZE_2MB	(4 << 1)
+#define I965_PGETBL_SIZE_1_5MB	(5 << 1)
 #define G33_PGETBL_SIZE_MASK    (3 << 8)
 #define G33_PGETBL_SIZE_1M      (1 << 8)
 #define G33_PGETBL_SIZE_2M      (2 << 8)
diff --git a/drivers/char/agp/alpha-agp.c b/drivers/char/agp/alpha-agp.c
index aa8f3a3..e77c178 100644
--- a/drivers/char/agp/alpha-agp.c
+++ b/drivers/char/agp/alpha-agp.c
@@ -11,29 +11,28 @@
 
 #include "agp.h"
 
-static struct page *alpha_core_agp_vm_nopage(struct vm_area_struct *vma,
-					     unsigned long address,
-					     int *type)
+static int alpha_core_agp_vm_fault(struct vm_area_struct *vma,
+					struct vm_fault *vmf)
 {
 	alpha_agp_info *agp = agp_bridge->dev_private_data;
 	dma_addr_t dma_addr;
 	unsigned long pa;
 	struct page *page;
 
-	dma_addr = address - vma->vm_start + agp->aperture.bus_base;
+	dma_addr = (unsigned long)vmf->virtual_address - vma->vm_start
+						+ agp->aperture.bus_base;
 	pa = agp->ops->translate(agp, dma_addr);
 
 	if (pa == (unsigned long)-EINVAL)
-		return NULL;	/* no translation */
+		return VM_FAULT_SIGBUS;	/* no translation */
 
 	/*
 	 * Get the page, inc the use count, and return it
 	 */
 	page = virt_to_page(__va(pa));
 	get_page(page);
-	if (type)
-		*type = VM_FAULT_MINOR;
-	return page;
+	vmf->page = page;
+	return 0;
 }
 
 static struct aper_size_info_fixed alpha_core_agp_sizes[] =
@@ -42,7 +41,7 @@
 };
 
 struct vm_operations_struct alpha_core_agp_vm_ops = {
-	.nopage = alpha_core_agp_vm_nopage,
+	.fault = alpha_core_agp_vm_fault,
 };
 
 
diff --git a/drivers/char/agp/amd-k7-agp.c b/drivers/char/agp/amd-k7-agp.c
index 1405a42..87be464 100644
--- a/drivers/char/agp/amd-k7-agp.c
+++ b/drivers/char/agp/amd-k7-agp.c
@@ -436,10 +436,6 @@
 				return -ENODEV;
 			}
 			cap_ptr = pci_find_capability(gfxcard, PCI_CAP_ID_AGP);
-			if (!cap_ptr) {
-				pci_dev_put(gfxcard);
-				continue;
-			}
 		}
 
 		/* With so many variants of NVidia cards, it's simpler just
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index 2720882..b1bdd01 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -43,7 +43,7 @@
  * fix some real stupidity. It's only by chance we can bump
  * past 0.99 at all due to some boolean logic error. */
 #define AGPGART_VERSION_MAJOR 0
-#define AGPGART_VERSION_MINOR 102
+#define AGPGART_VERSION_MINOR 103
 static const struct agp_version agp_current_version =
 {
 	.major = AGPGART_VERSION_MAJOR,
diff --git a/drivers/char/agp/compat_ioctl.c b/drivers/char/agp/compat_ioctl.c
index ecd4248..3927579 100644
--- a/drivers/char/agp/compat_ioctl.c
+++ b/drivers/char/agp/compat_ioctl.c
@@ -273,6 +273,10 @@
 	case AGPIOC_UNBIND32:
 		ret_val = compat_agpioc_unbind_wrap(curr_priv, (void __user *) arg);
 		break;
+
+	case AGPIOC_CHIPSET_FLUSH32:
+		ret_val = agpioc_chipset_flush_wrap(curr_priv);
+		break;
 	}
 
 ioctl_out:
diff --git a/drivers/char/agp/compat_ioctl.h b/drivers/char/agp/compat_ioctl.h
index 71939d6..0c9678a 100644
--- a/drivers/char/agp/compat_ioctl.h
+++ b/drivers/char/agp/compat_ioctl.h
@@ -39,6 +39,7 @@
 #define AGPIOC_DEALLOCATE32 _IOW (AGPIOC_BASE, 7, compat_int_t)
 #define AGPIOC_BIND32       _IOW (AGPIOC_BASE, 8, compat_uptr_t)
 #define AGPIOC_UNBIND32     _IOW (AGPIOC_BASE, 9, compat_uptr_t)
+#define AGPIOC_CHIPSET_FLUSH32 _IO (AGPIOC_BASE, 10)
 
 struct agp_info32 {
 	struct agp_version version;	/* version of the driver        */
@@ -101,5 +102,6 @@
 struct agp_memory *agp_allocate_memory_wrap(size_t pg_count, u32 type);
 struct agp_memory *agp_find_mem_by_key(int key);
 struct agp_client *agp_find_client_by_pid(pid_t id);
+int agpioc_chipset_flush_wrap(struct agp_file_private *priv);
 
 #endif /* _AGP_COMPAT_H */
diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c
index 7791e98..55d7a82 100644
--- a/drivers/char/agp/frontend.c
+++ b/drivers/char/agp/frontend.c
@@ -689,7 +689,7 @@
 	set_bit(AGP_FF_ALLOW_CLIENT, &priv->access_flags);
 	priv->my_pid = current->pid;
 
-	if ((current->uid == 0) || (current->suid == 0)) {
+	if (capable(CAP_SYS_RAWIO)) {
 		/* Root priv, can be controller */
 		set_bit(AGP_FF_ALLOW_CONTROLLER, &priv->access_flags);
 	}
@@ -960,6 +960,13 @@
 	return agp_unbind_memory(memory);
 }
 
+int agpioc_chipset_flush_wrap(struct agp_file_private *priv)
+{
+	DBG("");
+	agp_flush_chipset(agp_bridge);	/* priv is unused: the global agp_bridge is flushed */
+	return 0;	/* the flush reports no status, so this always succeeds */
+}
+
 static int agp_ioctl(struct inode *inode, struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
@@ -1033,6 +1040,10 @@
 	case AGPIOC_UNBIND:
 		ret_val = agpioc_unbind_wrap(curr_priv, (void __user *) arg);
 		break;
+	       
+	case AGPIOC_CHIPSET_FLUSH:
+		ret_val = agpioc_chipset_flush_wrap(curr_priv);
+		break;
 	}
 
 ioctl_out:
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index 1a4674c..7484bc7 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -80,6 +80,13 @@
 	return -1;
 }
 
+void agp_flush_chipset(struct agp_bridge_data *bridge)
+{
+	if (bridge->driver->chipset_flush)	/* optional hook: do nothing if the driver has none */
+		bridge->driver->chipset_flush(bridge);
+}
+EXPORT_SYMBOL(agp_flush_chipset);
+
 /*
  * Use kmalloc if possible for the page list. Otherwise fall back to
  * vmalloc. This speeds things up and also saves memory for small AGP
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 189efb6..eeea50a 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -14,8 +14,8 @@
 #define PCI_DEVICE_ID_INTEL_E7221_IG	0x258a
 #define PCI_DEVICE_ID_INTEL_82946GZ_HB      0x2970
 #define PCI_DEVICE_ID_INTEL_82946GZ_IG      0x2972
-#define PCI_DEVICE_ID_INTEL_82965G_1_HB     0x2980
-#define PCI_DEVICE_ID_INTEL_82965G_1_IG     0x2982
+#define PCI_DEVICE_ID_INTEL_82G35_HB     0x2980
+#define PCI_DEVICE_ID_INTEL_82G35_IG     0x2982
 #define PCI_DEVICE_ID_INTEL_82965Q_HB       0x2990
 #define PCI_DEVICE_ID_INTEL_82965Q_IG       0x2992
 #define PCI_DEVICE_ID_INTEL_82965G_HB       0x29A0
@@ -32,13 +32,24 @@
 #define PCI_DEVICE_ID_INTEL_Q35_IG          0x29B2
 #define PCI_DEVICE_ID_INTEL_Q33_HB          0x29D0
 #define PCI_DEVICE_ID_INTEL_Q33_IG          0x29D2
+#define PCI_DEVICE_ID_INTEL_IGD_HB          0x2A40
+#define PCI_DEVICE_ID_INTEL_IGD_IG          0x2A42
+
+/* cover 915 and 945 variants */
+#define IS_I915 (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_E7221_HB || \
+		 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB || \
+		 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB || \
+		 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB || \
+		 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB || \
+		 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GME_HB)
 
 #define IS_I965 (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82946GZ_HB || \
-                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965G_1_HB || \
-                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965Q_HB || \
-                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965G_HB || \
-                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965GM_HB || \
-                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965GME_HB)
+		 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82G35_HB || \
+		 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965Q_HB || \
+		 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965G_HB || \
+		 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965GM_HB || \
+		 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965GME_HB || \
+		 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGD_HB)
 
 #define IS_G33 (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G33_HB || \
 		agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_Q35_HB || \
@@ -71,9 +82,11 @@
 #define I915_GMCH_GMS_STOLEN_64M	(0x7 << 4)
 #define G33_GMCH_GMS_STOLEN_128M       (0x8 << 4)
 #define G33_GMCH_GMS_STOLEN_256M       (0x9 << 4)
+#define I915_IFPADDR    0x60
 
 /* Intel 965G registers */
 #define I965_MSAC 0x62
+#define I965_IFPADDR    0x70
 
 /* Intel 7505 registers */
 #define INTEL_I7505_APSIZE	0x74
@@ -115,6 +128,13 @@
 	 * popup and for the GTT.
 	 */
 	int gtt_entries;			/* i830+ */
+	union {
+		void __iomem *i9xx_flush_page;
+		void *i8xx_flush_page;
+	};
+	struct page *i8xx_page;
+	struct resource ifp_resource;
+	int resource_valid;
 } intel_private;
 
 static int intel_i810_fetch_size(void)
@@ -204,7 +224,7 @@
 /* Exists to support ARGB cursors */
 static void *i8xx_alloc_pages(void)
 {
-	struct page * page;
+	struct page *page;
 
 	page = alloc_pages(GFP_KERNEL | GFP_DMA32, 2);
 	if (page == NULL)
@@ -433,7 +453,7 @@
 	static const int ddt[4] = { 0, 16, 32, 64 };
 	int size; /* reserved space (in kb) at the top of stolen memory */
 
-	pci_read_config_word(agp_bridge->dev,I830_GMCH_CTRL,&gmch_ctrl);
+	pci_read_config_word(agp_bridge->dev, I830_GMCH_CTRL, &gmch_ctrl);
 
 	if (IS_I965) {
 		u32 pgetbl_ctl;
@@ -453,6 +473,15 @@
 		case I965_PGETBL_SIZE_512KB:
 			size = 512;
 			break;
+		case I965_PGETBL_SIZE_1MB:
+			size = 1024;
+			break;
+		case I965_PGETBL_SIZE_2MB:
+			size = 2048;
+			break;
+		case I965_PGETBL_SIZE_1_5MB:
+			size = 1024 + 512;
+			break;
 		default:
 			printk(KERN_INFO PFX "Unknown page table size, "
 			       "assuming 512KB\n");
@@ -523,26 +552,14 @@
 			break;
 		case I915_GMCH_GMS_STOLEN_48M:
 			/* Check it's really I915G */
-			if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_E7221_HB ||
-			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB ||
-			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB ||
-			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB ||
-			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB ||
-			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GME_HB ||
-			    IS_I965 || IS_G33)
+			if (IS_I915 || IS_I965 || IS_G33)
 				gtt_entries = MB(48) - KB(size);
 			else
 				gtt_entries = 0;
 			break;
 		case I915_GMCH_GMS_STOLEN_64M:
 			/* Check it's really I915G */
-			if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_E7221_HB ||
-			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB ||
-			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB ||
-			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB ||
-			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB ||
-			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GME_HB ||
-			    IS_I965 || IS_G33)
+			if (IS_I915 || IS_I965 || IS_G33)
 				gtt_entries = MB(64) - KB(size);
 			else
 				gtt_entries = 0;
@@ -575,6 +592,45 @@
 	intel_private.gtt_entries = gtt_entries;
 }
 
+static void intel_i830_fini_flush(void)
+{
+	kunmap(intel_private.i8xx_page);
+	intel_private.i8xx_flush_page = NULL;	/* drop the pointer obtained from kmap() */
+	unmap_page_from_agp(intel_private.i8xx_page);
+	/* free the page and clear i8xx_page so intel_i830_setup_flush() can run again */
+	__free_page(intel_private.i8xx_page);
+	intel_private.i8xx_page = NULL;
+}
+
+static void intel_i830_setup_flush(void)
+{
+	/* one-time setup of the i8xx flush page; return if it was already done */
+	if (intel_private.i8xx_page)
+		return;
+
+	intel_private.i8xx_page = alloc_page(GFP_KERNEL | __GFP_ZERO | GFP_DMA32);
+	if (!intel_private.i8xx_page)
+		return;	/* allocation failed: i8xx_flush_page is left unset */
+
+	/* make page uncached */
+	map_page_into_agp(intel_private.i8xx_page);
+
+	intel_private.i8xx_flush_page = kmap(intel_private.i8xx_page);
+	if (!intel_private.i8xx_flush_page)
+		intel_i830_fini_flush();	/* mapping failed: undo the steps above */
+}
+
+static void intel_i830_chipset_flush(struct agp_bridge_data *bridge)
+{
+	unsigned int *pg = intel_private.i8xx_flush_page;
+	int i;
+
+	/* pg is NULL when setup failed; write nothing then, as the i9xx flush does */
+	for (i = 0; pg && i < 256; i += 2)
+		*(pg + i) = i;
+	wmb();
+}
+
 /* The intel i830 automatically initializes the agp aperture during POST.
  * Use the memory already set aside for in the GTT.
  */
@@ -590,10 +646,10 @@
 	num_entries = size->num_entries;
 	agp_bridge->gatt_table_real = NULL;
 
-	pci_read_config_dword(intel_private.pcidev,I810_MMADDR,&temp);
+	pci_read_config_dword(intel_private.pcidev, I810_MMADDR, &temp);
 	temp &= 0xfff80000;
 
-	intel_private.registers = ioremap(temp,128 * 4096);
+	intel_private.registers = ioremap(temp, 128 * 4096);
 	if (!intel_private.registers)
 		return -ENOMEM;
 
@@ -633,7 +689,7 @@
 		return values[0].size;
 	}
 
-	pci_read_config_word(agp_bridge->dev,I830_GMCH_CTRL,&gmch_ctrl);
+	pci_read_config_word(agp_bridge->dev, I830_GMCH_CTRL, &gmch_ctrl);
 
 	if ((gmch_ctrl & I830_GMCH_MEM_MASK) == I830_GMCH_MEM_128M) {
 		agp_bridge->previous_size = agp_bridge->current_size = (void *) values;
@@ -657,12 +713,12 @@
 
 	current_size = A_SIZE_FIX(agp_bridge->current_size);
 
-	pci_read_config_dword(intel_private.pcidev,I810_GMADDR,&temp);
+	pci_read_config_dword(intel_private.pcidev, I810_GMADDR, &temp);
 	agp_bridge->gart_bus_addr = (temp & PCI_BASE_ADDRESS_MEM_MASK);
 
-	pci_read_config_word(agp_bridge->dev,I830_GMCH_CTRL,&gmch_ctrl);
+	pci_read_config_word(agp_bridge->dev, I830_GMCH_CTRL, &gmch_ctrl);
 	gmch_ctrl |= I830_GMCH_ENABLED;
-	pci_write_config_word(agp_bridge->dev,I830_GMCH_CTRL,gmch_ctrl);
+	pci_write_config_word(agp_bridge->dev, I830_GMCH_CTRL, gmch_ctrl);
 
 	writel(agp_bridge->gatt_bus_addr|I810_PGETBL_ENABLED, intel_private.registers+I810_PGETBL_CTL);
 	readl(intel_private.registers+I810_PGETBL_CTL);	/* PCI Posting. */
@@ -675,6 +731,8 @@
 	}
 
 	global_cache_flush();
+
+	intel_i830_setup_flush();
 	return 0;
 }
 
@@ -683,9 +741,10 @@
 	iounmap(intel_private.registers);
 }
 
-static int intel_i830_insert_entries(struct agp_memory *mem,off_t pg_start, int type)
+static int intel_i830_insert_entries(struct agp_memory *mem, off_t pg_start,
+				     int type)
 {
-	int i,j,num_entries;
+	int i, j, num_entries;
 	void *temp;
 	int ret = -EINVAL;
 	int mask_type;
@@ -697,10 +756,10 @@
 	num_entries = A_SIZE_FIX(temp)->num_entries;
 
 	if (pg_start < intel_private.gtt_entries) {
-		printk (KERN_DEBUG PFX "pg_start == 0x%.8lx,intel_private.gtt_entries == 0x%.8x\n",
-				pg_start,intel_private.gtt_entries);
+		printk(KERN_DEBUG PFX "pg_start == 0x%.8lx,intel_private.gtt_entries == 0x%.8x\n",
+				pg_start, intel_private.gtt_entries);
 
-		printk (KERN_INFO PFX "Trying to insert into local/stolen memory\n");
+		printk(KERN_INFO PFX "Trying to insert into local/stolen memory\n");
 		goto out_err;
 	}
 
@@ -738,8 +797,8 @@
 	return ret;
 }
 
-static int intel_i830_remove_entries(struct agp_memory *mem,off_t pg_start,
-				int type)
+static int intel_i830_remove_entries(struct agp_memory *mem, off_t pg_start,
+				     int type)
 {
 	int i;
 
@@ -747,7 +806,7 @@
 		return 0;
 
 	if (pg_start < intel_private.gtt_entries) {
-		printk (KERN_INFO PFX "Trying to disable local/stolen memory\n");
+		printk(KERN_INFO PFX "Trying to disable local/stolen memory\n");
 		return -EINVAL;
 	}
 
@@ -760,7 +819,7 @@
 	return 0;
 }
 
-static struct agp_memory *intel_i830_alloc_by_type(size_t pg_count,int type)
+static struct agp_memory *intel_i830_alloc_by_type(size_t pg_count, int type)
 {
 	if (type == AGP_PHYS_MEMORY)
 		return alloc_agpphysmem_i8xx(pg_count, type);
@@ -768,6 +827,95 @@
 	return NULL;
 }
 
+/* Fill ifp_resource via pci_bus_alloc_resource(); its result is returned as is. */
+static int intel_alloc_chipset_flush_resource(void)
+{
+	return pci_bus_alloc_resource(agp_bridge->dev->bus,
+				      &intel_private.ifp_resource,
+				      PAGE_SIZE, PAGE_SIZE,
+				      PCIBIOS_MIN_MEM, 0,
+				      pcibios_align_resource, agp_bridge->dev);
+}
+
+static void intel_i915_setup_chipset_flush(void)
+{
+	u32 temp;
+
+	pci_read_config_dword(agp_bridge->dev, I915_IFPADDR, &temp);
+	if (!(temp & 0x1)) {
+		if (intel_alloc_chipset_flush_resource()) {
+			/* no page: clear start so the caller skips the ioremap */
+			intel_private.ifp_resource.start = 0;
+			return;
+		}
+		intel_private.resource_valid = 1;
+		pci_write_config_dword(agp_bridge->dev, I915_IFPADDR, (intel_private.ifp_resource.start & 0xffffffff) | 0x1);
+	} else {
+		temp &= ~1;
+		intel_private.ifp_resource.start = temp;
+		intel_private.ifp_resource.end = temp + PAGE_SIZE - 1;	/* inclusive */
+		/* some BIOSes reserve this area in a pnp some don't */
+		intel_private.resource_valid =
+			!request_resource(&iomem_resource, &intel_private.ifp_resource);
+	}
+}
+
+static void intel_i965_g33_setup_chipset_flush(void)
+{
+	u32 temp_hi, temp_lo;
+	int ret;
+
+	pci_read_config_dword(agp_bridge->dev, I965_IFPADDR + 4, &temp_hi);
+	pci_read_config_dword(agp_bridge->dev, I965_IFPADDR, &temp_lo);
+
+	if (!(temp_lo & 0x1)) {
+		if (intel_alloc_chipset_flush_resource()) {
+			/* no page: clear start so the caller skips the ioremap */
+			intel_private.ifp_resource.start = 0;
+			return;
+		}
+		intel_private.resource_valid = 1;
+		pci_write_config_dword(agp_bridge->dev, I965_IFPADDR + 4,
+			upper_32_bits(intel_private.ifp_resource.start));
+		pci_write_config_dword(agp_bridge->dev, I965_IFPADDR,
+			(intel_private.ifp_resource.start & 0xffffffff) | 0x1);
+	} else {
+		u64 l64;
+
+		temp_lo &= ~0x1;
+		l64 = ((u64)temp_hi << 32) | temp_lo;
+		intel_private.ifp_resource.start = l64;
+		intel_private.ifp_resource.end = l64 + PAGE_SIZE - 1;	/* inclusive */
+		/* some BIOSes reserve this area in a pnp some don't */
+		ret = request_resource(&iomem_resource, &intel_private.ifp_resource);
+		intel_private.resource_valid = !ret;
+	}
+}
+
+static void intel_i9xx_setup_flush(void)
+{
+	/* return if already configured */
+	if (intel_private.ifp_resource.start)
+		return;
+
+	/* setup a resource for this object */
+	intel_private.ifp_resource.name = "Intel Flush Page";
+	intel_private.ifp_resource.flags = IORESOURCE_MEM;
+
+	/* 965/G33 keep a 64-bit address at I965_IFPADDR; the rest use I915_IFPADDR */
+	if (IS_I965 || IS_G33)
+		intel_i965_g33_setup_chipset_flush();
+	else
+		intel_i915_setup_chipset_flush();
+
+	if (!intel_private.ifp_resource.start)
+		return;
+
+	intel_private.i9xx_flush_page = ioremap_nocache(intel_private.ifp_resource.start, PAGE_SIZE);
+	if (!intel_private.i9xx_flush_page)
+		printk(KERN_INFO PFX "unable to ioremap flush  page - no chipset flushing\n");
+}
+
 static int intel_i915_configure(void)
 {
 	struct aper_size_info_fixed *current_size;
@@ -781,9 +929,9 @@
 
 	agp_bridge->gart_bus_addr = (temp & PCI_BASE_ADDRESS_MEM_MASK);
 
-	pci_read_config_word(agp_bridge->dev,I830_GMCH_CTRL,&gmch_ctrl);
+	pci_read_config_word(agp_bridge->dev, I830_GMCH_CTRL, &gmch_ctrl);
 	gmch_ctrl |= I830_GMCH_ENABLED;
-	pci_write_config_word(agp_bridge->dev,I830_GMCH_CTRL,gmch_ctrl);
+	pci_write_config_word(agp_bridge->dev, I830_GMCH_CTRL, gmch_ctrl);
 
 	writel(agp_bridge->gatt_bus_addr|I810_PGETBL_ENABLED, intel_private.registers+I810_PGETBL_CTL);
 	readl(intel_private.registers+I810_PGETBL_CTL);	/* PCI Posting. */
@@ -796,19 +944,34 @@
 	}
 
 	global_cache_flush();
+
+	intel_i9xx_setup_flush();
+
 	return 0;
 }
 
 static void intel_i915_cleanup(void)
 {
+	if (intel_private.i9xx_flush_page)
+		iounmap(intel_private.i9xx_flush_page);
+	if (intel_private.resource_valid)
+		release_resource(&intel_private.ifp_resource);
+	intel_private.ifp_resource.start = 0;
+	intel_private.resource_valid = 0;
 	iounmap(intel_private.gtt);
 	iounmap(intel_private.registers);
 }
 
-static int intel_i915_insert_entries(struct agp_memory *mem,off_t pg_start,
-				int type)
+static void intel_i915_chipset_flush(struct agp_bridge_data *bridge)
 {
-	int i,j,num_entries;
+	if (intel_private.i9xx_flush_page)
+		writel(1, intel_private.i9xx_flush_page);
+}
+
+static int intel_i915_insert_entries(struct agp_memory *mem, off_t pg_start,
+				     int type)
+{
+	int i, j, num_entries;
 	void *temp;
 	int ret = -EINVAL;
 	int mask_type;
@@ -820,10 +983,10 @@
 	num_entries = A_SIZE_FIX(temp)->num_entries;
 
 	if (pg_start < intel_private.gtt_entries) {
-		printk (KERN_DEBUG PFX "pg_start == 0x%.8lx,intel_private.gtt_entries == 0x%.8x\n",
-				pg_start,intel_private.gtt_entries);
+		printk(KERN_DEBUG PFX "pg_start == 0x%.8lx,intel_private.gtt_entries == 0x%.8x\n",
+				pg_start, intel_private.gtt_entries);
 
-		printk (KERN_INFO PFX "Trying to insert into local/stolen memory\n");
+		printk(KERN_INFO PFX "Trying to insert into local/stolen memory\n");
 		goto out_err;
 	}
 
@@ -861,8 +1024,8 @@
 	return ret;
 }
 
-static int intel_i915_remove_entries(struct agp_memory *mem,off_t pg_start,
-				int type)
+static int intel_i915_remove_entries(struct agp_memory *mem, off_t pg_start,
+				     int type)
 {
 	int i;
 
@@ -870,13 +1033,13 @@
 		return 0;
 
 	if (pg_start < intel_private.gtt_entries) {
-		printk (KERN_INFO PFX "Trying to disable local/stolen memory\n");
+		printk(KERN_INFO PFX "Trying to disable local/stolen memory\n");
 		return -EINVAL;
 	}
 
-	for (i = pg_start; i < (mem->page_count + pg_start); i++) {
+	for (i = pg_start; i < (mem->page_count + pg_start); i++)
 		writel(agp_bridge->scratch_page, intel_private.gtt+i);
-	}
+
 	readl(intel_private.gtt+i-1);
 
 	agp_bridge->driver->tlb_flush(mem);
@@ -923,7 +1086,7 @@
 	agp_bridge->gatt_table_real = NULL;
 
 	pci_read_config_dword(intel_private.pcidev, I915_MMADDR, &temp);
-	pci_read_config_dword(intel_private.pcidev, I915_PTEADDR,&temp2);
+	pci_read_config_dword(intel_private.pcidev, I915_PTEADDR, &temp2);
 
 	if (IS_G33)
 	    gtt_map_size = 1024 * 1024; /* 1M on G33 */
@@ -933,7 +1096,7 @@
 
 	temp &= 0xfff80000;
 
-	intel_private.registers = ioremap(temp,128 * 4096);
+	intel_private.registers = ioremap(temp, 128 * 4096);
 	if (!intel_private.registers) {
 		iounmap(intel_private.gtt);
 		return -ENOMEM;
@@ -980,6 +1143,7 @@
        struct aper_size_info_fixed *size;
        int num_entries;
        u32 temp;
+       int gtt_offset, gtt_size;
 
        size = agp_bridge->current_size;
        page_order = size->page_order;
@@ -989,13 +1153,18 @@
        pci_read_config_dword(intel_private.pcidev, I915_MMADDR, &temp);
 
        temp &= 0xfff00000;
-       intel_private.gtt = ioremap((temp + (512 * 1024)) , 512 * 1024);
 
-	if (!intel_private.gtt)
-		return -ENOMEM;
+       if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGD_HB)
+	       gtt_offset = gtt_size = MB(2);
+       else
+	       gtt_offset = gtt_size = KB(512);
 
+       intel_private.gtt = ioremap((temp + gtt_offset) , gtt_size);
 
-       intel_private.registers = ioremap(temp,128 * 4096);
+       if (!intel_private.gtt)
+	       return -ENOMEM;
+
+       intel_private.registers = ioremap(temp, 128 * 4096);
        if (!intel_private.registers) {
 		iounmap(intel_private.gtt);
 		return -ENOMEM;
@@ -1154,7 +1323,7 @@
 	/* the Intel 815 chipset spec. says that bits 29-31 in the
 	* ATTBASE register are reserved -> try not to write them */
 	if (agp_bridge->gatt_bus_addr & INTEL_815_ATTBASE_MASK) {
-		printk (KERN_EMERG PFX "gatt bus addr too high");
+		printk(KERN_EMERG PFX "gatt bus addr too high");
 		return -EINVAL;
 	}
 
@@ -1296,6 +1465,8 @@
 	pci_write_config_byte(agp_bridge->dev, INTEL_I845_AGPM, temp2 | (1 << 1));
 	/* clear any possible error conditions */
 	pci_write_config_word(agp_bridge->dev, INTEL_I845_ERRSTS, 0x001c);
+
+	intel_i830_setup_flush();
 	return 0;
 }
 
@@ -1552,6 +1723,7 @@
 	.agp_alloc_page		= agp_generic_alloc_page,
 	.agp_destroy_page	= agp_generic_destroy_page,
 	.agp_type_to_mask_type  = intel_i830_type_to_mask_type,
+	.chipset_flush		= intel_i830_chipset_flush,
 };
 
 static const struct agp_bridge_driver intel_820_driver = {
@@ -1648,6 +1820,7 @@
 	.agp_alloc_page		= agp_generic_alloc_page,
 	.agp_destroy_page	= agp_generic_destroy_page,
 	.agp_type_to_mask_type  = agp_generic_type_to_mask_type,
+	.chipset_flush		= intel_i830_chipset_flush,
 };
 
 static const struct agp_bridge_driver intel_850_driver = {
@@ -1721,6 +1894,7 @@
 	.agp_alloc_page		= agp_generic_alloc_page,
 	.agp_destroy_page	= agp_generic_destroy_page,
 	.agp_type_to_mask_type  = intel_i830_type_to_mask_type,
+	.chipset_flush		= intel_i915_chipset_flush,
 };
 
 static const struct agp_bridge_driver intel_i965_driver = {
@@ -1746,6 +1920,7 @@
        .agp_alloc_page         = agp_generic_alloc_page,
        .agp_destroy_page       = agp_generic_destroy_page,
        .agp_type_to_mask_type  = intel_i830_type_to_mask_type,
+	.chipset_flush		= intel_i915_chipset_flush,
 };
 
 static const struct agp_bridge_driver intel_7505_driver = {
@@ -1795,6 +1970,7 @@
 	.agp_alloc_page         = agp_generic_alloc_page,
 	.agp_destroy_page       = agp_generic_destroy_page,
 	.agp_type_to_mask_type  = intel_i830_type_to_mask_type,
+	.chipset_flush		= intel_i915_chipset_flush,
 };
 
 static int find_gmch(u16 device)
@@ -1804,7 +1980,7 @@
 	gmch_device = pci_get_device(PCI_VENDOR_ID_INTEL, device, NULL);
 	if (gmch_device && PCI_FUNC(gmch_device->devfn) != 0) {
 		gmch_device = pci_get_device(PCI_VENDOR_ID_INTEL,
-                                device, gmch_device);
+					     device, gmch_device);
 	}
 
 	if (!gmch_device)
@@ -1867,7 +2043,7 @@
 		NULL, &intel_915_driver },
 	{ PCI_DEVICE_ID_INTEL_82946GZ_HB, PCI_DEVICE_ID_INTEL_82946GZ_IG, 0, "946GZ",
 		NULL, &intel_i965_driver },
-	{ PCI_DEVICE_ID_INTEL_82965G_1_HB, PCI_DEVICE_ID_INTEL_82965G_1_IG, 0, "965G",
+	{ PCI_DEVICE_ID_INTEL_82G35_HB, PCI_DEVICE_ID_INTEL_82G35_IG, 0, "G35",
 		NULL, &intel_i965_driver },
 	{ PCI_DEVICE_ID_INTEL_82965Q_HB, PCI_DEVICE_ID_INTEL_82965Q_IG, 0, "965Q",
 		NULL, &intel_i965_driver },
@@ -1885,6 +2061,8 @@
 		NULL, &intel_g33_driver },
 	{ PCI_DEVICE_ID_INTEL_Q33_HB, PCI_DEVICE_ID_INTEL_Q33_IG, 0, "Q33",
 		NULL, &intel_g33_driver },
+	{ PCI_DEVICE_ID_INTEL_IGD_HB, PCI_DEVICE_ID_INTEL_IGD_IG, 0,
+	    "Intel Integrated Graphics Device", NULL, &intel_i965_driver },
 	{ 0, 0, 0, NULL, NULL, NULL }
 };
 
@@ -1924,7 +2102,7 @@
 	if (intel_agp_chipsets[i].name == NULL) {
 		if (cap_ptr)
 			printk(KERN_WARNING PFX "Unsupported Intel chipset"
-                               "(device id: %04x)\n", pdev->device);
+			       "(device id: %04x)\n", pdev->device);
 		agp_put_bridge(bridge);
 		return -ENODEV;
 	}
@@ -1937,7 +2115,7 @@
 				intel_agp_chipsets[i].gmch_chip_id);
 		agp_put_bridge(bridge);
 		return -ENODEV;
-        }
+	}
 
 	bridge->dev = pdev;
 	bridge->capndx = cap_ptr;
@@ -2067,7 +2245,7 @@
 	ID(PCI_DEVICE_ID_INTEL_82945GM_HB),
 	ID(PCI_DEVICE_ID_INTEL_82945GME_HB),
 	ID(PCI_DEVICE_ID_INTEL_82946GZ_HB),
-	ID(PCI_DEVICE_ID_INTEL_82965G_1_HB),
+	ID(PCI_DEVICE_ID_INTEL_82G35_HB),
 	ID(PCI_DEVICE_ID_INTEL_82965Q_HB),
 	ID(PCI_DEVICE_ID_INTEL_82965G_HB),
 	ID(PCI_DEVICE_ID_INTEL_82965GM_HB),
@@ -2075,6 +2253,7 @@
 	ID(PCI_DEVICE_ID_INTEL_G33_HB),
 	ID(PCI_DEVICE_ID_INTEL_Q35_HB),
 	ID(PCI_DEVICE_ID_INTEL_Q33_HB),
+	ID(PCI_DEVICE_ID_INTEL_IGD_HB),
 	{ }
 };
 
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 0118b98..84cdf90 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -234,11 +234,11 @@
 		   NULL);
 
 
-static void unregister_miscdev(void)
+static void unregister_miscdev(bool suspended)
 {
 	device_remove_file(rng_miscdev.this_device, &dev_attr_rng_available);
 	device_remove_file(rng_miscdev.this_device, &dev_attr_rng_current);
-	misc_deregister(&rng_miscdev);
+	__misc_deregister(&rng_miscdev, suspended);
 }
 
 static int register_miscdev(void)
@@ -313,7 +313,7 @@
 }
 EXPORT_SYMBOL_GPL(hwrng_register);
 
-void hwrng_unregister(struct hwrng *rng)
+void __hwrng_unregister(struct hwrng *rng, bool suspended)
 {
 	int err;
 
@@ -332,11 +332,11 @@
 		}
 	}
 	if (list_empty(&rng_list))
-		unregister_miscdev();
+		unregister_miscdev(suspended);
 
 	mutex_unlock(&rng_mutex);
 }
-EXPORT_SYMBOL_GPL(hwrng_unregister);
+EXPORT_SYMBOL_GPL(__hwrng_unregister);
 
 
 MODULE_DESCRIPTION("H/W Random Number Generator (RNG) driver");
diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index 71c8cd7..a39101f 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -232,8 +232,9 @@
 }
 
 /**
- *	misc_deregister - unregister a miscellaneous device
+ *	__misc_deregister - unregister a miscellaneous device
  *	@misc: device to unregister
+ *	@suspended: to be set if the function is used during suspend/resume
  *
  *	Unregister a miscellaneous device that was previously
  *	successfully registered with misc_register(). Success
@@ -241,7 +242,7 @@
  *	indicates an error.
  */
 
-int misc_deregister(struct miscdevice * misc)
+int __misc_deregister(struct miscdevice *misc, bool suspended)
 {
 	int i = misc->minor;
 
@@ -250,7 +251,11 @@
 
 	mutex_lock(&misc_mtx);
 	list_del(&misc->list);
-	device_destroy(misc_class, MKDEV(MISC_MAJOR, misc->minor));
+	if (suspended)
+		destroy_suspended_device(misc_class,
+					MKDEV(MISC_MAJOR, misc->minor));
+	else
+		device_destroy(misc_class, MKDEV(MISC_MAJOR, misc->minor));
 	if (i < DYNAMIC_MINORS && i>0) {
 		misc_minors[i>>3] &= ~(1 << (misc->minor & 7));
 	}
@@ -259,7 +264,7 @@
 }
 
 EXPORT_SYMBOL(misc_register);
-EXPORT_SYMBOL(misc_deregister);
+EXPORT_SYMBOL(__misc_deregister);
 
 static int __init misc_init(void)
 {
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index 02518da..454d732 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -308,7 +308,8 @@
 	return (wcrcf / wbrcf);
 }
 
-static unsigned short io_read_num_rec_bytes(ioaddr_t iobase, unsigned short *s)
+static unsigned short io_read_num_rec_bytes(unsigned int iobase,
+					    unsigned short *s)
 {
 	unsigned short tmp;
 
@@ -426,7 +427,7 @@
 static void set_cardparameter(struct cm4000_dev *dev)
 {
 	int i;
-	ioaddr_t iobase = dev->p_dev->io.BasePort1;
+	unsigned int iobase = dev->p_dev->io.BasePort1;
 	u_int8_t stopbits = 0x02; /* ISO default */
 
 	DEBUGP(3, dev, "-> set_cardparameter\n");
@@ -459,7 +460,7 @@
 	unsigned short num_bytes_read;
 	unsigned char pts_reply[4];
 	ssize_t rc;
-	ioaddr_t iobase = dev->p_dev->io.BasePort1;
+	unsigned int iobase = dev->p_dev->io.BasePort1;
 
 	rc = 0;
 
@@ -610,7 +611,7 @@
 	return rc;
 }
 
-static int io_detect_cm4000(ioaddr_t iobase, struct cm4000_dev *dev)
+static int io_detect_cm4000(unsigned int iobase, struct cm4000_dev *dev)
 {
 
 	/* note: statemachine is assumed to be reset */
@@ -671,7 +672,7 @@
 static void monitor_card(unsigned long p)
 {
 	struct cm4000_dev *dev = (struct cm4000_dev *) p;
-	ioaddr_t iobase = dev->p_dev->io.BasePort1;
+	unsigned int iobase = dev->p_dev->io.BasePort1;
 	unsigned short s;
 	struct ptsreq ptsreq;
 	int i, atrc;
@@ -933,7 +934,7 @@
 			loff_t *ppos)
 {
 	struct cm4000_dev *dev = filp->private_data;
-	ioaddr_t iobase = dev->p_dev->io.BasePort1;
+	unsigned int iobase = dev->p_dev->io.BasePort1;
 	ssize_t rc;
 	int i, j, k;
 
@@ -1054,7 +1055,7 @@
 			 size_t count, loff_t *ppos)
 {
 	struct cm4000_dev *dev = (struct cm4000_dev *) filp->private_data;
-	ioaddr_t iobase = dev->p_dev->io.BasePort1;
+	unsigned int iobase = dev->p_dev->io.BasePort1;
 	unsigned short s;
 	unsigned char tmp;
 	unsigned char infolen;
@@ -1408,7 +1409,7 @@
 		     unsigned long arg)
 {
 	struct cm4000_dev *dev = filp->private_data;
-	ioaddr_t iobase = dev->p_dev->io.BasePort1;
+	unsigned int iobase = dev->p_dev->io.BasePort1;
 	struct pcmcia_device *link;
 	int size;
 	int rc;
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index d2fabe7..2a98d99 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -12,7 +12,7 @@
 #include <linux/mutex.h>
 #include <linux/sched.h>
 #include <linux/notifier.h>
-#include <linux/latency.h>
+#include <linux/pm_qos_params.h>
 #include <linux/cpu.h>
 #include <linux/cpuidle.h>
 
@@ -265,7 +265,10 @@
 	.notifier_call = cpuidle_latency_notify,
 };
 
-#define latency_notifier_init(x) do { register_latency_notifier(x); } while (0)
+static inline void latency_notifier_init(struct notifier_block *n)
+{
+	pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n);
+}
 
 #else /* CONFIG_SMP */
 
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index eb666ec..ba7b9a6 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -14,7 +14,7 @@
 
 #include <linux/kernel.h>
 #include <linux/cpuidle.h>
-#include <linux/latency.h>
+#include <linux/pm_qos_params.h>
 #include <linux/moduleparam.h>
 #include <linux/jiffies.h>
 
@@ -81,7 +81,8 @@
 	/* consider promotion */
 	if (last_idx < dev->state_count - 1 &&
 	    last_residency > last_state->threshold.promotion_time &&
-	    dev->states[last_idx + 1].exit_latency <= system_latency_constraint()) {
+	    dev->states[last_idx + 1].exit_latency <=
+			pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
 		last_state->stats.promotion_count++;
 		last_state->stats.demotion_count = 0;
 		if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 299d45c..78d77c5 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -8,7 +8,7 @@
 
 #include <linux/kernel.h>
 #include <linux/cpuidle.h>
-#include <linux/latency.h>
+#include <linux/pm_qos_params.h>
 #include <linux/time.h>
 #include <linux/ktime.h>
 #include <linux/hrtimer.h>
@@ -48,7 +48,7 @@
 			break;
 		if (s->target_residency > data->predicted_us)
 			break;
-		if (s->exit_latency > system_latency_constraint())
+		if (s->exit_latency > pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY))
 			break;
 	}
 
diff --git a/drivers/dio/dio.c b/drivers/dio/dio.c
index 17502d6..07f274f 100644
--- a/drivers/dio/dio.c
+++ b/drivers/dio/dio.c
@@ -88,8 +88,6 @@
 #undef DIONAME
 #undef DIOFBNAME
 
-#define NUMNAMES (sizeof(names) / sizeof(struct dioname))
-
 static const char *unknowndioname 
         = "unknown DIO board -- please email <linux-m68k@lists.linux-m68k.org>!";
 
@@ -97,7 +95,7 @@
 {
         /* return pointer to a constant string describing the board with given ID */
 	unsigned int i;
-        for (i = 0; i < NUMNAMES; i++)
+	for (i = 0; i < ARRAY_SIZE(names); i++)
                 if (names[i].id == id) 
                         return names[i].name;
 
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
new file mode 100644
index 0000000..74fac0f
--- /dev/null
+++ b/drivers/gpio/Kconfig
@@ -0,0 +1,72 @@
+#
+# GPIO infrastructure and expanders
+#
+
+config HAVE_GPIO_LIB
+	bool
+	help
+	  Platforms select gpiolib if they use this infrastructure
+	  for all their GPIOs, usually starting with ones integrated
+	  into SOC processors.
+
+menu "GPIO Support"
+	depends on HAVE_GPIO_LIB
+
+config DEBUG_GPIO
+	bool "Debug GPIO calls"
+	depends on DEBUG_KERNEL
+	help
+	  Say Y here to add some extra checks and diagnostics to GPIO calls.
+	  The checks help ensure that GPIOs have been properly initialized
+	  before they are used and that sleeping calls are not made from
+	  nonsleeping contexts.  They can make bitbanged serial protocols
+	  slower.  The diagnostics help catch the type of setup errors
+	  that are most common when setting up new platforms or boards.
+
+# put expanders in the right section, in alphabetical order
+
+comment "I2C GPIO expanders:"
+
+config GPIO_PCA9539
+	tristate "PCA9539 16-bit I/O port"
+	depends on I2C
+	help
+	  Say yes here to support the PCA9539 16-bit I/O port. These
+	  parts are made by NXP and TI.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called pca9539.
+
+config GPIO_PCF857X
+	tristate "PCF857x, PCA857x, and PCA967x I2C GPIO expanders"
+	depends on I2C
+	help
+	  Say yes here to provide access to most "quasi-bidirectional" I2C
+	  GPIO expanders used for additional digital outputs or inputs.
+	  Most of these parts are from NXP, though TI is a second source for
+	  some of them.  Compatible models include:
+
+	  8 bits:   pcf8574, pcf8574a, pca8574, pca8574a,
+	            pca9670, pca9672, pca9674, pca9674a
+
+	  16 bits:  pcf8575, pcf8575c, pca8575,
+	            pca9671, pca9673, pca9675
+
+	  Your board setup code will need to declare the expanders in
+	  use, and assign numbers to the GPIOs they expose.  Those GPIOs
+	  can then be used from drivers and other kernel code, just like
+	  other GPIOs, but only accessible from task contexts.
+
+	  This driver provides an in-kernel interface to those GPIOs using
+	  platform-neutral GPIO calls.
+
+comment "SPI GPIO expanders:"
+
+config GPIO_MCP23S08
+	tristate "Microchip MCP23S08 I/O expander"
+	depends on SPI_MASTER
+	help
+	  SPI driver for Microchip MCP23S08 I/O expander.  This provides
+	  a GPIO interface supporting inputs and outputs.
+
+endmenu
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
new file mode 100644
index 0000000..470ecd6
--- /dev/null
+++ b/drivers/gpio/Makefile
@@ -0,0 +1,9 @@
+# gpio support: dedicated expander chips, etc
+
+ccflags-$(CONFIG_DEBUG_GPIO)	+= -DDEBUG
+
+obj-$(CONFIG_HAVE_GPIO_LIB)	+= gpiolib.o
+
+obj-$(CONFIG_GPIO_MCP23S08)	+= mcp23s08.o
+obj-$(CONFIG_GPIO_PCA9539)	+= pca9539.o
+obj-$(CONFIG_GPIO_PCF857X)	+= pcf857x.o
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
new file mode 100644
index 0000000..d8db2f8
--- /dev/null
+++ b/drivers/gpio/gpiolib.c
@@ -0,0 +1,567 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/irq.h>
+#include <linux/spinlock.h>
+
+#include <asm/gpio.h>
+
+
+/* Optional implementation infrastructure for GPIO interfaces.
+ *
+ * Platforms may want to use this if they tend to use very many GPIOs
+ * that aren't part of a System-On-Chip core; or across I2C/SPI/etc.
+ *
+ * When kernel footprint or instruction count is an issue, simpler
+ * implementations may be preferred.  The GPIO programming interface
+ * allows for inlining speed-critical get/set operations for common
+ * cases, so that access to SOC-integrated GPIOs can sometimes cost
+ * only an instruction or two per bit.
+ */
+
+
+/* When debugging, extend minimal trust to callers and platform code.
+ * Also emit diagnostic messages that may help initial bringup, when
+ * board setup or driver bugs are most common.
+ *
+ * Otherwise, minimize overhead in what may be bitbanging codepaths.
+ */
+#ifdef	DEBUG
+#define	extra_checks	1
+#else
+#define	extra_checks	0
+#endif
+
+/* gpio_lock prevents conflicts during gpio_desc[] table updates.
+ * While any GPIO is requested, its gpio_chip is not removable;
+ * each GPIO's "requested" flag serves as a lock and refcount.
+ */
+static DEFINE_SPINLOCK(gpio_lock);
+
+struct gpio_desc {
+	struct gpio_chip	*chip;
+	unsigned long		flags;
+/* flag symbols are bit numbers */
+#define FLAG_REQUESTED	0
+#define FLAG_IS_OUT	1
+
+#ifdef CONFIG_DEBUG_FS
+	const char		*label;
+#endif
+};
+static struct gpio_desc gpio_desc[ARCH_NR_GPIOS];
+
+static inline void desc_set_label(struct gpio_desc *d, const char *label)
+{
+#ifdef CONFIG_DEBUG_FS
+	d->label = label;
+#endif
+}
+
+/* Warn when drivers omit gpio_request() calls -- legal but ill-advised
+ * when setting direction, and otherwise illegal.  Until board setup code
+ * and drivers use explicit requests everywhere (which won't happen when
+ * those calls have no teeth) we can't avoid autorequesting.  This nag
+ * message should motivate switching to explicit requests...
+ */
+static void gpio_ensure_requested(struct gpio_desc *desc)
+{
+	if (test_and_set_bit(FLAG_REQUESTED, &desc->flags) == 0) {
+		pr_warning("GPIO-%d autorequested\n", (int)(desc - gpio_desc));
+		desc_set_label(desc, "[auto]");
+	}
+}
+
+/* caller holds gpio_lock *OR* gpio is marked as requested */
+static inline struct gpio_chip *gpio_to_chip(unsigned gpio)
+{
+	return gpio_desc[gpio].chip;
+}
+
+/**
+ * gpiochip_add() - register a gpio_chip
+ * @chip: the chip to register, with chip->base initialized
+ * Context: potentially before irqs or kmalloc will work
+ *
+ * Returns a negative errno if the chip can't be registered, such as
+ * because the chip->base is invalid or already associated with a
+ * different chip.  Otherwise it returns zero as a success code.
+ */
+int gpiochip_add(struct gpio_chip *chip)
+{
+	unsigned long	flags;
+	int		status = 0;
+	unsigned	id;
+
+	/* NOTE chip->base negative is reserved to mean a request for
+	 * dynamic allocation.  We don't currently support that.  The last
+	 * GPIO is base + ngpio - 1, so base + ngpio == ARCH_NR_GPIOS is OK.
+	 */
+	if (chip->base < 0 || (chip->base + chip->ngpio) > ARCH_NR_GPIOS) {
+		status = -EINVAL;
+		goto fail;
+	}
+
+	spin_lock_irqsave(&gpio_lock, flags);
+
+	/* these GPIO numbers must not be managed by another gpio_chip */
+	for (id = chip->base; id < chip->base + chip->ngpio; id++) {
+		if (gpio_desc[id].chip != NULL) {
+			status = -EBUSY;
+			break;
+		}
+	}
+	if (status == 0) {
+		for (id = chip->base; id < chip->base + chip->ngpio; id++) {
+			gpio_desc[id].chip = chip;
+			gpio_desc[id].flags = 0;
+		}
+	}
+
+	spin_unlock_irqrestore(&gpio_lock, flags);
+fail:
+	/* failures here can mean systems won't boot... */
+	if (status)
+		pr_err("gpiochip_add: gpios %d..%d (%s) not registered\n",
+			chip->base, chip->base + chip->ngpio - 1,
+			chip->label ? : "generic");
+	return status;
+}
+EXPORT_SYMBOL_GPL(gpiochip_add);
+
+/**
+ * gpiochip_remove() - unregister a gpio_chip
+ * @chip: the chip to unregister
+ *
+ * A gpio_chip with any GPIOs still requested may not be removed.
+ */
+int gpiochip_remove(struct gpio_chip *chip)
+{
+	unsigned long	flags;
+	int		status = 0;
+	unsigned	id;
+
+	spin_lock_irqsave(&gpio_lock, flags);
+
+	for (id = chip->base; id < chip->base + chip->ngpio; id++) {
+		if (test_bit(FLAG_REQUESTED, &gpio_desc[id].flags)) {
+			status = -EBUSY;
+			break;
+		}
+	}
+	if (status == 0) {
+		for (id = chip->base; id < chip->base + chip->ngpio; id++)
+			gpio_desc[id].chip = NULL;
+	}
+
+	spin_unlock_irqrestore(&gpio_lock, flags);
+	return status;
+}
+EXPORT_SYMBOL_GPL(gpiochip_remove);
+
+
+/* These "optional" allocation calls help prevent drivers from stomping
+ * on each other, and help provide better diagnostics in debugfs.
+ * They're called even less than the "set direction" calls.
+ */
+int gpio_request(unsigned gpio, const char *label)
+{
+	struct gpio_desc	*desc;
+	int			status = -EINVAL;
+	unsigned long		flags;
+
+	spin_lock_irqsave(&gpio_lock, flags);
+
+	if (gpio >= ARCH_NR_GPIOS)
+		goto done;
+	desc = &gpio_desc[gpio];
+	if (desc->chip == NULL)
+		goto done;
+
+	/* NOTE:  gpio_request() can be called in early boot,
+	 * before IRQs are enabled.
+	 */
+
+	if (test_and_set_bit(FLAG_REQUESTED, &desc->flags) == 0) {
+		desc_set_label(desc, label ? : "?");
+		status = 0;
+	} else
+		status = -EBUSY;
+
+done:
+	if (status)
+		pr_debug("gpio_request: gpio-%d (%s) status %d\n",
+			gpio, label ? : "?", status);
+	spin_unlock_irqrestore(&gpio_lock, flags);
+	return status;
+}
+EXPORT_SYMBOL_GPL(gpio_request);
+
+void gpio_free(unsigned gpio)
+{
+	unsigned long		flags;
+	struct gpio_desc	*desc;
+
+	if (gpio >= ARCH_NR_GPIOS) {
+		WARN_ON(extra_checks);
+		return;
+	}
+
+	spin_lock_irqsave(&gpio_lock, flags);
+
+	desc = &gpio_desc[gpio];
+	if (desc->chip && test_and_clear_bit(FLAG_REQUESTED, &desc->flags))
+		desc_set_label(desc, NULL);
+	else
+		WARN_ON(extra_checks);
+
+	spin_unlock_irqrestore(&gpio_lock, flags);
+}
+EXPORT_SYMBOL_GPL(gpio_free);
+
+
+/**
+ * gpiochip_is_requested - return string iff signal was requested
+ * @chip: controller managing the signal
+ * @offset: of signal within controller's 0..(ngpio - 1) range
+ *
+ * Returns NULL if the GPIO is not currently requested, else a string.
+ * If debugfs support is enabled, the string returned is the label passed
+ * to gpio_request(); otherwise it is a meaningless constant.
+ *
+ * This function is for use by GPIO controller drivers.  The label can
+ * help with diagnostics, and knowing that the signal is used as a GPIO
+ * can help avoid accidentally multiplexing it to another controller.
+ */
+const char *gpiochip_is_requested(struct gpio_chip *chip, unsigned offset)
+{
+	unsigned gpio = chip->base + offset;
+
+	if (gpio >= ARCH_NR_GPIOS || gpio_desc[gpio].chip != chip)
+		return NULL;
+	if (test_bit(FLAG_REQUESTED, &gpio_desc[gpio].flags) == 0)
+		return NULL;
+#ifdef CONFIG_DEBUG_FS
+	return gpio_desc[gpio].label;
+#else
+	return "?";
+#endif
+}
+EXPORT_SYMBOL_GPL(gpiochip_is_requested);
+
+
+/* Drivers MUST set GPIO direction before making get/set calls.  In
+ * some cases this is done in early boot, before IRQs are enabled.
+ *
+ * As a rule these aren't called more than once (except for drivers
+ * using the open-drain emulation idiom) so these are natural places
+ * to accumulate extra debugging checks.  Note that we can't (yet)
+ * rely on gpio_request() having been called beforehand.
+ */
+
+int gpio_direction_input(unsigned gpio)
+{
+	unsigned long		flags;
+	struct gpio_chip	*chip;
+	struct gpio_desc	*desc = &gpio_desc[gpio];
+	int			status = -EINVAL;
+
+	spin_lock_irqsave(&gpio_lock, flags);
+
+	if (gpio >= ARCH_NR_GPIOS)
+		goto fail;
+	chip = desc->chip;
+	if (!chip || !chip->get || !chip->direction_input)
+		goto fail;
+	gpio -= chip->base;
+	if (gpio >= chip->ngpio)
+		goto fail;
+	gpio_ensure_requested(desc);
+
+	/* now we know the gpio is valid and chip won't vanish */
+
+	spin_unlock_irqrestore(&gpio_lock, flags);
+
+	might_sleep_if(extra_checks && chip->can_sleep);
+
+	status = chip->direction_input(chip, gpio);
+	if (status == 0)
+		clear_bit(FLAG_IS_OUT, &desc->flags);
+	return status;
+fail:
+	spin_unlock_irqrestore(&gpio_lock, flags);
+	if (status)
+		pr_debug("%s: gpio-%d status %d\n",
+			__FUNCTION__, gpio, status);
+	return status;
+}
+EXPORT_SYMBOL_GPL(gpio_direction_input);
+
+int gpio_direction_output(unsigned gpio, int value)
+{
+	unsigned long		flags;
+	struct gpio_chip	*chip;
+	struct gpio_desc	*desc = &gpio_desc[gpio];
+	int			status = -EINVAL;
+
+	spin_lock_irqsave(&gpio_lock, flags);
+
+	if (gpio >= ARCH_NR_GPIOS)
+		goto fail;
+	chip = desc->chip;
+	if (!chip || !chip->set || !chip->direction_output)
+		goto fail;
+	gpio -= chip->base;
+	if (gpio >= chip->ngpio)
+		goto fail;
+	gpio_ensure_requested(desc);
+
+	/* now we know the gpio is valid and chip won't vanish */
+
+	spin_unlock_irqrestore(&gpio_lock, flags);
+
+	might_sleep_if(extra_checks && chip->can_sleep);
+
+	status = chip->direction_output(chip, gpio, value);
+	if (status == 0)
+		set_bit(FLAG_IS_OUT, &desc->flags);
+	return status;
+fail:
+	spin_unlock_irqrestore(&gpio_lock, flags);
+	if (status)
+		pr_debug("%s: gpio-%d status %d\n",
+			__FUNCTION__, gpio, status);
+	return status;
+}
+EXPORT_SYMBOL_GPL(gpio_direction_output);
+
+
+/* I/O calls are only valid after configuration completed; the relevant
+ * "is this a valid GPIO" error checks should already have been done.
+ *
+ * "Get" operations are often inlinable as reading a pin value register,
+ * and masking the relevant bit in that register.
+ *
+ * When "set" operations are inlinable, they involve writing that mask to
+ * one register to set a low value, or a different register to set it high.
+ * Otherwise locking is needed, so there may be little value to inlining.
+ *
+ *------------------------------------------------------------------------
+ *
+ * IMPORTANT!!!  The hot paths -- get/set value -- assume that callers
+ * have requested the GPIO.  That can include implicit requesting by
+ * a direction setting call.  Marking a gpio as requested locks its chip
+ * in memory, guaranteeing that these table lookups need no more locking
+ * and that gpiochip_remove() will fail.
+ *
+ * REVISIT when debugging, consider adding some instrumentation to ensure
+ * that the GPIO was actually requested.
+ */
+
+/**
+ * __gpio_get_value() - return a gpio's value
+ * @gpio: gpio whose value will be returned
+ * Context: any
+ *
+ * This is used directly or indirectly to implement gpio_get_value().
+ * It returns the zero or nonzero value provided by the associated
+ * gpio_chip.get() method; or zero if no such method is provided.
+ */
+int __gpio_get_value(unsigned gpio)
+{
+	struct gpio_chip	*chip;
+
+	chip = gpio_to_chip(gpio);
+	WARN_ON(extra_checks && chip->can_sleep);
+	return chip->get ? chip->get(chip, gpio - chip->base) : 0;
+}
+EXPORT_SYMBOL_GPL(__gpio_get_value);
+
+/**
+ * __gpio_set_value() - assign a gpio's value
+ * @gpio: gpio whose value will be assigned
+ * @value: value to assign
+ * Context: any
+ *
+ * This is used directly or indirectly to implement gpio_set_value().
+ * It invokes the associated gpio_chip.set() method.
+ */
+void __gpio_set_value(unsigned gpio, int value)
+{
+	struct gpio_chip	*chip;
+
+	chip = gpio_to_chip(gpio);
+	WARN_ON(extra_checks && chip->can_sleep);
+	chip->set(chip, gpio - chip->base, value);
+}
+EXPORT_SYMBOL_GPL(__gpio_set_value);
+
+/**
+ * __gpio_cansleep() - report whether gpio value access will sleep
+ * @gpio: gpio in question
+ * Context: any
+ *
+ * This is used directly or indirectly to implement gpio_cansleep().  It
+ * returns nonzero if access reading or writing the GPIO value can sleep.
+ */
+int __gpio_cansleep(unsigned gpio)
+{
+	struct gpio_chip	*chip;
+
+	/* only call this on GPIOs that are valid! */
+	chip = gpio_to_chip(gpio);
+
+	return chip->can_sleep;
+}
+EXPORT_SYMBOL_GPL(__gpio_cansleep);
+
+
+
+/* There's no value in making it easy to inline GPIO calls that may sleep.
+ * Common examples include ones connected to I2C or SPI chips.
+ */
+
+int gpio_get_value_cansleep(unsigned gpio)
+{
+	struct gpio_chip	*chip;
+
+	might_sleep_if(extra_checks);
+	chip = gpio_to_chip(gpio);
+	return chip->get(chip, gpio - chip->base);
+}
+EXPORT_SYMBOL_GPL(gpio_get_value_cansleep);
+
+void gpio_set_value_cansleep(unsigned gpio, int value)
+{
+	struct gpio_chip	*chip;
+
+	might_sleep_if(extra_checks);
+	chip = gpio_to_chip(gpio);
+	chip->set(chip, gpio - chip->base, value);
+}
+EXPORT_SYMBOL_GPL(gpio_set_value_cansleep);
+
+
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+
+static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip)
+{
+	unsigned		i;
+	unsigned		gpio = chip->base;
+	struct gpio_desc	*gdesc = &gpio_desc[gpio];
+	int			is_out;
+
+	for (i = 0; i < chip->ngpio; i++, gpio++, gdesc++) {
+		if (!test_bit(FLAG_REQUESTED, &gdesc->flags))
+			continue;
+
+		is_out = test_bit(FLAG_IS_OUT, &gdesc->flags);
+		seq_printf(s, " gpio-%-3d (%-12s) %s %s",
+			gpio, gdesc->label,
+			is_out ? "out" : "in ",
+			chip->get
+				? (chip->get(chip, i) ? "hi" : "lo")
+				: "?  ");
+
+		if (!is_out) {
+			int		irq = gpio_to_irq(gpio);
+			struct irq_desc	*desc = irq_desc + irq;
+
+			/* This races with request_irq(), set_irq_type(),
+			 * and set_irq_wake() ... but those are "rare".
+			 *
+			 * More significantly, trigger type flags aren't
+			 * currently maintained by genirq.
+			 */
+			if (irq >= 0 && desc->action) {
+				char *trigger;
+
+				switch (desc->status & IRQ_TYPE_SENSE_MASK) {
+				case IRQ_TYPE_NONE:
+					trigger = "(default)";
+					break;
+				case IRQ_TYPE_EDGE_FALLING:
+					trigger = "edge-falling";
+					break;
+				case IRQ_TYPE_EDGE_RISING:
+					trigger = "edge-rising";
+					break;
+				case IRQ_TYPE_EDGE_BOTH:
+					trigger = "edge-both";
+					break;
+				case IRQ_TYPE_LEVEL_HIGH:
+					trigger = "level-high";
+					break;
+				case IRQ_TYPE_LEVEL_LOW:
+					trigger = "level-low";
+					break;
+				default:
+					trigger = "?trigger?";
+					break;
+				}
+
+				seq_printf(s, " irq-%d %s%s",
+					irq, trigger,
+					(desc->status & IRQ_WAKEUP)
+						? " wakeup" : "");
+			}
+		}
+
+		seq_printf(s, "\n");
+	}
+}
+
+static int gpiolib_show(struct seq_file *s, void *unused)
+{
+	struct gpio_chip	*chip = NULL;
+	unsigned		gpio;
+	int			started = 0;
+
+	/* REVISIT this isn't locked against gpio_chip removal ... */
+
+	for (gpio = 0; gpio < ARCH_NR_GPIOS; gpio++) {
+		if (chip == gpio_desc[gpio].chip)
+			continue;
+		chip = gpio_desc[gpio].chip;
+		if (!chip)
+			continue;
+
+		seq_printf(s, "%sGPIOs %d-%d, %s%s:\n",
+				started ? "\n" : "",
+				chip->base, chip->base + chip->ngpio - 1,
+				chip->label ? : "generic",
+				chip->can_sleep ? ", can sleep" : "");
+		started = 1;
+		if (chip->dbg_show)
+			chip->dbg_show(s, chip);
+		else
+			gpiolib_dbg_show(s, chip);
+	}
+	return 0;
+}
+
+static int gpiolib_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, gpiolib_show, NULL);
+}
+
+static struct file_operations gpiolib_operations = {
+	.open		= gpiolib_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init gpiolib_debugfs_init(void)
+{
+	/* /sys/kernel/debug/gpio */
+	(void) debugfs_create_file("gpio", S_IFREG | S_IRUGO,
+				NULL, NULL, &gpiolib_operations);
+	return 0;
+}
+subsys_initcall(gpiolib_debugfs_init);
+
+#endif	/* DEBUG_FS */
diff --git a/drivers/gpio/mcp23s08.c b/drivers/gpio/mcp23s08.c
new file mode 100644
index 0000000..bb60e8c
--- /dev/null
+++ b/drivers/gpio/mcp23s08.c
@@ -0,0 +1,357 @@
+/*
+ * mcp23s08.c - SPI gpio expander driver
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/mcp23s08.h>
+
+#include <asm/gpio.h>
+
+
+/* Registers are all 8 bits wide.
+ *
+ * The mcp23s17 has twice as many bits, and can be configured to work
+ * with either 16 bit registers or with two adjacent 8 bit banks.
+ *
+ * Also, there are I2C versions of both chips.
+ */
+#define MCP_IODIR	0x00		/* init/reset:  all ones */
+#define MCP_IPOL	0x01
+#define MCP_GPINTEN	0x02
+#define MCP_DEFVAL	0x03
+#define MCP_INTCON	0x04
+#define MCP_IOCON	0x05
+#	define IOCON_SEQOP	(1 << 5)
+#	define IOCON_HAEN	(1 << 3)
+#	define IOCON_ODR	(1 << 2)
+#	define IOCON_INTPOL	(1 << 1)
+#define MCP_GPPU	0x06
+#define MCP_INTF	0x07
+#define MCP_INTCAP	0x08
+#define MCP_GPIO	0x09
+#define MCP_OLAT	0x0a
+
+struct mcp23s08 {
+	struct spi_device	*spi;
+	u8			addr;
+
+	/* lock protects the cached values */
+	struct mutex		lock;
+	u8			cache[11];
+
+	struct gpio_chip	chip;
+
+	struct work_struct	work;
+};
+
+static int mcp23s08_read(struct mcp23s08 *mcp, unsigned reg)
+{
+	u8	tx[2], rx[1];
+	int	status;
+
+	tx[0] = mcp->addr | 0x01;
+	tx[1] = reg;
+	status = spi_write_then_read(mcp->spi, tx, sizeof tx, rx, sizeof rx);
+	return (status < 0) ? status : rx[0];
+}
+
+static int mcp23s08_write(struct mcp23s08 *mcp, unsigned reg, u8 val)
+{
+	u8	tx[3];
+
+	tx[0] = mcp->addr;
+	tx[1] = reg;
+	tx[2] = val;
+	return spi_write_then_read(mcp->spi, tx, sizeof tx, NULL, 0);
+}
+
+static int
+mcp23s08_read_regs(struct mcp23s08 *mcp, unsigned reg, u8 *vals, unsigned n)
+{
+	u8	tx[2];
+
+	if ((n + reg) > sizeof mcp->cache)
+		return -EINVAL;
+	tx[0] = mcp->addr | 0x01;
+	tx[1] = reg;
+	return spi_write_then_read(mcp->spi, tx, sizeof tx, vals, n);
+}
+
+/*----------------------------------------------------------------------*/
+
+static int mcp23s08_direction_input(struct gpio_chip *chip, unsigned offset)
+{
+	struct mcp23s08	*mcp = container_of(chip, struct mcp23s08, chip);
+	int status;
+
+	mutex_lock(&mcp->lock);
+	mcp->cache[MCP_IODIR] |= (1 << offset);
+	status = mcp23s08_write(mcp, MCP_IODIR, mcp->cache[MCP_IODIR]);
+	mutex_unlock(&mcp->lock);
+	return status;
+}
+
+static int mcp23s08_get(struct gpio_chip *chip, unsigned offset)
+{
+	struct mcp23s08	*mcp = container_of(chip, struct mcp23s08, chip);
+	int status;
+
+	mutex_lock(&mcp->lock);
+
+	/* REVISIT reading this clears any IRQ ... */
+	status = mcp23s08_read(mcp, MCP_GPIO);
+	if (status < 0)
+		status = 0;
+	else {
+		mcp->cache[MCP_GPIO] = status;
+		status = !!(status & (1 << offset));
+	}
+	mutex_unlock(&mcp->lock);
+	return status;
+}
+
+static int __mcp23s08_set(struct mcp23s08 *mcp, unsigned mask, int value)
+{
+	u8 olat = mcp->cache[MCP_OLAT];
+
+	if (value)
+		olat |= mask;
+	else
+		olat &= ~mask;
+	mcp->cache[MCP_OLAT] = olat;
+	return mcp23s08_write(mcp, MCP_OLAT, olat);
+}
+
+static void mcp23s08_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	struct mcp23s08	*mcp = container_of(chip, struct mcp23s08, chip);
+	u8 mask = 1 << offset;
+
+	mutex_lock(&mcp->lock);
+	__mcp23s08_set(mcp, mask, value);
+	mutex_unlock(&mcp->lock);
+}
+
+static int
+mcp23s08_direction_output(struct gpio_chip *chip, unsigned offset, int value)
+{
+	struct mcp23s08	*mcp = container_of(chip, struct mcp23s08, chip);
+	u8 mask = 1 << offset;
+	int status;
+
+	mutex_lock(&mcp->lock);
+	status = __mcp23s08_set(mcp, mask, value);
+	if (status == 0) {
+		mcp->cache[MCP_IODIR] &= ~mask;
+		status = mcp23s08_write(mcp, MCP_IODIR, mcp->cache[MCP_IODIR]);
+	}
+	mutex_unlock(&mcp->lock);
+	return status;
+}
+
+/*----------------------------------------------------------------------*/
+
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/seq_file.h>
+
+/*
+ * This shows more info than the generic gpio dump code:
+ * pullups, deglitching, open drain drive.
+ */
+static void mcp23s08_dbg_show(struct seq_file *s, struct gpio_chip *chip)
+{
+	struct mcp23s08	*mcp;
+	char		bank;
+	int		t;	/* signed: also holds a negative errno */
+	unsigned	mask;
+
+	mcp = container_of(chip, struct mcp23s08, chip);
+
+	/* NOTE: we only handle one bank for now ... */
+	bank = '0' + ((mcp->addr >> 1) & 0x3);
+
+	mutex_lock(&mcp->lock);
+	t = mcp23s08_read_regs(mcp, 0, mcp->cache, sizeof mcp->cache);
+	if (t < 0) {
+		seq_printf(s, " I/O ERROR %d\n", t);
+		goto done;
+	}
+
+	for (t = 0, mask = 1; t < 8; t++, mask <<= 1) {
+		const char	*label;
+
+		label = gpiochip_is_requested(chip, t);
+		if (!label)
+			continue;
+
+		seq_printf(s, " gpio-%-3d P%c.%d (%-12s) %s %s %s",
+			chip->base + t, bank, t, label,
+			(mcp->cache[MCP_IODIR] & mask) ? "in " : "out",
+			(mcp->cache[MCP_GPIO] & mask) ? "hi" : "lo",
+			(mcp->cache[MCP_GPPU] & mask) ? "  " : "up");
+		/* NOTE:  ignoring the irq-related registers */
+		seq_printf(s, "\n");
+	}
+done:
+	mutex_unlock(&mcp->lock);
+}
+
+#else
+#define mcp23s08_dbg_show	NULL
+#endif
+
+/*----------------------------------------------------------------------*/
+
+static int mcp23s08_probe(struct spi_device *spi)
+{
+	struct mcp23s08			*mcp;
+	struct mcp23s08_platform_data	*pdata;
+	int				status;
+	int				do_update = 0;
+
+	pdata = spi->dev.platform_data;
+	if (!pdata || pdata->slave > 3 || !pdata->base)
+		return -ENODEV;
+
+	mcp = kzalloc(sizeof *mcp, GFP_KERNEL);
+	if (!mcp)
+		return -ENOMEM;
+
+	mutex_init(&mcp->lock);
+
+	mcp->spi = spi;
+	mcp->addr = 0x40 | (pdata->slave << 1);
+
+	mcp->chip.label = "mcp23s08";
+
+	mcp->chip.direction_input = mcp23s08_direction_input;
+	mcp->chip.get = mcp23s08_get;
+	mcp->chip.direction_output = mcp23s08_direction_output;
+	mcp->chip.set = mcp23s08_set;
+	mcp->chip.dbg_show = mcp23s08_dbg_show;
+
+	mcp->chip.base = pdata->base;
+	mcp->chip.ngpio = 8;
+	mcp->chip.can_sleep = 1;
+
+	spi_set_drvdata(spi, mcp);
+
+	/* verify MCP_IOCON.SEQOP = 0, so sequential reads work */
+	status = mcp23s08_read(mcp, MCP_IOCON);
+	if (status < 0)
+		goto fail;
+	if (status & IOCON_SEQOP) {
+		status &= ~IOCON_SEQOP;
+		status = mcp23s08_write(mcp, MCP_IOCON, (u8) status);
+		if (status < 0)
+			goto fail;
+	}
+
+	/* configure ~100K pullups */
+	status = mcp23s08_write(mcp, MCP_GPPU, pdata->pullups);
+	if (status < 0)
+		goto fail;
+
+	status = mcp23s08_read_regs(mcp, 0, mcp->cache, sizeof mcp->cache);
+	if (status < 0)
+		goto fail;
+
+	/* disable inverter on input */
+	if (mcp->cache[MCP_IPOL] != 0) {
+		mcp->cache[MCP_IPOL] = 0;
+		do_update = 1;
+	}
+
+	/* disable irqs */
+	if (mcp->cache[MCP_GPINTEN] != 0) {
+		mcp->cache[MCP_GPINTEN] = 0;
+		do_update = 1;
+	}
+
+	if (do_update) {
+		u8 tx[4];
+
+		tx[0] = mcp->addr;
+		tx[1] = MCP_IPOL;
+		memcpy(&tx[2], &mcp->cache[MCP_IPOL], sizeof(tx) - 2);
+		status = spi_write_then_read(mcp->spi, tx, sizeof tx, NULL, 0);
+		if (status < 0)
+			goto fail;
+	}
+
+	status = gpiochip_add(&mcp->chip);
+	if (status < 0)
+		goto fail;
+
+	/* NOTE:  these chips have a relatively sane IRQ framework, with
+	 * per-signal masking and level/edge triggering; not yet handled.
+	 */
+	if (pdata->setup) {
+		status = pdata->setup(spi, mcp->chip.base,
+				mcp->chip.ngpio, pdata->context);
+		if (status < 0)
+			dev_dbg(&spi->dev, "setup --> %d\n", status);
+	}
+
+	return 0;
+
+fail:
+	kfree(mcp);
+	return status;
+}
+
+static int mcp23s08_remove(struct spi_device *spi)
+{
+	struct mcp23s08			*mcp = spi_get_drvdata(spi);
+	struct mcp23s08_platform_data	*pdata = spi->dev.platform_data;
+	int				status = 0;
+
+	if (pdata->teardown) {
+		status = pdata->teardown(spi,
+				mcp->chip.base, mcp->chip.ngpio,
+				pdata->context);
+		if (status < 0) {
+			dev_err(&spi->dev, "%s --> %d\n", "teardown", status);
+			return status;
+		}
+	}
+
+	status = gpiochip_remove(&mcp->chip);
+	if (status == 0)
+		kfree(mcp);
+	else
+		dev_err(&spi->dev, "%s --> %d\n", "remove", status);
+	return status;
+}
+
+static struct spi_driver mcp23s08_driver = {
+	.probe		= mcp23s08_probe,
+	.remove		= mcp23s08_remove,
+	.driver = {
+		.name	= "mcp23s08",
+		.owner	= THIS_MODULE,
+	},
+};
+
+/*----------------------------------------------------------------------*/
+
+static int __init mcp23s08_init(void)
+{
+	return spi_register_driver(&mcp23s08_driver);
+}
+module_init(mcp23s08_init);
+
+static void __exit mcp23s08_exit(void)
+{
+	spi_unregister_driver(&mcp23s08_driver);
+}
+module_exit(mcp23s08_exit);
+
+MODULE_LICENSE("GPL");
+
diff --git a/drivers/gpio/pca9539.c b/drivers/gpio/pca9539.c
new file mode 100644
index 0000000..3e85c92
--- /dev/null
+++ b/drivers/gpio/pca9539.c
@@ -0,0 +1,271 @@
+/*
+ *  pca9539.c - 16-bit I/O port with interrupt and reset
+ *
+ *  Copyright (C) 2005 Ben Gardner <bgardner@wabtec.com>
+ *  Copyright (C) 2007 Marvell International Ltd.
+ *
+ *  Derived from drivers/i2c/chips/pca9539.c
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/i2c/pca9539.h>
+
+#include <asm/gpio.h>
+
+
+#define NR_PCA9539_GPIOS	16	/* becomes gpio_chip.ngpio */
+/* register numbers handed to pca9539_read_reg()/pca9539_write_reg() */
+#define PCA9539_INPUT		0
+#define PCA9539_OUTPUT		2
+#define PCA9539_INVERT		4
+#define PCA9539_DIRECTION	6
+
+struct pca9539_chip {
+	unsigned gpio_start;		/* first GPIO number, from platform data */
+	uint16_t reg_output;		/* cached PCA9539_OUTPUT register */
+	uint16_t reg_direction;		/* cached PCA9539_DIRECTION register */
+
+	struct i2c_client *client;
+	struct gpio_chip gpio_chip;
+};
+
+/* NOTE:  we can't currently rely on fault codes to come from SMBus
+ * calls, so we map all errors to EIO here and return zero otherwise.
+ */
+static int pca9539_write_reg(struct pca9539_chip *chip, int reg, uint16_t val)
+{
+	int ret = i2c_smbus_write_word_data(chip->client, reg, val);
+
+	/* any SMBus failure collapses to -EIO; success is always zero */
+	return (ret < 0) ? -EIO : 0;
+}
+
+static int pca9539_read_reg(struct pca9539_chip *chip, int reg, uint16_t *val)
+{
+	int word = i2c_smbus_read_word_data(chip->client, reg);
+
+	/* on success store the 16-bit value via *val and return zero */
+	if (word >= 0) {
+		*val = (uint16_t)word;
+		return 0;
+	}
+	/* failures are logged and collapsed to -EIO; *val is untouched */
+	dev_err(&chip->client->dev, "failed reading register\n");
+	return -EIO;
+}
+
+static int pca9539_gpio_direction_input(struct gpio_chip *gc, unsigned off)
+{
+	struct pca9539_chip *chip =
+		container_of(gc, struct pca9539_chip, gpio_chip);
+	uint16_t dir;
+	int err;
+
+	/* a set bit in the direction register makes the pin an input */
+	dir = chip->reg_direction | (1u << off);
+
+	/* only update the cached copy once the chip accepted the write */
+	err = pca9539_write_reg(chip, PCA9539_DIRECTION, dir);
+	if (!err)
+		chip->reg_direction = dir;
+	return err;
+}
+
+/*
+ * Configure pin @off as an output driving @val.  Each cached register is
+ * refreshed only after the matching write succeeded; a failure of the
+ * second write leaves the new output level latched.
+ */
+static int pca9539_gpio_direction_output(struct gpio_chip *gc,
+		unsigned off, int val)
+{
+	struct pca9539_chip *chip =
+		container_of(gc, struct pca9539_chip, gpio_chip);
+	unsigned bit = 1u << off;
+	uint16_t out, dir;
+	int err;
+
+	/* latch the requested level first ... */
+	out = val ? (chip->reg_output | bit) : (chip->reg_output & ~bit);
+	err = pca9539_write_reg(chip, PCA9539_OUTPUT, out);
+	if (err)
+		return err;
+	chip->reg_output = out;
+
+	/* ... and only then clear the pin's direction bit */
+	dir = chip->reg_direction & ~bit;
+	err = pca9539_write_reg(chip, PCA9539_DIRECTION, dir);
+	if (err)
+		return err;
+	chip->reg_direction = dir;
+
+	return 0;
+}
+
+static int pca9539_gpio_get_value(struct gpio_chip *gc, unsigned off)
+{
+	struct pca9539_chip *chip =
+		container_of(gc, struct pca9539_chip, gpio_chip);
+	uint16_t inputs;
+
+	/* NOTE:  on a read fault the diagnostic was already emitted, and
+	 * reporting "low" is all we should do unless the
+	 * gpio_*_value_cansleep() calls become different from their
+	 * nonsleeping siblings (and report faults).
+	 */
+	if (pca9539_read_reg(chip, PCA9539_INPUT, &inputs) < 0)
+		return 0;
+
+	/* normalize the selected bit to 0 or 1 */
+	if (inputs & (1u << off))
+		return 1;
+	return 0;
+}
+
+/*
+ * Drive pin @off to @val.  Being void, this cannot report a fault: a
+ * failed write is silently dropped and the cached output register is left
+ * as it was.
+ */
+static void pca9539_gpio_set_value(struct gpio_chip *gc, unsigned off, int val)
+{
+	struct pca9539_chip *chip =
+		container_of(gc, struct pca9539_chip, gpio_chip);
+	unsigned bit = 1u << off;
+	uint16_t out;
+
+	/* new image of the output register */
+	out = val ? (chip->reg_output | bit) : (chip->reg_output & ~bit);
+
+	/* keep the cache in step with what the chip actually accepted */
+	if (pca9539_write_reg(chip, PCA9539_OUTPUT, out) == 0)
+		chip->reg_output = out;
+}
+
+static int pca9539_init_gpio(struct pca9539_chip *chip)
+{
+	struct gpio_chip *gc = &chip->gpio_chip;
+
+	gc->direction_input  = pca9539_gpio_direction_input;
+	gc->direction_output = pca9539_gpio_direction_output;
+	gc->get = pca9539_gpio_get_value;
+	gc->set = pca9539_gpio_set_value;
+	/* every accessor above does SMBus I/O, so flag that it may sleep */
+	gc->can_sleep = 1;
+
+	gc->base = chip->gpio_start;
+	gc->ngpio = NR_PCA9539_GPIOS;
+	gc->label = "pca9539";
+
+	return gpiochip_add(gc);	/* result is passed back to probe */
+}
+/* Bind one chip: cache its registers, register the gpio_chip, run setup(). */
+static int __devinit pca9539_probe(struct i2c_client *client)
+{
+	struct pca9539_platform_data *pdata;
+	struct pca9539_chip *chip;
+	int ret;
+
+	pdata = client->dev.platform_data;
+	if (pdata == NULL)
+		return -ENODEV;		/* platform data is mandatory */
+
+	chip = kzalloc(sizeof(struct pca9539_chip), GFP_KERNEL);
+	if (chip == NULL)
+		return -ENOMEM;
+
+	chip->client = client;
+
+	chip->gpio_start = pdata->gpio_base;
+
+	/* initialize cached registers from their original values.
+	 * we can't share this chip with another i2c master.
+	 */
+	ret = pca9539_read_reg(chip, PCA9539_OUTPUT, &chip->reg_output);
+	if (ret)
+		goto out_failed;
+
+	ret = pca9539_read_reg(chip, PCA9539_DIRECTION, &chip->reg_direction);
+	if (ret)
+		goto out_failed;
+
+	/* set platform specific polarity inversion */
+	ret = pca9539_write_reg(chip, PCA9539_INVERT, pdata->invert);
+	if (ret)
+		goto out_failed;
+
+	ret = pca9539_init_gpio(chip);	/* registers the gpio_chip */
+	if (ret)
+		goto out_failed;
+
+	if (pdata->setup) {
+		ret = pdata->setup(client, chip->gpio_chip.base,
+				chip->gpio_chip.ngpio, pdata->context);
+		if (ret < 0)	/* not fatal: probe still returns 0 */
+			dev_warn(&client->dev, "setup failed, %d\n", ret);
+	}
+
+	i2c_set_clientdata(client, chip);	/* read back in remove() */
+	return 0;
+
+out_failed:	/* all failure paths above only have the allocation to undo */
+	kfree(chip);
+	return ret;
+}
+
+static int pca9539_remove(struct i2c_client *client)
+{
+	struct pca9539_chip *chip = i2c_get_clientdata(client);
+	struct pca9539_platform_data *pdata = client->dev.platform_data;
+	struct gpio_chip *gc = &chip->gpio_chip;
+	const char *step = "teardown";
+	int err = 0;
+
+	/* the optional platform hook runs before the chip is unregistered */
+	if (pdata->teardown)
+		err = pdata->teardown(client, gc->base, gc->ngpio,
+				pdata->context);
+	if (err < 0)
+		goto failed;
+
+	step = "gpiochip_remove()";
+	err = gpiochip_remove(gc);
+	if (err)
+		goto failed;
+	kfree(chip);
+	return 0;
+
+failed:
+	dev_err(&client->dev, "%s failed, %d\n", step, err);
+	return err;
+}
+
+static struct i2c_driver pca9539_driver = {
+	.probe		= pca9539_probe,
+	.remove		= pca9539_remove,
+	.driver = {
+		.name	= "pca9539",
+	},
+};
+
+static int __init pca9539_init(void)
+{
+	return i2c_add_driver(&pca9539_driver);	/* at module load */
+}
+module_init(pca9539_init);
+
+static void __exit pca9539_exit(void)
+{
+	i2c_del_driver(&pca9539_driver);	/* at module unload */
+}
+module_exit(pca9539_exit);
+
+MODULE_AUTHOR("eric miao <eric.miao@marvell.com>");
+MODULE_DESCRIPTION("GPIO expander driver for PCA9539");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpio/pcf857x.c b/drivers/gpio/pcf857x.c
new file mode 100644
index 0000000..c6b3b53
--- /dev/null
+++ b/drivers/gpio/pcf857x.c
@@ -0,0 +1,330 @@
+/*
+ * pcf857x - driver for pcf857x, pca857x, and pca967x I2C GPIO expanders
+ *
+ * Copyright (C) 2007 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/i2c/pcf857x.h>
+
+#include <asm/gpio.h>
+
+
+/*
+ * The pcf857x, pca857x, and pca967x chips only expose one read and one
+ * write register.  Writing a "one" bit (to match the reset state) lets
+ * that pin be used as an input; it's not an open-drain model, but acts
+ * a bit like one.  This is described as "quasi-bidirectional"; read the
+ * chip documentation for details.
+ *
+ * Many other I2C GPIO expander chips (like the pca953x models) have
+ * more complex register models and more conventional circuitry using
+ * push/pull drivers.  They often use the same 0x20..0x27 addresses as
+ * pcf857x parts, making the "legacy" I2C driver model problematic.
+ */
+struct pcf857x {
+	struct gpio_chip	chip;		/* embedded gpiolib chip */
+	struct i2c_client	*client;	/* I2C device we talk to */
+	unsigned		out;		/* software latch */
+};
+
+/*-------------------------------------------------------------------------*/
+
+/* Talk to 8-bit I/O expander */
+
+static int pcf857x_input8(struct gpio_chip *chip, unsigned offset)
+{
+	struct pcf857x	*pcf = container_of(chip, struct pcf857x, chip);
+
+	pcf->out = pcf->out | (1 << offset);	/* "one": usable as input */
+	return i2c_smbus_write_byte(pcf->client, pcf->out);
+}
+
+static int pcf857x_get8(struct gpio_chip *chip, unsigned offset)
+{
+	struct pcf857x	*pcf = container_of(chip, struct pcf857x, chip);
+	s32		pins = i2c_smbus_read_byte(pcf->client);
+
+	/* a read fault reports low; else the raw masked bit (not 0/1) */
+	return (pins >= 0) ? (pins & (1 << offset)) : 0;
+}
+
+static int pcf857x_output8(struct gpio_chip *chip, unsigned offset, int value)
+{
+	struct pcf857x	*pcf = container_of(chip, struct pcf857x, chip);
+	unsigned	mask = 1 << offset;
+
+	/* update the software latch first, then push all of it to the chip;
+	 * the latch keeps the new bit even if the write reports an error
+	 */
+	pcf->out = value ? (pcf->out | mask) : (pcf->out & ~mask);
+	return i2c_smbus_write_byte(pcf->client, pcf->out);
+}
+
+static void pcf857x_set8(struct gpio_chip *chip, unsigned offset, int value)
+{
+	pcf857x_output8(chip, offset, value);	/* write status is dropped */
+}
+
+/*-------------------------------------------------------------------------*/
+
+/* Talk to 16-bit I/O expander */
+
+static int i2c_write_le16(struct i2c_client *client, u16 word)
+{
+	u8 bytes[2] = { [0] = word & 0xff, [1] = word >> 8 };
+	int sent = i2c_master_send(client, bytes, 2);
+
+	/* a negative status passes through; anything else becomes zero */
+	return (sent < 0) ? sent : 0;
+}
+
+static int i2c_read_le16(struct i2c_client *client)
+{
+	u8 bytes[2];
+	int got = i2c_master_recv(client, bytes, 2);
+
+	/* a negative status from the transfer is passed straight through */
+	if (got < 0)
+		return got;
+	return bytes[0] | (bytes[1] << 8);	/* low byte came first */
+}
+
+static int pcf857x_input16(struct gpio_chip *chip, unsigned offset)
+{
+	struct pcf857x	*pcf = container_of(chip, struct pcf857x, chip);
+
+	pcf->out = pcf->out | (1 << offset);	/* "one": usable as input */
+	return i2c_write_le16(pcf->client, pcf->out);
+}
+
+static int pcf857x_get16(struct gpio_chip *chip, unsigned offset)
+{
+	struct pcf857x	*pcf = container_of(chip, struct pcf857x, chip);
+	int		pins = i2c_read_le16(pcf->client);
+
+	/* a read fault reports low; else the raw masked bit (not 0/1) */
+	return (pins >= 0) ? (pins & (1 << offset)) : 0;
+}
+
+static int pcf857x_output16(struct gpio_chip *chip, unsigned offset, int value)
+{
+	struct pcf857x	*pcf = container_of(chip, struct pcf857x, chip);
+	unsigned	mask = 1 << offset;
+
+	/* update the software latch first, then push all of it to the chip;
+	 * the latch keeps the new bit even if the write reports an error
+	 */
+	pcf->out = value ? (pcf->out | mask) : (pcf->out & ~mask);
+	return i2c_write_le16(pcf->client, pcf->out);
+}
+
+static void pcf857x_set16(struct gpio_chip *chip, unsigned offset, int value)
+{
+	pcf857x_output16(chip, offset, value);	/* write status is dropped */
+}
+
+/*-------------------------------------------------------------------------*/
+/* Bind one expander: pick 8- or 16-bit accessors by name, then register. */
+static int pcf857x_probe(struct i2c_client *client)
+{
+	struct pcf857x_platform_data	*pdata;
+	struct pcf857x			*gpio;
+	int				status;
+
+	pdata = client->dev.platform_data;
+	if (!pdata)
+		return -ENODEV;		/* platform data is mandatory */
+
+	/* Allocate, initialize, and register this gpio_chip. */
+	gpio = kzalloc(sizeof *gpio, GFP_KERNEL);
+	if (!gpio)
+		return -ENOMEM;
+
+	gpio->chip.base = pdata->gpio_base;
+	gpio->chip.can_sleep = 1;	/* accessors do I2C transfers */
+
+	/* NOTE:  the OnSemi jlc1562b is also largely compatible with
+	 * these parts, notably for output.  It has a low-resolution
+	 * DAC instead of pin change IRQs; and its inputs can be the
+	 * result of comparators.
+	 */
+
+	/* 8574 addresses are 0x20..0x27; 8574a uses 0x38..0x3f;
+	 * 9670, 9672, 9674, and 9674a use quite a variety.
+	 *
+	 * NOTE: we don't distinguish here between *4 and *4a parts.
+	 */
+	if (strcmp(client->name, "pcf8574") == 0
+			|| strcmp(client->name, "pca8574") == 0
+			|| strcmp(client->name, "pca9670") == 0
+			|| strcmp(client->name, "pca9672") == 0
+			|| strcmp(client->name, "pca9674") == 0
+			) {
+		gpio->chip.ngpio = 8;
+		gpio->chip.direction_input = pcf857x_input8;
+		gpio->chip.get = pcf857x_get8;
+		gpio->chip.direction_output = pcf857x_output8;
+		gpio->chip.set = pcf857x_set8;
+
+		if (!i2c_check_functionality(client->adapter,
+				I2C_FUNC_SMBUS_BYTE))
+			status = -EIO;
+
+		/* fail if there's no chip present */
+		else
+			status = i2c_smbus_read_byte(client);
+
+	/* '75/'75c addresses are 0x20..0x27, just like the '74;
+	 * the '75c doesn't have a current source pulling high.
+	 * 9671, 9673, and 9675 use quite a variety of addresses.
+	 *
+	 * NOTE: we don't distinguish here between '75 and '75c parts.
+	 */
+	} else if (strcmp(client->name, "pcf8575") == 0
+			|| strcmp(client->name, "pca8575") == 0
+			|| strcmp(client->name, "pca9671") == 0
+			|| strcmp(client->name, "pca9673") == 0
+			|| strcmp(client->name, "pca9675") == 0
+			) {
+		gpio->chip.ngpio = 16;
+		gpio->chip.direction_input = pcf857x_input16;
+		gpio->chip.get = pcf857x_get16;
+		gpio->chip.direction_output = pcf857x_output16;
+		gpio->chip.set = pcf857x_set16;
+
+		if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+			status = -EIO;
+
+		/* fail if there's no chip present */
+		else
+			status = i2c_read_le16(client);
+
+	} else
+		status = -ENODEV;	/* not a chip name we know */
+
+	if (status < 0)
+		goto fail;
+
+	gpio->chip.label = client->name;
+
+	gpio->client = client;
+	i2c_set_clientdata(client, gpio);
+
+	/* NOTE:  these chips have strange "quasi-bidirectional" I/O pins.
+	 * We can't actually know whether a pin is configured (a) as output
+	 * and driving the signal low, or (b) as input and reporting a low
+	 * value ... without knowing the last value written since the chip
+	 * came out of reset (if any).  We can't read the latched output.
+	 *
+	 * In short, the only reliable solution for setting up pin direction
+	 * is to do it explicitly.  The setup() method can do that, but it
+	 * may cause transient glitching since it can't know the last value
+	 * written (some pins may need to be driven low).
+	 *
+	 * Using pdata->n_latch avoids that trouble.  When left initialized
+	 * to zero, our software copy of the "latch" then matches the chip's
+	 * all-ones reset state.  Otherwise it flags pins to be driven low.
+	 */
+	gpio->out = ~pdata->n_latch;
+
+	status = gpiochip_add(&gpio->chip);
+	if (status < 0)
+		goto fail;
+
+	/* NOTE: these chips can issue "some pin-changed" IRQs, which we
+	 * don't yet even try to use.  Among other issues, the relevant
+	 * genirq state isn't available to modular drivers; and most irq
+	 * methods can't be called from sleeping contexts.
+	 */
+
+	dev_info(&client->dev, "gpios %d..%d on a %s%s\n",
+			gpio->chip.base,
+			gpio->chip.base + gpio->chip.ngpio - 1,
+			client->name,
+			client->irq ? " (irq ignored)" : "");
+
+	/* Let platform code set up the GPIOs and their users.
+	 * Now is the first time anyone could use them.
+	 */
+	if (pdata->setup) {
+		status = pdata->setup(client,
+				gpio->chip.base, gpio->chip.ngpio,
+				pdata->context);
+		if (status < 0)	/* not fatal: probe still returns 0 */
+			dev_warn(&client->dev, "setup --> %d\n", status);
+	}
+
+	return 0;
+
+fail:
+	dev_dbg(&client->dev, "probe error %d for '%s'\n",
+			status, client->name);
+	kfree(gpio);
+	return status;
+}
+
+static int pcf857x_remove(struct i2c_client *client)
+{
+	struct pcf857x			*pcf = i2c_get_clientdata(client);
+	struct pcf857x_platform_data	*pdata = client->dev.platform_data;
+	int				status;
+
+	/* optional platform hook runs first; its failure aborts removal */
+	if (pdata->teardown) {
+		status = pdata->teardown(client, pcf->chip.base,
+				pcf->chip.ngpio, pdata->context);
+		if (status < 0) {
+			dev_err(&client->dev, "%s --> %d\n",
+					"teardown", status);
+			return status;
+		}
+	}
+
+	status = gpiochip_remove(&pcf->chip);
+	if (status != 0)
+		dev_err(&client->dev, "%s --> %d\n", "remove", status);
+	else
+		kfree(pcf);
+	return status;
+}
+
+static struct i2c_driver pcf857x_driver = {
+	.probe	= pcf857x_probe,
+	.remove	= pcf857x_remove,
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "pcf857x",
+	},
+};
+
+static int __init pcf857x_init(void)
+{
+	return i2c_add_driver(&pcf857x_driver);	/* at module load */
+}
+module_init(pcf857x_init);
+
+static void __exit pcf857x_exit(void)
+{
+	i2c_del_driver(&pcf857x_driver);	/* at module unload */
+}
+module_exit(pcf857x_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("David Brownell");
diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig
index bd7082c..b21593f 100644
--- a/drivers/i2c/chips/Kconfig
+++ b/drivers/i2c/chips/Kconfig
@@ -54,8 +54,8 @@
 	  hardware.  If unsure, say N.
 
 config SENSORS_PCA9539
-	tristate "Philips PCA9539 16-bit I/O port"
-	depends on EXPERIMENTAL
+	tristate "Philips PCA9539 16-bit I/O port (DEPRECATED)"
+	depends on EXPERIMENTAL && GPIO_PCA9539 = "n"
 	help
 	  If you say yes here you get support for the Philips PCA9539
 	  16-bit I/O port.
@@ -63,6 +63,9 @@
 	  This driver can also be built as a module.  If so, the module
 	  will be called pca9539.
 
+	  This driver is deprecated and will be dropped soon. Use
+	  drivers/gpio/pca9539.c instead.
+
 config SENSORS_PCF8591
 	tristate "Philips PCF8591"
 	depends on EXPERIMENTAL
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index a193dfb..a5dc78a 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -44,8 +44,8 @@
 source "drivers/infiniband/hw/ehca/Kconfig"
 source "drivers/infiniband/hw/amso1100/Kconfig"
 source "drivers/infiniband/hw/cxgb3/Kconfig"
-
 source "drivers/infiniband/hw/mlx4/Kconfig"
+source "drivers/infiniband/hw/nes/Kconfig"
 
 source "drivers/infiniband/ulp/ipoib/Kconfig"
 
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index 75f325e..ed35e44 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -5,6 +5,7 @@
 obj-$(CONFIG_INFINIBAND_AMSO1100)	+= hw/amso1100/
 obj-$(CONFIG_INFINIBAND_CXGB3)		+= hw/cxgb3/
 obj-$(CONFIG_MLX4_INFINIBAND)		+= hw/mlx4/
+obj-$(CONFIG_INFINIBAND_NES)		+= hw/nes/
 obj-$(CONFIG_INFINIBAND_IPOIB)		+= ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP)		+= ulp/srp/
 obj-$(CONFIG_INFINIBAND_ISER)		+= ulp/iser/
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c015014..638b727 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -974,6 +974,9 @@
 			  struct cm_id_private *cm_id_priv,
 			  struct ib_cm_req_param *param)
 {
+	struct ib_sa_path_rec *pri_path = param->primary_path;
+	struct ib_sa_path_rec *alt_path = param->alternate_path;
+
 	cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
 			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
 
@@ -997,35 +1000,46 @@
 	cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
 	cm_req_set_srq(req_msg, param->srq);
 
-	req_msg->primary_local_lid = param->primary_path->slid;
-	req_msg->primary_remote_lid = param->primary_path->dlid;
-	req_msg->primary_local_gid = param->primary_path->sgid;
-	req_msg->primary_remote_gid = param->primary_path->dgid;
-	cm_req_set_primary_flow_label(req_msg, param->primary_path->flow_label);
-	cm_req_set_primary_packet_rate(req_msg, param->primary_path->rate);
-	req_msg->primary_traffic_class = param->primary_path->traffic_class;
-	req_msg->primary_hop_limit = param->primary_path->hop_limit;
-	cm_req_set_primary_sl(req_msg, param->primary_path->sl);
-	cm_req_set_primary_subnet_local(req_msg, 1); /* local only... */
+	if (pri_path->hop_limit <= 1) {
+		req_msg->primary_local_lid = pri_path->slid;
+		req_msg->primary_remote_lid = pri_path->dlid;
+	} else {
+		/* Work-around until there's a way to obtain remote LID info */
+		req_msg->primary_local_lid = IB_LID_PERMISSIVE;
+		req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
+	}
+	req_msg->primary_local_gid = pri_path->sgid;
+	req_msg->primary_remote_gid = pri_path->dgid;
+	cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
+	cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
+	req_msg->primary_traffic_class = pri_path->traffic_class;
+	req_msg->primary_hop_limit = pri_path->hop_limit;
+	cm_req_set_primary_sl(req_msg, pri_path->sl);
+	cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
 	cm_req_set_primary_local_ack_timeout(req_msg,
 		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
-			       param->primary_path->packet_life_time));
+			       pri_path->packet_life_time));
 
-	if (param->alternate_path) {
-		req_msg->alt_local_lid = param->alternate_path->slid;
-		req_msg->alt_remote_lid = param->alternate_path->dlid;
-		req_msg->alt_local_gid = param->alternate_path->sgid;
-		req_msg->alt_remote_gid = param->alternate_path->dgid;
+	if (alt_path) {
+		if (alt_path->hop_limit <= 1) {
+			req_msg->alt_local_lid = alt_path->slid;
+			req_msg->alt_remote_lid = alt_path->dlid;
+		} else {
+			req_msg->alt_local_lid = IB_LID_PERMISSIVE;
+			req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
+		}
+		req_msg->alt_local_gid = alt_path->sgid;
+		req_msg->alt_remote_gid = alt_path->dgid;
 		cm_req_set_alt_flow_label(req_msg,
-					  param->alternate_path->flow_label);
-		cm_req_set_alt_packet_rate(req_msg, param->alternate_path->rate);
-		req_msg->alt_traffic_class = param->alternate_path->traffic_class;
-		req_msg->alt_hop_limit = param->alternate_path->hop_limit;
-		cm_req_set_alt_sl(req_msg, param->alternate_path->sl);
-		cm_req_set_alt_subnet_local(req_msg, 1); /* local only... */
+					  alt_path->flow_label);
+		cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
+		req_msg->alt_traffic_class = alt_path->traffic_class;
+		req_msg->alt_hop_limit = alt_path->hop_limit;
+		cm_req_set_alt_sl(req_msg, alt_path->sl);
+		cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
 		cm_req_set_alt_local_ack_timeout(req_msg,
 			cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
-				       param->alternate_path->packet_life_time));
+				       alt_path->packet_life_time));
 	}
 
 	if (param->private_data && param->private_data_len)
@@ -1441,6 +1455,34 @@
 	return listen_cm_id_priv;
 }
 
+/*
+ * Work-around for inter-subnet connections.  If a path is not subnet-local
+ * and its LIDs are permissive, override the LID/SL data in the REQ with
+ * the LID information from the work completion (slid, sl, dlid_path_bits).
+ */
+static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
+{
+	if (!cm_req_get_primary_subnet_local(req_msg)) {
+		if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
+			req_msg->primary_local_lid = cpu_to_be16(wc->slid);
+			cm_req_set_primary_sl(req_msg, wc->sl);
+		}
+
+		if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
+			req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
+	}
+
+	if (!cm_req_get_alt_subnet_local(req_msg)) {
+		if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
+			req_msg->alt_local_lid = cpu_to_be16(wc->slid);
+			cm_req_set_alt_sl(req_msg, wc->sl);
+		}
+
+		if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
+			req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
+	}
+}
+
 static int cm_req_handler(struct cm_work *work)
 {
 	struct ib_cm_id *cm_id;
@@ -1481,6 +1523,7 @@
 	cm_id_priv->id.service_id = req_msg->service_id;
 	cm_id_priv->id.service_mask = __constant_cpu_to_be64(~0ULL);
 
+	cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
 	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
 	ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
 	if (ret) {
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 6c7aa59..7f00347 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -320,10 +320,13 @@
 			.max_maps   = pool->max_remaps,
 			.page_shift = params->page_shift
 		};
+		int bytes_per_fmr = sizeof *fmr;
+
+		if (pool->cache_bucket)
+			bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64);
 
 		for (i = 0; i < params->pool_size; ++i) {
-			fmr = kmalloc(sizeof *fmr + params->max_pages_per_fmr * sizeof (u64),
-				      GFP_KERNEL);
+			fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);
 			if (!fmr) {
 				printk(KERN_WARNING PFX "failed to allocate fmr "
 				       "struct for FMR %d\n", i);
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index f281d16..92cce8a 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -101,6 +101,7 @@
 	spinlock_t mod_sqp_lock;
 	enum ib_port_state port_state;
 	struct ehca_sma_attr saved_attr;
+	u32 pma_qp_nr;
 };
 
 #define HCA_CAP_MR_PGSIZE_4K  0x80000000
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 863b34f..b5ca94c 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -403,6 +403,8 @@
 			sport->port_state = IB_PORT_ACTIVE;
 			dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
 					    "is active");
+			ehca_query_sma_attr(shca, port,
+					    &sport->saved_attr);
 		} else
 			notify_port_conf_change(shca, port);
 		break;
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index c469bfd..a8a2ea5 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -187,6 +187,11 @@
 
 int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
 
+int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+		     struct ib_wc *in_wc, struct ib_grh *in_grh,
+		     struct ib_mad *in_mad,
+		     struct ib_mad *out_mad);
+
 void ehca_poll_eqs(unsigned long data);
 
 int ehca_calc_ipd(struct ehca_shca *shca, int port,
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 84c9b7b..a86ebcc 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -472,7 +472,7 @@
 	shca->ib_device.dealloc_fmr	    = ehca_dealloc_fmr;
 	shca->ib_device.attach_mcast	    = ehca_attach_mcast;
 	shca->ib_device.detach_mcast	    = ehca_detach_mcast;
-	/* shca->ib_device.process_mad	    = ehca_process_mad;	    */
+	shca->ib_device.process_mad	    = ehca_process_mad;
 	shca->ib_device.mmap		    = ehca_mmap;
 
 	if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index 3aacc8c..2ce8cff 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -209,6 +209,10 @@
 			ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
 			return -EINVAL;
 		}
+		if (unlikely(send_wr->wr.ud.remote_qpn == 0)) {
+			ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num);
+			return -EINVAL;
+		}
 		my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah);
 		wqe_p->u.ud_av.ud_av = my_av->av;
 
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
index 79e72b2..706d97a 100644
--- a/drivers/infiniband/hw/ehca/ehca_sqp.c
+++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
@@ -39,12 +39,18 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <rdma/ib_mad.h>
 
 #include "ehca_classes.h"
 #include "ehca_tools.h"
 #include "ehca_iverbs.h"
 #include "hcp_if.h"
 
+#define IB_MAD_STATUS_REDIRECT		__constant_htons(0x0002)
+#define IB_MAD_STATUS_UNSUP_VERSION	__constant_htons(0x0004)
+#define IB_MAD_STATUS_UNSUP_METHOD	__constant_htons(0x0008)
+
+#define IB_PMA_CLASS_PORT_INFO		__constant_htons(0x0001)
 
 /**
  * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue
@@ -83,6 +89,9 @@
 				 port, ret);
 			return ret;
 		}
+		shca->sport[port - 1].pma_qp_nr = pma_qp_nr;
+		ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x",
+			 port, pma_qp_nr);
 		break;
 	default:
 		ehca_err(&shca->ib_device, "invalid qp_type=%x",
@@ -109,3 +118,85 @@
 
 	return H_SUCCESS;
 }
+
+struct ib_perf {	/* overlaid on struct ib_mad by ehca_process_perf() */
+	struct ib_mad_hdr mad_hdr;
+	u8 reserved[40];
+	u8 data[192];	/* GET/SET replies put an ib_class_port_info here */
+} __attribute__ ((packed));
+
+/* Answer a PMA request: GET/SET are redirected to the port's PMA QP. */
+static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
+			     struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+	struct ib_perf *in_perf = (struct ib_perf *)in_mad;
+	struct ib_perf *out_perf = (struct ib_perf *)out_mad;
+	struct ib_class_port_info *poi =
+		(struct ib_class_port_info *)out_perf->data;
+	struct ehca_shca *shca =
+		container_of(ibdev, struct ehca_shca, ib_device);
+	struct ehca_sport *sport = &shca->sport[port_num - 1];
+
+	ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method);
+
+	*out_mad = *in_mad;	/* reply starts out as a copy of the request */
+
+	if (in_perf->mad_hdr.class_version != 1) {
+		ehca_warn(ibdev, "Unsupported class_version=%x",
+			  in_perf->mad_hdr.class_version);
+		out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION;
+		goto perf_reply;
+	}
+
+	switch (in_perf->mad_hdr.method) {
+	case IB_MGMT_METHOD_GET:
+	case IB_MGMT_METHOD_SET:
+		/* set class port info for redirection */
+		out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO;
+		out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT;
+		memset(poi, 0, sizeof(*poi));
+		poi->base_version = 1;
+		poi->class_version = 1;
+		poi->resp_time_value = 18;
+		poi->redirect_lid = sport->saved_attr.lid;
+		poi->redirect_qp = sport->pma_qp_nr;
+		poi->redirect_qkey = IB_QP1_QKEY;
+		poi->redirect_pkey = IB_DEFAULT_PKEY_FULL;
+
+		ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x",
+			 sport->saved_attr.lid, sport->pma_qp_nr);
+		break;
+
+	case IB_MGMT_METHOD_GET_RESP:
+		return IB_MAD_RESULT_FAILURE;	/* a response gets no reply */
+
+	default:
+		out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD;
+		break;
+	}
+
+perf_reply:	/* every reply, including error statuses, is a GET_RESP */
+	out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+
+	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+/* MAD hook: PMA requests go to ehca_process_perf(), others are ignored. */
+int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+		     struct ib_wc *in_wc, struct ib_grh *in_grh,
+		     struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+	if (!port_num || port_num > ibdev->phys_port_cnt)
+		return IB_MAD_RESULT_FAILURE;
+
+	/* accept only pma request */
+	if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
+		return IB_MAD_RESULT_SUCCESS;
+
+	/* NOTE(review): in_wc is presumably NULL for requests that were not
+	 * received from the wire (confirm against callers); don't oops then.
+	 */
+	ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num,
+		 in_wc ? in_wc->src_qp : 0);
+	return ehca_process_perf(ibdev, port_num, in_mad, out_mad);
+}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index d8287d9..96a39b5 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -52,7 +52,7 @@
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRV_VERSION);
 
-static const char mlx4_ib_version[] __devinitdata =
+static const char mlx4_ib_version[] =
 	DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
 
@@ -468,6 +468,7 @@
 	if (err)
 		goto out;
 
+	dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
 	memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
 
 out:
@@ -516,9 +517,16 @@
 
 static void *mlx4_ib_add(struct mlx4_dev *dev)
 {
+	static int mlx4_ib_version_printed;
 	struct mlx4_ib_dev *ibdev;
 	int i;
 
+
+	if (!mlx4_ib_version_printed) {
+		printk(KERN_INFO "%s", mlx4_ib_version);
+		++mlx4_ib_version_printed;
+	}
+
 	ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
 	if (!ibdev) {
 		dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 6966f94..09a30dd 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1255,9 +1255,14 @@
 	if (err)
 		goto out;
 
-	MTHCA_GET(adapter->vendor_id, outbox,   QUERY_ADAPTER_VENDOR_ID_OFFSET);
-	MTHCA_GET(adapter->device_id, outbox,   QUERY_ADAPTER_DEVICE_ID_OFFSET);
-	MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET);
+	if (!mthca_is_memfree(dev)) {
+		MTHCA_GET(adapter->vendor_id, outbox,
+			  QUERY_ADAPTER_VENDOR_ID_OFFSET);
+		MTHCA_GET(adapter->device_id, outbox,
+			  QUERY_ADAPTER_DEVICE_ID_OFFSET);
+		MTHCA_GET(adapter->revision_id, outbox,
+			  QUERY_ADAPTER_REVISION_ID_OFFSET);
+	}
 	MTHCA_GET(adapter->inta_pin, outbox,    QUERY_ADAPTER_INTA_PIN_OFFSET);
 
 	get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4,
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 5cf8250..cd3d8ad 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -126,7 +126,7 @@
 MODULE_PARM_DESC(fmr_reserved_mtts,
 		 "number of memory translation table segments reserved for FMR");
 
-static const char mthca_version[] __devinitdata =
+static char mthca_version[] __devinitdata =
 	DRV_NAME ": Mellanox InfiniBand HCA driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
 
@@ -735,7 +735,8 @@
 	}
 
 	mdev->eq_table.inta_pin = adapter.inta_pin;
-	mdev->rev_id            = adapter.revision_id;
+	if (!mthca_is_memfree(mdev))
+		mdev->rev_id = adapter.revision_id;
 	memcpy(mdev->board_id, adapter.board_id, sizeof mdev->board_id);
 
 	return 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index aa6c70a..3b69855 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -613,8 +613,10 @@
 			sizeof *(mr->mem.tavor.mpt) * idx;
 
 	mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
-	if (IS_ERR(mr->mtt))
+	if (IS_ERR(mr->mtt)) {
+		err = PTR_ERR(mr->mtt);
 		goto err_out_table;
+	}
 
 	mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE;
 
@@ -627,8 +629,10 @@
 		mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
 
 	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
-	if (IS_ERR(mailbox))
+	if (IS_ERR(mailbox)) {
+		err = PTR_ERR(mailbox);
 		goto err_out_free_mtt;
+	}
 
 	mpt_entry = mailbox->buf;
 
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 6bcde1c..9e491df 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -923,17 +923,13 @@
 	struct mthca_mr *mr;
 	u64 *page_list;
 	u64 total_size;
-	u64 mask;
+	unsigned long mask;
 	int shift;
 	int npages;
 	int err;
 	int i, j, n;
 
-	/* First check that we have enough alignment */
-	if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK))
-		return ERR_PTR(-EINVAL);
-
-	mask = 0;
+	mask = buffer_list[0].addr ^ *iova_start;
 	total_size = 0;
 	for (i = 0; i < num_phys_buf; ++i) {
 		if (i != 0)
@@ -947,17 +943,7 @@
 	if (mask & ~PAGE_MASK)
 		return ERR_PTR(-EINVAL);
 
-	/* Find largest page shift we can use to cover buffers */
-	for (shift = PAGE_SHIFT; shift < 31; ++shift)
-		if (num_phys_buf > 1) {
-			if ((1ULL << shift) & mask)
-				break;
-		} else {
-			if (1ULL << shift >=
-			    buffer_list[0].size +
-			    (buffer_list[0].addr & ((1ULL << shift) - 1)))
-				break;
-		}
+	shift = __ffs(mask | 1 << 31);
 
 	buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
 	buffer_list[0].addr &= ~0ull << shift;
@@ -1270,6 +1256,8 @@
 		goto out;
 	}
 
+	if (mthca_is_memfree(dev))
+		dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
 	memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
 
 out:
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 0e5461c..db5595b 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1175,6 +1175,7 @@
 {
 	int ret;
 	int i;
+	struct mthca_next_seg *next;
 
 	qp->refcount = 1;
 	init_waitqueue_head(&qp->wait);
@@ -1217,7 +1218,6 @@
 	}
 
 	if (mthca_is_memfree(dev)) {
-		struct mthca_next_seg *next;
 		struct mthca_data_seg *scatter;
 		int size = (sizeof (struct mthca_next_seg) +
 			    qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16;
@@ -1240,6 +1240,13 @@
 						    qp->sq.wqe_shift) +
 						   qp->send_wqe_offset);
 		}
+	} else {
+		for (i = 0; i < qp->rq.max; ++i) {
+			next = get_recv_wqe(qp, i);
+			next->nda_op = htonl((((i + 1) % qp->rq.max) <<
+					      qp->rq.wqe_shift) | 1);
+		}
+
 	}
 
 	qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
@@ -1863,7 +1870,6 @@
 		prev_wqe = qp->rq.last;
 		qp->rq.last = wqe;
 
-		((struct mthca_next_seg *) wqe)->nda_op = 0;
 		((struct mthca_next_seg *) wqe)->ee_nds =
 			cpu_to_be32(MTHCA_NEXT_DBD);
 		((struct mthca_next_seg *) wqe)->flags = 0;
@@ -1885,9 +1891,6 @@
 
 		qp->wrid[ind] = wr->wr_id;
 
-		((struct mthca_next_seg *) prev_wqe)->nda_op =
-			cpu_to_be32((ind << qp->rq.wqe_shift) | 1);
-		wmb();
 		((struct mthca_next_seg *) prev_wqe)->ee_nds =
 			cpu_to_be32(MTHCA_NEXT_DBD | size);
 
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 553d681..a5ffff6 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -175,9 +175,17 @@
 	 * scatter list L_Keys to the sentry value of 0x100.
 	 */
 	for (i = 0; i < srq->max; ++i) {
-		wqe = get_wqe(srq, i);
+		struct mthca_next_seg *next;
 
-		*wqe_to_link(wqe) = i < srq->max - 1 ? i + 1 : -1;
+		next = wqe = get_wqe(srq, i);
+
+		if (i < srq->max - 1) {
+			*wqe_to_link(wqe) = i + 1;
+			next->nda_op = htonl(((i + 1) << srq->wqe_shift) | 1);
+		} else {
+			*wqe_to_link(wqe) = -1;
+			next->nda_op = 0;
+		}
 
 		for (scatter = wqe + sizeof (struct mthca_next_seg);
 		     (void *) scatter < wqe + (1 << srq->wqe_shift);
@@ -470,16 +478,15 @@
 void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr)
 {
 	int ind;
+	struct mthca_next_seg *last_free;
 
 	ind = wqe_addr >> srq->wqe_shift;
 
 	spin_lock(&srq->lock);
 
-	if (likely(srq->first_free >= 0))
-		*wqe_to_link(get_wqe(srq, srq->last_free)) = ind;
-	else
-		srq->first_free = ind;
-
+	last_free = get_wqe(srq, srq->last_free);
+	*wqe_to_link(last_free) = ind;
+	last_free->nda_op = htonl((ind << srq->wqe_shift) | 1);
 	*wqe_to_link(get_wqe(srq, ind)) = -1;
 	srq->last_free = ind;
 
@@ -506,15 +513,7 @@
 	first_ind = srq->first_free;
 
 	for (nreq = 0; wr; wr = wr->next) {
-		ind = srq->first_free;
-
-		if (unlikely(ind < 0)) {
-			mthca_err(dev, "SRQ %06x full\n", srq->srqn);
-			err = -ENOMEM;
-			*bad_wr = wr;
-			break;
-		}
-
+		ind       = srq->first_free;
 		wqe       = get_wqe(srq, ind);
 		next_ind  = *wqe_to_link(wqe);
 
@@ -528,7 +527,6 @@
 		prev_wqe  = srq->last;
 		srq->last = wqe;
 
-		((struct mthca_next_seg *) wqe)->nda_op = 0;
 		((struct mthca_next_seg *) wqe)->ee_nds = 0;
 		/* flags field will always remain 0 */
 
@@ -549,9 +547,6 @@
 		if (i < srq->max_gs)
 			mthca_set_data_seg_inval(wqe);
 
-		((struct mthca_next_seg *) prev_wqe)->nda_op =
-			cpu_to_be32((ind << srq->wqe_shift) | 1);
-		wmb();
 		((struct mthca_next_seg *) prev_wqe)->ee_nds =
 			cpu_to_be32(MTHCA_NEXT_DBD);
 
@@ -614,15 +609,7 @@
 	spin_lock_irqsave(&srq->lock, flags);
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
-		ind = srq->first_free;
-
-		if (unlikely(ind < 0)) {
-			mthca_err(dev, "SRQ %06x full\n", srq->srqn);
-			err = -ENOMEM;
-			*bad_wr = wr;
-			break;
-		}
-
+		ind       = srq->first_free;
 		wqe       = get_wqe(srq, ind);
 		next_ind  = *wqe_to_link(wqe);
 
@@ -633,8 +620,6 @@
 			break;
 		}
 
-		((struct mthca_next_seg *) wqe)->nda_op =
-			cpu_to_be32((next_ind << srq->wqe_shift) | 1);
 		((struct mthca_next_seg *) wqe)->ee_nds = 0;
 		/* flags field will always remain 0 */
 
diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig
new file mode 100644
index 0000000..2aeb7ac
--- /dev/null
+++ b/drivers/infiniband/hw/nes/Kconfig
@@ -0,0 +1,16 @@
+config INFINIBAND_NES
+	tristate "NetEffect RNIC Driver"
+	depends on PCI && INET && INFINIBAND
+	select LIBCRC32C
+	---help---
+	  This is a low-level driver for NetEffect RDMA enabled
+	  Network Interface Cards (RNIC).
+
+config INFINIBAND_NES_DEBUG
+	bool "Verbose debugging output"
+	depends on INFINIBAND_NES
+	default n
+	---help---
+	  This option causes the NetEffect RNIC driver to produce debug
+	  messages.  Select this if you are developing the driver
+	  or trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/nes/Makefile b/drivers/infiniband/hw/nes/Makefile
new file mode 100644
index 0000000..3514851
--- /dev/null
+++ b/drivers/infiniband/hw/nes/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_INFINIBAND_NES) += iw_nes.o
+
+iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
new file mode 100644
index 0000000..7f8853b
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -0,0 +1,1152 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/if_arp.h>
+#include <linux/highmem.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/iw_cm.h>
+
+#include "nes.h"
+
+#include <net/netevent.h>
+#include <net/neighbour.h>
+#include <linux/route.h>
+#include <net/ip_fib.h>
+
+MODULE_AUTHOR("NetEffect");
+MODULE_DESCRIPTION("NetEffect RNIC Low-level iWARP Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+int max_mtu = 9000;
+int nics_per_function = 1;
+int interrupt_mod_interval = 0;
+
+
+/* Interoperability: MPA protocol version used in MPA Req/Resp */
+int mpa_version = 1;
+module_param(mpa_version, int, 0);
+MODULE_PARM_DESC(mpa_version, "MPA version to be used in MPA Req/Resp (0 or 1)");
+
+/* Interoperability: non-zero disables checking of the MPA CRC */
+int disable_mpa_crc = 0;
+module_param(disable_mpa_crc, int, 0);
+MODULE_PARM_DESC(disable_mpa_crc, "Disable checking of MPA CRC");
+
+unsigned int send_first = 0;
+module_param(send_first, uint, 0);
+MODULE_PARM_DESC(send_first, "Send RDMA Message First on Active Connection");
+
+/* Bit mask of driver options (e.g. NES_DRV_OPT_ENABLE_MSI, tested in nes_probe()) */
+unsigned int nes_drv_opt = 0;
+module_param(nes_drv_opt, uint, 0);
+MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters");
+
+unsigned int nes_debug_level = 0;
+module_param_named(debug_level, nes_debug_level, uint, 0644);
+MODULE_PARM_DESC(debug_level, "Enable debug output level");
+
+LIST_HEAD(nes_adapter_list);
+LIST_HEAD(nes_dev_list);
+
+atomic_t qps_destroyed;
+atomic_t cqp_reqs_allocated;
+atomic_t cqp_reqs_freed;
+atomic_t cqp_reqs_dynallocated;
+atomic_t cqp_reqs_dynfreed;
+atomic_t cqp_reqs_queued;
+atomic_t cqp_reqs_redriven;
+
+static void nes_print_macaddr(struct net_device *netdev);
+static irqreturn_t nes_interrupt(int, void *);
+static int __devinit nes_probe(struct pci_dev *, const struct pci_device_id *);
+static void __devexit nes_remove(struct pci_dev *);
+static int __init nes_init_module(void);
+static void __exit nes_exit_module(void);
+static unsigned int ee_flsh_adapter;
+static unsigned int sysfs_nonidx_addr;
+static unsigned int sysfs_idx_addr;
+
+static struct pci_device_id nes_pci_table[] = {
+	{PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020, PCI_ANY_ID, PCI_ANY_ID},
+	{0}
+};
+
+MODULE_DEVICE_TABLE(pci, nes_pci_table);
+
+static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *);
+static int nes_net_event(struct notifier_block *, unsigned long, void *);
+static int nes_notifiers_registered;
+
+/* Registered by nes_probe() for the first device, unregistered when the last one goes away */
+static struct notifier_block nes_inetaddr_notifier = {
+	.notifier_call = nes_inetaddr_event
+};
+
+static struct notifier_block nes_net_notifier = {
+	.notifier_call = nes_net_event
+};
+
+
+
+
+/**
+ * nes_inetaddr_event - on IPv4 address up/down for one of our netdevs, update the adapter's IP register and ARP cache entry
+ */
+static int nes_inetaddr_event(struct notifier_block *notifier,
+		unsigned long event, void *ptr)
+{
+	struct in_ifaddr *ifa = ptr;
+	struct net_device *event_netdev = ifa->ifa_dev->dev;
+	struct nes_device *nesdev;
+	struct net_device *netdev;
+	struct nes_vnic *nesvnic;
+	unsigned int addr;
+	unsigned int mask;
+
+	addr = ntohl(ifa->ifa_address);
+	mask = ntohl(ifa->ifa_mask);
+	nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address %08X, netmask %08X.\n",
+			addr, mask);
+	list_for_each_entry(nesdev, &nes_dev_list, list) {
+		nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p. (%s)\n",
+				nesdev, nesdev->netdev[0]->name);
+		netdev = nesdev->netdev[0];
+		nesvnic = netdev_priv(netdev);
+		if (netdev == event_netdev) {
+			if (nesvnic->rdma_enabled == 0) {
+				nes_debug(NES_DBG_NETDEV, "Returning without processing event for %s since"
+						" RDMA is not enabled.\n",
+						netdev->name);
+				return NOTIFY_OK;
+			}
+			/* we have ifa->ifa_address/mask here if we need it */
+			switch (event) {
+				case NETDEV_DOWN:
+					nes_debug(NES_DBG_NETDEV, "event:DOWN\n");
+					nes_write_indexed(nesdev,
+							NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)), 0);
+
+					nes_manage_arp_cache(netdev, netdev->dev_addr,
+							ntohl(nesvnic->local_ipaddr), NES_ARP_DELETE);
+					nesvnic->local_ipaddr = 0;
+					return NOTIFY_OK;
+					break;
+				case NETDEV_UP:
+					nes_debug(NES_DBG_NETDEV, "event:UP\n");
+
+					if (nesvnic->local_ipaddr != 0) {
+						nes_debug(NES_DBG_NETDEV, "Interface already has local_ipaddr\n");
+						return NOTIFY_OK;
+					}
+					/* Remember the address (kept in network byte order) and program it into the adapter */
+					nesvnic->local_ipaddr = ifa->ifa_address;
+
+					nes_write_indexed(nesdev,
+							NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)),
+							ntohl(ifa->ifa_address));
+					nes_manage_arp_cache(netdev, netdev->dev_addr,
+							ntohl(nesvnic->local_ipaddr), NES_ARP_ADD);
+					return NOTIFY_OK;
+					break;
+				default:
+					break;
+			}
+		}
+	}
+
+	return NOTIFY_DONE;
+}
+
+
+/**
+ * nes_net_event - on neighbour updates for one of our netdevs, add or delete the matching ARP cache entry
+ */
+static int nes_net_event(struct notifier_block *notifier,
+		unsigned long event, void *ptr)
+{
+	struct neighbour *neigh = ptr;
+	struct nes_device *nesdev;
+	struct net_device *netdev;
+	struct nes_vnic *nesvnic;
+
+	switch (event) {
+		case NETEVENT_NEIGH_UPDATE:
+			list_for_each_entry(nesdev, &nes_dev_list, list) {
+				/* nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p.\n", nesdev); */
+				netdev = nesdev->netdev[0];
+				nesvnic = netdev_priv(netdev);
+				if (netdev == neigh->dev) {
+					if (nesvnic->rdma_enabled == 0) {
+						nes_debug(NES_DBG_NETDEV, "Skipping device %s since no RDMA\n",
+								netdev->name);
+					} else {
+						if (neigh->nud_state & NUD_VALID) {
+							nes_manage_arp_cache(neigh->dev, neigh->ha,
+									ntohl(*(__be32 *)neigh->primary_key), NES_ARP_ADD);
+						} else {
+							nes_manage_arp_cache(neigh->dev, neigh->ha,
+									ntohl(*(__be32 *)neigh->primary_key), NES_ARP_DELETE);
+						}
+					}
+					return NOTIFY_OK;
+				}
+			}
+			break;
+		default:
+			nes_debug(NES_DBG_NETDEV, "NETEVENT_ %lu undefined\n", event);
+			break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+
+/**
+ * nes_add_ref - take an additional reference on a QP
+ * @ibqp: verbs QP whose nes_qp refcount is incremented
+ */
+void nes_add_ref(struct ib_qp *ibqp)
+{
+	struct nes_qp *qp = to_nesqp(ibqp);
+
+	nes_debug(NES_DBG_QP, "Bumping refcount for QP%u.  Pre-inc value = %u\n",
+			ibqp->qp_num, atomic_read(&qp->refcount));
+	atomic_inc(&qp->refcount);
+}
+
+/*
+ * Callback attached to the destroy-QP CQP request built by nes_rem_ref():
+ * frees the QP's DMA memory, its QP id and its backing allocation.
+ */
+static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_request *cqp_request)
+{
+	struct nes_qp *nesqp = cqp_request->cqp_callback_pointer;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	u32 qp_id = nesqp->hwqp.qp_id;
+	unsigned long flags;
+
+	atomic_inc(&qps_destroyed);
+
+	/* Free the control structures */
+	if (!nesqp->pbl_vbase) {
+		pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size,
+				nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase);
+	} else {
+		pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size,
+				nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase);
+		spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+		nesadapter->free_256pbl++;
+		spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+		pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase);
+		nesqp->pbl_vbase = NULL;
+	}
+	nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_id);
+
+	kfree(nesqp->allocated_buffer);
+}
+
+/**
+ * nes_rem_ref - drop a QP reference; the final put posts a destroy-QP request to the CQP
+ */
+void nes_rem_ref(struct ib_qp *ibqp)
+{
+	u64 u64temp;
+	struct nes_qp *nesqp;
+	struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	struct nes_cqp_request *cqp_request;
+	u32 opcode;
+
+	nesqp = to_nesqp(ibqp);
+
+	if (atomic_read(&nesqp->refcount) == 0) {
+		printk(KERN_INFO PFX "%s: Reference count already 0 for QP%d, last aeq = 0x%04X.\n",
+				__FUNCTION__, ibqp->qp_num, nesqp->last_aeq);
+		BUG();
+	}
+
+	if (atomic_dec_and_test(&nesqp->refcount)) {
+		nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL;
+
+		/* Destroy the QP; NOTE(review): if no cqp_request is available the QP is not torn down */
+		cqp_request = nes_get_cqp_request(nesdev);
+		if (cqp_request == NULL) {
+			nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n");
+			return;
+		}
+		cqp_request->waiting = 0;
+		cqp_request->callback = 1;
+		cqp_request->cqp_callback = nes_cqp_rem_ref_callback;
+		cqp_request->cqp_callback_pointer = nesqp;
+		cqp_wqe = &cqp_request->cqp_wqe;
+
+		nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+		opcode = NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_IWARP;
+
+		if (nesqp->hte_added) {
+			opcode  |= NES_CQP_QP_DEL_HTE;
+			nesqp->hte_added = 0;
+		}
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
+		u64temp = (u64)nesqp->nesqp_context_pbase;
+		set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
+		nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+	}
+}
+
+
+/**
+ * nes_get_qp - look up the ib_qp for a QP number
+ * Returns NULL when @qpn is outside [NES_FIRST_QPN, NES_FIRST_QPN + max_qp).
+ */
+struct ib_qp *nes_get_qp(struct ib_device *device, int qpn)
+{
+	struct nes_adapter *adapter = to_nesvnic(device)->nesdev->nesadapter;
+
+	if (qpn < NES_FIRST_QPN || qpn >= (NES_FIRST_QPN + adapter->max_qp))
+		return NULL;
+
+	/* NOTE(review): a NULL qp_table slot is not checked before taking &->ibqp */
+	return &adapter->qp_table[qpn - NES_FIRST_QPN]->ibqp;
+}
+
+
+/**
+ * nes_print_macaddr - debug-log a netdev's name, MAC address and IRQ number
+ */
+static void nes_print_macaddr(struct net_device *netdev)
+{
+	nes_debug(NES_DBG_INIT, "%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, IRQ %u\n",
+			netdev->name,
+			netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
+			netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5],
+			netdev->irq);
+}
+
+
+/**
+ * nes_interrupt - claim the IRQ if it is ours, then call nes_napi_isr() and schedule the DPC tasklet when that returns 0
+ */
+static irqreturn_t nes_interrupt(int irq, void *dev_id)
+{
+	struct nes_device *nesdev = (struct nes_device *)dev_id;
+	int handled = 0;
+	u32 int_mask;
+	u32 int_req;
+	u32 int_stat;
+	u32 intf_int_stat;
+	u32 timer_stat;
+
+	if (nesdev->msi_enabled) {
+		/* No need to read the interrupt pending register if msi is enabled */
+		handled = 1;
+	} else {
+		if (unlikely(nesdev->nesadapter->hw_rev == NE020_REV)) {
+			/* Master interrupt enable provides synchronization for kicking off bottom half
+			  when interrupt sharing is going on */
+			int_mask = nes_read32(nesdev->regs + NES_INT_MASK);
+			if (int_mask & 0x80000000) {
+				/* Check interrupt status to see if this might be ours */
+				int_stat = nes_read32(nesdev->regs + NES_INT_STAT);
+				int_req = nesdev->int_req;
+				if (int_stat&int_req) {
+					/* if interesting CEQ or AEQ is pending, claim the interrupt */
+					if ((int_stat&int_req) & (~(NES_INT_TIMER|NES_INT_INTF))) {
+						handled = 1;
+					} else {
+						if (((int_stat & int_req) & NES_INT_TIMER) == NES_INT_TIMER) {
+							/* Timer might be running but might be for another function */
+							timer_stat = nes_read32(nesdev->regs + NES_TIMER_STAT);
+							if ((timer_stat & nesdev->timer_int_req) != 0) {
+								handled = 1;
+							}
+						}
+						if ((((int_stat & int_req) & NES_INT_INTF) == NES_INT_INTF) &&
+								(handled == 0)) {
+							intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT);
+							if ((intf_int_stat & nesdev->intf_int_req) != 0) {
+								handled = 1;
+							}
+						}
+					}
+					if (handled) {
+						nes_write32(nesdev->regs+NES_INT_MASK, int_mask & (~0x80000000));
+						int_mask = nes_read32(nesdev->regs+NES_INT_MASK);
+						/* Save off the status to save an additional read */
+						nesdev->int_stat = int_stat;
+						nesdev->napi_isr_ran = 1;
+					}
+				}
+			}
+		} else {
+			handled = nes_read32(nesdev->regs+NES_INT_PENDING);
+		}
+	}
+
+	if (handled) {
+
+		if (nes_napi_isr(nesdev) == 0) {
+			tasklet_schedule(&nesdev->dpc_tasklet);
+
+		}
+		return IRQ_HANDLED;
+	} else {
+		return IRQ_NONE;
+	}
+}
+
+
+/**
+ * nes_probe - Device initialization
+ * Enables the PCI function, maps BAR0, brings up the adapter, control QP and
+ * IRQ, then creates and registers the net device.  Returns 0 or a negative errno.
+ */
+static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
+{
+	struct net_device *netdev = NULL;
+	struct nes_device *nesdev = NULL;
+	int ret = 0;
+	void __iomem *mmio_regs = NULL;
+	u8 hw_rev;
+
+	assert(pcidev != NULL);
+	assert(ent != NULL);
+
+	printk(KERN_INFO PFX "NetEffect RNIC driver v%s loading. (%s)\n",
+			DRV_VERSION, pci_name(pcidev));
+
+	ret = pci_enable_device(pcidev);
+	if (ret) {
+		printk(KERN_ERR PFX "Unable to enable PCI device. (%s)\n", pci_name(pcidev));
+		goto bail0;
+	}
+
+	nes_debug(NES_DBG_INIT, "BAR0 (@0x%08lX) size = 0x%lX bytes\n",
+			(long unsigned int)pci_resource_start(pcidev, BAR_0),
+			(long unsigned int)pci_resource_len(pcidev, BAR_0));
+	nes_debug(NES_DBG_INIT, "BAR1 (@0x%08lX) size = 0x%lX bytes\n",
+			(long unsigned int)pci_resource_start(pcidev, BAR_1),
+			(long unsigned int)pci_resource_len(pcidev, BAR_1));
+
+	/* Make sure PCI base addr are MMIO */
+	if (!(pci_resource_flags(pcidev, BAR_0) & IORESOURCE_MEM) ||
+			!(pci_resource_flags(pcidev, BAR_1) & IORESOURCE_MEM)) {
+		printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
+		ret = -ENODEV;
+		goto bail1;
+	}
+
+	/* Reserve PCI I/O and memory resources */
+	ret = pci_request_regions(pcidev, DRV_NAME);
+	if (ret) {
+		printk(KERN_ERR PFX "Unable to request regions. (%s)\n", pci_name(pcidev));
+		goto bail1;
+	}
+
+	if ((sizeof(dma_addr_t) > 4)) {
+		ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK);
+		if (ret < 0) {
+			printk(KERN_ERR PFX "64b DMA mask configuration failed\n");
+			goto bail2;
+		}
+		ret = pci_set_consistent_dma_mask(pcidev, DMA_64BIT_MASK);
+		if (ret) {
+			printk(KERN_ERR PFX "64b DMA consistent mask configuration failed\n");
+			goto bail2;
+		}
+	} else {
+		ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK);
+		if (ret < 0) {
+			printk(KERN_ERR PFX "32b DMA mask configuration failed\n");
+			goto bail2;
+		}
+		ret = pci_set_consistent_dma_mask(pcidev, DMA_32BIT_MASK);
+		if (ret) {
+			printk(KERN_ERR PFX "32b DMA consistent mask configuration failed\n");
+			goto bail2;
+		}
+	}
+
+	pci_set_master(pcidev);
+
+	/* Allocate hardware structure */
+	nesdev = kzalloc(sizeof(struct nes_device), GFP_KERNEL);
+	if (!nesdev) {
+		printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n", pci_name(pcidev));
+		ret = -ENOMEM;
+		goto bail2;
+	}
+
+	nes_debug(NES_DBG_INIT, "Allocated nes device at %p\n", nesdev);
+	nesdev->pcidev = pcidev;
+	pci_set_drvdata(pcidev, nesdev);
+
+	pci_read_config_byte(pcidev, 0x0008, &hw_rev);
+	nes_debug(NES_DBG_INIT, "hw_rev=%u\n", hw_rev);
+
+	spin_lock_init(&nesdev->indexed_regs_lock);
+
+	/* Remap the whole of adapter BAR0 (not just sizeof a pointer) to kernel VA space */
+	mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0), pci_resource_len(pcidev, BAR_0));
+	if (mmio_regs == NULL) {
+		printk(KERN_ERR PFX "Unable to remap BAR0\n");
+		ret = -EIO;
+		goto bail3;
+	}
+	nesdev->regs = mmio_regs;
+	nesdev->index_reg = 0x50 + (PCI_FUNC(pcidev->devfn)*8) + mmio_regs;
+
+	/* Ensure interrupts are disabled */
+	nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff);
+
+	if (nes_drv_opt & NES_DRV_OPT_ENABLE_MSI) {
+		if (!pci_enable_msi(nesdev->pcidev)) {
+			nesdev->msi_enabled = 1;
+			nes_debug(NES_DBG_INIT, "MSI is enabled for device %s\n",
+					pci_name(pcidev));
+		} else {
+			nes_debug(NES_DBG_INIT, "MSI is disabled by linux for device %s\n",
+					pci_name(pcidev));
+		}
+	} else {
+		nes_debug(NES_DBG_INIT, "MSI not requested due to driver options for device %s\n",
+				pci_name(pcidev));
+	}
+
+	nesdev->csr_start = pci_resource_start(nesdev->pcidev, BAR_0);
+	nesdev->doorbell_region = pci_resource_start(nesdev->pcidev, BAR_1);
+
+	/* Init the adapter; it must be checked for NULL before any field is touched */
+	nesdev->nesadapter = nes_init_adapter(nesdev, hw_rev);
+	if (!nesdev->nesadapter) {
+		printk(KERN_ERR PFX "Unable to initialize adapter.\n");
+		ret = -ENOMEM;
+		goto bail5;
+	}
+	nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval;
+
+	/* nesdev->base_doorbell_index =
+			nesdev->nesadapter->pd_config_base[PCI_FUNC(nesdev->pcidev->devfn)]; */
+	nesdev->base_doorbell_index = 1;
+	nesdev->doorbell_start = nesdev->nesadapter->doorbell_start;
+	nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) % nesdev->nesadapter->port_count;
+
+	tasklet_init(&nesdev->dpc_tasklet, nes_dpc, (unsigned long)nesdev);
+
+	/* bring up the Control QP */
+	if (nes_init_cqp(nesdev)) {
+		ret = -ENODEV;
+		goto bail6;
+	}
+
+	/* Arm the CCQ */
+	nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
+			PCI_FUNC(nesdev->pcidev->devfn));
+	nes_read32(nesdev->regs+NES_CQE_ALLOC);
+
+	/* Enable the interrupts */
+	nesdev->int_req = (0x101 << PCI_FUNC(nesdev->pcidev->devfn)) |
+			(1 << (PCI_FUNC(nesdev->pcidev->devfn)+16));
+	if (PCI_FUNC(nesdev->pcidev->devfn) < 4) {
+		nesdev->int_req |= (1 << (PCI_FUNC(nesdev->pcidev->devfn)+24));
+	}
+
+	/* TODO: This really should be the first driver to load, not function 0 */
+	if (PCI_FUNC(nesdev->pcidev->devfn) == 0) {
+		/* pick up PCI and critical errors if the first driver to load */
+		nesdev->intf_int_req = NES_INTF_INT_PCIERR | NES_INTF_INT_CRITERR;
+		nesdev->int_req |= NES_INT_INTF;
+	} else {
+		nesdev->intf_int_req = 0;
+	}
+	nesdev->intf_int_req |= (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16));
+	nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0, 0);
+	nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 0);
+	nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS2, 0x00001265);
+	nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS4, 0x18021804);
+	nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS3, 0x17801790);
+
+	/* deal with both periodic and one_shot */
+	nesdev->timer_int_req = 0x101 << PCI_FUNC(nesdev->pcidev->devfn);
+	nesdev->nesadapter->timer_int_req |= nesdev->timer_int_req;
+	nes_debug(NES_DBG_INIT, "setting int_req for function %u, nesdev = 0x%04X, adapter = 0x%04X\n",
+			PCI_FUNC(nesdev->pcidev->devfn),
+			nesdev->timer_int_req, nesdev->nesadapter->timer_int_req);
+
+	nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
+
+	list_add_tail(&nesdev->list, &nes_dev_list);
+
+	/* Request an interrupt line for the driver */
+	ret = request_irq(pcidev->irq, nes_interrupt, IRQF_SHARED, DRV_NAME, nesdev);
+	if (ret) {
+		printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
+				pci_name(pcidev), pcidev->irq);
+		/* No IRQ was obtained: undo the list insertion and CQP setup, skip free_irq() */
+		list_del(&nesdev->list);
+		nes_destroy_cqp(nesdev);
+		goto bail65;
+	}
+
+	nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
+
+	if (nes_notifiers_registered == 0) {
+		register_inetaddr_notifier(&nes_inetaddr_notifier);
+		register_netevent_notifier(&nes_net_notifier);
+	}
+	nes_notifiers_registered++;
+
+	/* Initialize network devices */
+	if ((netdev = nes_netdev_init(nesdev, mmio_regs)) == NULL) {
+		ret = -ENOMEM;	/* ret is still 0 from request_irq() at this point */
+		goto bail7;
+	}
+
+	/* Register network device */
+	ret = register_netdev(netdev);
+	if (ret) {
+		printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", ret);
+		nes_netdev_destroy(netdev);
+		goto bail7;
+	}
+
+	nes_print_macaddr(netdev);
+
+	nesdev->netdev_count++;
+	nesdev->nesadapter->netdev_count++;
+
+	printk(KERN_INFO PFX "%s: NetEffect RNIC driver successfully loaded.\n",
+			pci_name(pcidev));
+	return 0;
+
+	bail7:
+	printk(KERN_ERR PFX "bail7\n");
+	while (nesdev->netdev_count > 0) {
+		nesdev->netdev_count--;
+		nesdev->nesadapter->netdev_count--;
+
+		unregister_netdev(nesdev->netdev[nesdev->netdev_count]);
+		nes_netdev_destroy(nesdev->netdev[nesdev->netdev_count]);
+	}
+
+	nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n",
+			nesdev->netdev_count, nesdev->nesadapter->netdev_count);
+
+	nes_notifiers_registered--;
+	if (nes_notifiers_registered == 0) {
+		unregister_netevent_notifier(&nes_net_notifier);
+		unregister_inetaddr_notifier(&nes_inetaddr_notifier);
+	}
+
+	list_del(&nesdev->list);
+	nes_destroy_cqp(nesdev);
+	free_irq(pcidev->irq, nesdev);
+	bail65:
+	printk(KERN_ERR PFX "bail65\n");
+	bail6:
+	printk(KERN_ERR PFX "bail6\n");
+	tasklet_kill(&nesdev->dpc_tasklet);
+	/* Deallocate the Adapter Structure */
+	nes_destroy_adapter(nesdev->nesadapter);
+
+	bail5:
+	printk(KERN_ERR PFX "bail5\n");
+	if (nesdev->msi_enabled) {
+		pci_disable_msi(pcidev);
+	}
+	iounmap(nesdev->regs);
+
+	bail3:
+	printk(KERN_ERR PFX "bail3\n");
+	kfree(nesdev);
+
+	bail2:
+	pci_release_regions(pcidev);
+
+	bail1:
+	pci_disable_device(pcidev);
+
+	bail0:
+	return ret;
+}
+
+
+/**
+ * nes_remove - tear down the netdev, notifiers, CQP, adapter, IRQ and PCI resources of one device
+ */
+static void __devexit nes_remove(struct pci_dev *pcidev)
+{
+	struct nes_device *nesdev = pci_get_drvdata(pcidev);
+	struct net_device *netdev;
+	int netdev_index = 0;
+
+		if (nesdev->netdev_count) {
+			netdev = nesdev->netdev[netdev_index];
+			if (netdev) {
+				netif_stop_queue(netdev);
+				unregister_netdev(netdev);
+				nes_netdev_destroy(netdev);
+
+				nesdev->netdev[netdev_index] = NULL;
+				nesdev->netdev_count--;
+				nesdev->nesadapter->netdev_count--;
+			}
+		}
+
+	nes_notifiers_registered--;
+	if (nes_notifiers_registered == 0) {
+		unregister_netevent_notifier(&nes_net_notifier);
+		unregister_inetaddr_notifier(&nes_inetaddr_notifier);
+	}
+
+	list_del(&nesdev->list);
+	nes_destroy_cqp(nesdev);
+	tasklet_kill(&nesdev->dpc_tasklet);
+
+	/* Deallocate the Adapter Structure */
+	nes_destroy_adapter(nesdev->nesadapter);
+
+	free_irq(pcidev->irq, nesdev);
+
+	if (nesdev->msi_enabled) {
+		pci_disable_msi(pcidev);
+	}
+
+	iounmap(nesdev->regs);
+	kfree(nesdev);
+
+	/* nes_debug(NES_DBG_SHUTDOWN, "calling pci_release_regions.\n"); */
+	pci_release_regions(pcidev);
+	pci_disable_device(pcidev);
+	pci_set_drvdata(pcidev, NULL);
+}
+
+/* PCI driver glue: binds the IDs in nes_pci_table to nes_probe()/nes_remove() */
+static struct pci_driver nes_pci_driver = {
+	.name = DRV_NAME,
+	.id_table = nes_pci_table,
+	.probe = nes_probe,
+	.remove = __devexit_p(nes_remove),
+};
+
+/* Driver attribute show: "bus:devfn" (hex) of the adapter at list index ee_flsh_adapter */
+static ssize_t nes_show_adapter(struct device_driver *ddp, char *buf)
+{
+	struct nes_device *nesdev;
+	unsigned int devfn = 0xffffffff;
+	unsigned char bus = 0xff;
+	unsigned int idx = 0;
+
+	list_for_each_entry(nesdev, &nes_dev_list, list) {
+		if (idx++ != ee_flsh_adapter)
+			continue;
+		devfn = nesdev->nesadapter->devfn;
+		bus = nesdev->nesadapter->bus_number;
+		break;
+	}
+
+	return snprintf(buf, PAGE_SIZE, "%x:%x", bus, devfn);
+}
+
+/**
+ * nes_store_adapter - sysfs store handler for the "adapter" driver attribute
+ * @ddp: driver the attribute belongs to (unused)
+ * @buf: user input
+ * @count: number of bytes in @buf
+ *
+ * Parses @buf as a decimal number with simple_strtoul() and stores it in
+ * ee_flsh_adapter, the list position used by the other register attributes.
+ * Returns strnlen(@buf, @count).
+ */
+static ssize_t nes_store_adapter(struct device_driver *ddp,
+	const char *buf, size_t count)
+{
+	ee_flsh_adapter = simple_strtoul(buf, NULL, 10);
+
+	return strnlen(buf, count);
+}
+
+/**
+ * nes_show_ee_cmd - sysfs show handler for the "eeprom_cmd" driver attribute
+ * @ddp: driver the attribute belongs to (unused)
+ * @buf: output buffer, filled with snprintf() bounded by PAGE_SIZE
+ *
+ * Reads NES_EEPROM_COMMAND from the device at position ee_flsh_adapter in
+ * nes_dev_list; prints 0xdead if no device sits at that position.
+ */
+static ssize_t nes_show_ee_cmd(struct device_driver *ddp, char *buf)
+{
+	struct nes_device *dev;
+	u32 reg = 0xdead;
+	u32 pos = 0;
+
+	list_for_each_entry(dev, &nes_dev_list, list) {
+		if (pos++ != ee_flsh_adapter)
+			continue;
+		reg = nes_read32(dev->regs + NES_EEPROM_COMMAND);
+		break;
+	}
+
+	return snprintf(buf, PAGE_SIZE, "0x%x\n", reg);
+}
+
+/**
+ * nes_store_ee_cmd - sysfs store handler for the "eeprom_cmd" driver attribute
+ * @ddp: driver the attribute belongs to (unused)
+ * @buf: user input
+ * @count: number of bytes in @buf
+ *
+ * Input is acted on only when one of its first two characters is 'x' or
+ * 'X'; it is then parsed by simple_strtoul() in base 16 and written to
+ * NES_EEPROM_COMMAND of the device at position ee_flsh_adapter in
+ * nes_dev_list.  Other input is ignored.  Returns strnlen(@buf, @count).
+ */
+static ssize_t nes_store_ee_cmd(struct device_driver *ddp,
+	const char *buf, size_t count)
+{
+	struct nes_device *dev;
+	u32 pos = 0;
+	u32 val;
+
+	if (buf[1] != 'x' && buf[1] != 'X' && buf[0] != 'x' && buf[0] != 'X')
+		return strnlen(buf, count);
+
+	val = simple_strtoul(buf, NULL, 16);
+	list_for_each_entry(dev, &nes_dev_list, list) {
+		if (pos++ != ee_flsh_adapter)
+			continue;
+		nes_write32(dev->regs + NES_EEPROM_COMMAND, val);
+		break;
+	}
+
+	return strnlen(buf, count);
+}
+
+/**
+ * nes_show_ee_data - sysfs show handler for the "eeprom_data" driver attribute
+ * @ddp: driver the attribute belongs to (unused)
+ * @buf: output buffer, filled with snprintf() bounded by PAGE_SIZE
+ *
+ * Reads NES_EEPROM_DATA from the device at position ee_flsh_adapter in
+ * nes_dev_list; prints 0xdead if no device sits at that position.
+ */
+static ssize_t nes_show_ee_data(struct device_driver *ddp, char *buf)
+{
+	struct nes_device *dev;
+	u32 reg = 0xdead;
+	u32 pos = 0;
+
+	list_for_each_entry(dev, &nes_dev_list, list) {
+		if (pos++ != ee_flsh_adapter)
+			continue;
+		reg = nes_read32(dev->regs + NES_EEPROM_DATA);
+		break;
+	}
+
+	return snprintf(buf, PAGE_SIZE, "0x%x\n", reg);
+}
+
+/**
+ * nes_store_ee_data - sysfs store handler for the "eeprom_data" driver attribute
+ * @ddp: driver the attribute belongs to (unused)
+ * @buf: user input
+ * @count: number of bytes in @buf
+ *
+ * Input is acted on only when one of its first two characters is 'x' or
+ * 'X'; it is then parsed by simple_strtoul() in base 16 and written to
+ * NES_EEPROM_DATA of the device at position ee_flsh_adapter in
+ * nes_dev_list.  Other input is ignored.  Returns strnlen(@buf, @count).
+ */
+static ssize_t nes_store_ee_data(struct device_driver *ddp,
+	const char *buf, size_t count)
+{
+	struct nes_device *dev;
+	u32 pos = 0;
+	u32 val;
+
+	if (buf[1] != 'x' && buf[1] != 'X' && buf[0] != 'x' && buf[0] != 'X')
+		return strnlen(buf, count);
+
+	val = simple_strtoul(buf, NULL, 16);
+	list_for_each_entry(dev, &nes_dev_list, list) {
+		if (pos++ != ee_flsh_adapter)
+			continue;
+		nes_write32(dev->regs + NES_EEPROM_DATA, val);
+		break;
+	}
+
+	return strnlen(buf, count);
+}
+
+/**
+ * nes_show_flash_cmd - sysfs show handler for the "flash_cmd" driver attribute
+ * @ddp: driver the attribute belongs to (unused)
+ * @buf: output buffer, filled with snprintf() bounded by PAGE_SIZE
+ *
+ * Reads NES_FLASH_COMMAND from the device at position ee_flsh_adapter in
+ * nes_dev_list; prints 0xdead if no device sits at that position.
+ */
+static ssize_t nes_show_flash_cmd(struct device_driver *ddp, char *buf)
+{
+	struct nes_device *dev;
+	u32 reg = 0xdead;
+	u32 pos = 0;
+
+	list_for_each_entry(dev, &nes_dev_list, list) {
+		if (pos++ != ee_flsh_adapter)
+			continue;
+		reg = nes_read32(dev->regs + NES_FLASH_COMMAND);
+		break;
+	}
+
+	return snprintf(buf, PAGE_SIZE, "0x%x\n", reg);
+}
+
+/**
+ * nes_store_flash_cmd - sysfs store handler for the "flash_cmd" driver attribute
+ * @ddp: driver the attribute belongs to (unused)
+ * @buf: user input
+ * @count: number of bytes in @buf
+ *
+ * Input is acted on only when one of its first two characters is 'x' or
+ * 'X'; it is then parsed by simple_strtoul() in base 16 and written to
+ * NES_FLASH_COMMAND of the device at position ee_flsh_adapter in
+ * nes_dev_list.  Other input is ignored.  Returns strnlen(@buf, @count).
+ */
+static ssize_t nes_store_flash_cmd(struct device_driver *ddp,
+	const char *buf, size_t count)
+{
+	struct nes_device *dev;
+	u32 pos = 0;
+	u32 val;
+
+	if (buf[1] != 'x' && buf[1] != 'X' && buf[0] != 'x' && buf[0] != 'X')
+		return strnlen(buf, count);
+
+	val = simple_strtoul(buf, NULL, 16);
+	list_for_each_entry(dev, &nes_dev_list, list) {
+		if (pos++ != ee_flsh_adapter)
+			continue;
+		nes_write32(dev->regs + NES_FLASH_COMMAND, val);
+		break;
+	}
+
+	return strnlen(buf, count);
+}
+
+/**
+ * nes_show_flash_data - sysfs show handler for the "flash_data" driver attribute
+ * @ddp: driver the attribute belongs to (unused)
+ * @buf: output buffer, filled with snprintf() bounded by PAGE_SIZE
+ *
+ * Reads NES_FLASH_DATA from the device at position ee_flsh_adapter in
+ * nes_dev_list; prints 0xdead if no device sits at that position.
+ */
+static ssize_t nes_show_flash_data(struct device_driver *ddp, char *buf)
+{
+	struct nes_device *dev;
+	u32 reg = 0xdead;
+	u32 pos = 0;
+
+	list_for_each_entry(dev, &nes_dev_list, list) {
+		if (pos++ != ee_flsh_adapter)
+			continue;
+		reg = nes_read32(dev->regs + NES_FLASH_DATA);
+		break;
+	}
+
+	return snprintf(buf, PAGE_SIZE, "0x%x\n", reg);
+}
+
+/**
+ * nes_store_flash_data - sysfs store handler for the "flash_data" driver attribute
+ * @ddp: driver the attribute belongs to (unused)
+ * @buf: user input
+ * @count: number of bytes in @buf
+ *
+ * Input is acted on only when one of its first two characters is 'x' or
+ * 'X'; it is then parsed by simple_strtoul() in base 16 and written to
+ * NES_FLASH_DATA of the device at position ee_flsh_adapter in
+ * nes_dev_list.  Other input is ignored.  Returns strnlen(@buf, @count).
+ */
+static ssize_t nes_store_flash_data(struct device_driver *ddp,
+	const char *buf, size_t count)
+{
+	struct nes_device *dev;
+	u32 pos = 0;
+	u32 val;
+
+	if (buf[1] != 'x' && buf[1] != 'X' && buf[0] != 'x' && buf[0] != 'X')
+		return strnlen(buf, count);
+
+	val = simple_strtoul(buf, NULL, 16);
+	list_for_each_entry(dev, &nes_dev_list, list) {
+		if (pos++ != ee_flsh_adapter)
+			continue;
+		nes_write32(dev->regs + NES_FLASH_DATA, val);
+		break;
+	}
+
+	return strnlen(buf, count);
+}
+
+/*
+ * sysfs show handler for "nonidx_addr": prints the current value of
+ * sysfs_nonidx_addr, the offset from regs that the nonidx_data handlers use.
+ */
+static ssize_t nes_show_nonidx_addr(struct device_driver *ddp, char *buf)
+{
+	return  snprintf(buf, PAGE_SIZE, "0x%x\n", sysfs_nonidx_addr);
+}
+
+/**
+ * nes_store_nonidx_addr - sysfs store handler for "nonidx_addr"
+ * @ddp: driver the attribute belongs to (unused)
+ * @buf: user input
+ * @count: number of bytes in @buf
+ *
+ * When one of the first two characters of @buf is 'x' or 'X', the input is
+ * parsed by simple_strtoul() in base 16 and saved in sysfs_nonidx_addr.
+ * Other input is ignored.  Returns strnlen(@buf, @count).
+ */
+static ssize_t nes_store_nonidx_addr(struct device_driver *ddp,
+	const char *buf, size_t count)
+{
+	int hex_marked = buf[1] == 'x' || buf[1] == 'X' ||
+			 buf[0] == 'x' || buf[0] == 'X';
+
+	if (hex_marked)
+		sysfs_nonidx_addr = simple_strtoul(buf, NULL, 16);
+
+	return strnlen(buf, count);
+}
+
+/**
+ * nes_show_nonidx_data - sysfs show handler for "nonidx_data"
+ * @ddp: driver the attribute belongs to (unused)
+ * @buf: output buffer, filled with snprintf() bounded by PAGE_SIZE
+ *
+ * Reads the 32-bit register at regs + sysfs_nonidx_addr of the device at
+ * position ee_flsh_adapter in nes_dev_list; prints 0xdead if no device sits
+ * at that position.
+ *
+ * NOTE(review): the offset is used as stored, with no range check here.
+ */
+static ssize_t nes_show_nonidx_data(struct device_driver *ddp, char *buf)
+{
+	struct nes_device *dev;
+	u32 reg = 0xdead;
+	u32 pos = 0;
+
+	list_for_each_entry(dev, &nes_dev_list, list) {
+		if (pos++ != ee_flsh_adapter)
+			continue;
+		reg = nes_read32(dev->regs + sysfs_nonidx_addr);
+		break;
+	}
+
+	return snprintf(buf, PAGE_SIZE, "0x%x\n", reg);
+}
+
+/**
+ * nes_store_nonidx_data - sysfs store handler for "nonidx_data"
+ * @ddp: driver the attribute belongs to (unused)
+ * @buf: user input
+ * @count: number of bytes in @buf
+ *
+ * Input is acted on only when one of its first two characters is 'x' or
+ * 'X'; it is then parsed by simple_strtoul() in base 16 and written to the
+ * register at regs + sysfs_nonidx_addr of the device at position
+ * ee_flsh_adapter in nes_dev_list.  Other input is ignored.
+ * Returns strnlen(@buf, @count).
+ *
+ * NOTE(review): the offset is used as stored, with no range check here.
+ */
+static ssize_t nes_store_nonidx_data(struct device_driver *ddp,
+	const char *buf, size_t count)
+{
+	struct nes_device *dev;
+	u32 pos = 0;
+	u32 val;
+
+	if (buf[1] != 'x' && buf[1] != 'X' && buf[0] != 'x' && buf[0] != 'X')
+		return strnlen(buf, count);
+
+	val = simple_strtoul(buf, NULL, 16);
+	list_for_each_entry(dev, &nes_dev_list, list) {
+		if (pos++ != ee_flsh_adapter)
+			continue;
+		nes_write32(dev->regs + sysfs_nonidx_addr, val);
+		break;
+	}
+
+	return strnlen(buf, count);
+}
+
+/*
+ * sysfs show handler for "idx_addr": prints the current value of
+ * sysfs_idx_addr, the register index that the idx_data handlers pass to
+ * nes_read_indexed()/nes_write_indexed().
+ */
+static ssize_t nes_show_idx_addr(struct device_driver *ddp, char *buf)
+{
+	return  snprintf(buf, PAGE_SIZE, "0x%x\n", sysfs_idx_addr);
+}
+
+/**
+ * nes_store_idx_addr - sysfs store handler for "idx_addr"
+ * @ddp: driver the attribute belongs to (unused)
+ * @buf: user input
+ * @count: number of bytes in @buf
+ *
+ * When one of the first two characters of @buf is 'x' or 'X', the input is
+ * parsed by simple_strtoul() in base 16 and saved in sysfs_idx_addr.
+ * Other input is ignored.  Returns strnlen(@buf, @count).
+ */
+static ssize_t nes_store_idx_addr(struct device_driver *ddp,
+	const char *buf, size_t count)
+{
+	int hex_marked = buf[1] == 'x' || buf[1] == 'X' ||
+			 buf[0] == 'x' || buf[0] == 'X';
+
+	if (hex_marked)
+		sysfs_idx_addr = simple_strtoul(buf, NULL, 16);
+
+	return strnlen(buf, count);
+}
+
+/**
+ * nes_show_idx_data - sysfs show handler for "idx_data"
+ * @ddp: driver the attribute belongs to (unused)
+ * @buf: output buffer, filled with snprintf() bounded by PAGE_SIZE
+ *
+ * Reads indexed register sysfs_idx_addr, via nes_read_indexed(), from the
+ * device at position ee_flsh_adapter in nes_dev_list; prints 0xdead if no
+ * device sits at that position.
+ */
+static ssize_t nes_show_idx_data(struct device_driver *ddp, char *buf)
+{
+	struct nes_device *dev;
+	u32 reg = 0xdead;
+	u32 pos = 0;
+
+	list_for_each_entry(dev, &nes_dev_list, list) {
+		if (pos++ != ee_flsh_adapter)
+			continue;
+		reg = nes_read_indexed(dev, sysfs_idx_addr);
+		break;
+	}
+
+	return snprintf(buf, PAGE_SIZE, "0x%x\n", reg);
+}
+
+/**
+ * nes_store_idx_data - sysfs store handler for "idx_data"
+ * @ddp: driver the attribute belongs to (unused)
+ * @buf: user input
+ * @count: number of bytes in @buf
+ *
+ * Input is acted on only when one of its first two characters is 'x' or
+ * 'X'; it is then parsed by simple_strtoul() in base 16 and written, via
+ * nes_write_indexed(), to indexed register sysfs_idx_addr of the device at
+ * position ee_flsh_adapter in nes_dev_list.  Other input is ignored.
+ * Returns strnlen(@buf, @count).
+ */
+static ssize_t nes_store_idx_data(struct device_driver *ddp,
+	const char *buf, size_t count)
+{
+	struct nes_device *dev;
+	u32 pos = 0;
+	u32 val;
+
+	if (buf[1] != 'x' && buf[1] != 'X' && buf[0] != 'x' && buf[0] != 'X')
+		return strnlen(buf, count);
+
+	val = simple_strtoul(buf, NULL, 16);
+	list_for_each_entry(dev, &nes_dev_list, list) {
+		if (pos++ != ee_flsh_adapter)
+			continue;
+		nes_write_indexed(dev, sysfs_idx_addr, val);
+		break;
+	}
+
+	return strnlen(buf, count);
+}
+
+/*
+ * Driver-level sysfs attributes, readable and writable by the file owner
+ * only (S_IRUSR | S_IWUSR).  "adapter" stores the nes_dev_list position
+ * (ee_flsh_adapter) that the register attributes below operate on;
+ * "nonidx_addr"/"idx_addr" store the offset/index used by the matching
+ * "*_data" attribute.
+ */
+static DRIVER_ATTR(adapter, S_IRUSR | S_IWUSR,
+		   nes_show_adapter, nes_store_adapter);
+static DRIVER_ATTR(eeprom_cmd, S_IRUSR | S_IWUSR,
+		   nes_show_ee_cmd, nes_store_ee_cmd);
+static DRIVER_ATTR(eeprom_data, S_IRUSR | S_IWUSR,
+		   nes_show_ee_data, nes_store_ee_data);
+static DRIVER_ATTR(flash_cmd, S_IRUSR | S_IWUSR,
+		   nes_show_flash_cmd, nes_store_flash_cmd);
+static DRIVER_ATTR(flash_data, S_IRUSR | S_IWUSR,
+		   nes_show_flash_data, nes_store_flash_data);
+static DRIVER_ATTR(nonidx_addr, S_IRUSR | S_IWUSR,
+		   nes_show_nonidx_addr, nes_store_nonidx_addr);
+static DRIVER_ATTR(nonidx_data, S_IRUSR | S_IWUSR,
+		   nes_show_nonidx_data, nes_store_nonidx_data);
+static DRIVER_ATTR(idx_addr, S_IRUSR | S_IWUSR,
+		   nes_show_idx_addr, nes_store_idx_addr);
+static DRIVER_ATTR(idx_data, S_IRUSR | S_IWUSR,
+		   nes_show_idx_data, nes_store_idx_data);
+
+/**
+ * nes_create_driver_sysfs - create the driver-level sysfs attribute files
+ * @drv: PCI driver whose embedded struct device_driver receives the files
+ *
+ * Every attribute is attempted even if an earlier one fails.  The return
+ * value is 0 when all files were created, otherwise the error code of the
+ * first failure (error codes are not OR-ed together, which would yield a
+ * meaningless value).
+ */
+static int nes_create_driver_sysfs(struct pci_driver *drv)
+{
+	static struct driver_attribute * const attrs[] = {
+		&driver_attr_adapter,
+		&driver_attr_eeprom_cmd,
+		&driver_attr_eeprom_data,
+		&driver_attr_flash_cmd,
+		&driver_attr_flash_data,
+		&driver_attr_nonidx_addr,
+		&driver_attr_nonidx_data,
+		&driver_attr_idx_addr,
+		&driver_attr_idx_data,
+	};
+	unsigned int i;
+	int first_error = 0;
+	int error;
+
+	for (i = 0; i < ARRAY_SIZE(attrs); i++) {
+		error = driver_create_file(&drv->driver, attrs[i]);
+		if (error && !first_error)
+			first_error = error;
+	}
+
+	return first_error;
+}
+
+/**
+ * nes_remove_driver_sysfs - remove the driver-level sysfs attribute files
+ * @drv: PCI driver whose embedded struct device_driver owns the files
+ *
+ * Removes the same nine attributes, in the same order, that
+ * nes_create_driver_sysfs() creates.
+ */
+static void nes_remove_driver_sysfs(struct pci_driver *drv)
+{
+	static struct driver_attribute * const attrs[] = {
+		&driver_attr_adapter,
+		&driver_attr_eeprom_cmd,
+		&driver_attr_eeprom_data,
+		&driver_attr_flash_cmd,
+		&driver_attr_flash_data,
+		&driver_attr_nonidx_addr,
+		&driver_attr_nonidx_data,
+		&driver_attr_idx_addr,
+		&driver_attr_idx_data,
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(attrs); i++)
+		driver_remove_file(&drv->driver, attrs[i]);
+}
+
+/**
+ * nes_init_module - module initialization entry point
+ *
+ * Starts the iWARP CM with nes_cm_start() and then registers the PCI
+ * driver.  If registration fails the CM is stopped again, so a failed load
+ * does not leave it running.  Failure to create the driver sysfs files is
+ * only logged and does not fail the load.
+ *
+ * Returns 0 on success, or the error from nes_cm_start() or
+ * pci_register_driver().
+ */
+static int __init nes_init_module(void)
+{
+	int retval;
+	int retval1;
+
+	retval = nes_cm_start();
+	if (retval) {
+		printk(KERN_ERR PFX "Unable to start NetEffect iWARP CM.\n");
+		return retval;
+	}
+
+	retval = pci_register_driver(&nes_pci_driver);
+	if (retval < 0) {
+		/* The exit handler never runs after a failed init: undo here. */
+		nes_cm_stop();
+		return retval;
+	}
+
+	retval1 = nes_create_driver_sysfs(&nes_pci_driver);
+	if (retval1 < 0)
+		printk(KERN_ERR PFX "Unable to create NetEffect sys files.\n");
+
+	return retval;
+}
+
+
+/**
+ * nes_exit_module - module unload entry point
+ *
+ * Stops the CM, removes the driver sysfs files and then unregisters the PCI
+ * driver, in that order.
+ */
+static void __exit nes_exit_module(void)
+{
+	nes_cm_stop();
+	/* The attribute files hang off the driver, so drop them before it goes. */
+	nes_remove_driver_sysfs(&nes_pci_driver);
+
+	pci_unregister_driver(&nes_pci_driver);
+}
+
+
+/* Register the module load/unload entry points defined above. */
+module_init(nes_init_module);
+module_exit(nes_exit_module);
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
new file mode 100644
index 0000000..fd57e8a
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -0,0 +1,560 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NES_H
+#define __NES_H
+
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include <asm/semaphore.h>
+#include <linux/version.h>
+#include <asm/io.h>
+#include <linux/crc32c.h>
+
+#include <rdma/ib_smi.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/iw_cm.h>
+
+/* Compile-time feature switches (tested with #ifdef elsewhere in the driver). */
+#define NES_SEND_FIRST_WRITE
+
+#define QUEUE_DISCONNECTS
+
+#define DRV_BUILD   "1"
+
+/* Driver name, version string and the prefix used for printk() messages. */
+#define DRV_NAME    "iw_nes"
+#define DRV_VERSION "1.0 KO Build " DRV_BUILD
+#define PFX         DRV_NAME ": "
+
+/*
+ * NetEffect PCI vendor id and NE010 PCI device id.
+ */
+#ifndef PCI_VENDOR_ID_NETEFFECT	/* not in pci.ids yet */
+#define PCI_VENDOR_ID_NETEFFECT       0x1678
+#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100
+#endif
+
+#define NE020_REV   4
+#define NE020_REV1  5
+
+/* NOTE(review): BAR_1 is resource index 2, not 1 - confirm against nes_probe(). */
+#define BAR_0       0
+#define BAR_1       2
+
+#define RX_BUF_SIZE             (1536 + 8)
+#define NES_REG0_SIZE           (4 * 1024)
+#define NES_TX_TIMEOUT          (6*HZ)
+#define NES_FIRST_QPN           64
+#define NES_SW_CONTEXT_ALIGN    1024
+
+/* NES_NIC_MAX_NICS also sizes the netdev[] array in struct nes_device. */
+#define NES_NIC_MAX_NICS        16
+#define NES_MAX_ARP_TABLE_SIZE  4096
+
+#define NES_NIC_CEQ_SIZE        8
+/* NICs will be on a separate CQ */
+/* Expands to an expression that needs a variable named nesadapter in scope. */
+#define NES_CCEQ_SIZE ((nesadapter->max_cq / nesadapter->port_count) - 32)
+
+#define NES_MAX_PORT_COUNT 4
+
+#define MAX_DPC_ITERATIONS               128
+
+#define NES_CQP_REQUEST_NO_DOORBELL_RING 0
+#define NES_CQP_REQUEST_RING_DOORBELL    1
+
+/* Bit flags; presumably tested against nes_drv_opt - TODO confirm. */
+#define NES_DRV_OPT_ENABLE_MPA_VER_0     0x00000001
+#define NES_DRV_OPT_DISABLE_MPA_CRC      0x00000002
+#define NES_DRV_OPT_DISABLE_FIRST_WRITE  0x00000004
+#define NES_DRV_OPT_DISABLE_INTF         0x00000008
+#define NES_DRV_OPT_ENABLE_MSI           0x00000010
+#define NES_DRV_OPT_DUAL_LOGICAL_PORT    0x00000020
+#define NES_DRV_OPT_SUPRESS_OPTION_BC    0x00000040
+#define NES_DRV_OPT_NO_INLINE_DATA       0x00000080
+#define NES_DRV_OPT_DISABLE_INT_MOD      0x00000100
+#define NES_DRV_OPT_DISABLE_VIRT_WQ      0x00000200
+
+#define NES_AEQ_EVENT_TIMEOUT         2500
+#define NES_DISCONNECT_EVENT_TIMEOUT  2000
+
+/* debug levels */
+/* must match userspace */
+/*
+ * Each level is a single bit; nes_debug() prints a message when its level
+ * has a bit in common with nes_debug_level.
+ */
+#define NES_DBG_HW          0x00000001
+#define NES_DBG_INIT        0x00000002
+#define NES_DBG_ISR         0x00000004
+#define NES_DBG_PHY         0x00000008
+#define NES_DBG_NETDEV      0x00000010
+#define NES_DBG_CM          0x00000020
+#define NES_DBG_CM1         0x00000040
+#define NES_DBG_NIC_RX      0x00000080
+#define NES_DBG_NIC_TX      0x00000100
+#define NES_DBG_CQP         0x00000200
+#define NES_DBG_MMAP        0x00000400
+#define NES_DBG_MR          0x00000800
+#define NES_DBG_PD          0x00001000
+#define NES_DBG_CQ          0x00002000
+#define NES_DBG_QP          0x00004000
+#define NES_DBG_MOD_QP      0x00008000
+#define NES_DBG_AEQ         0x00010000
+#define NES_DBG_IW_RX       0x00020000
+#define NES_DBG_IW_TX       0x00040000
+#define NES_DBG_SHUTDOWN    0x00080000
+#define NES_DBG_RSVD1       0x10000000
+#define NES_DBG_RSVD2       0x20000000
+#define NES_DBG_RSVD3       0x40000000
+#define NES_DBG_RSVD4       0x80000000
+#define NES_DBG_ALL         0xffffffff
+
+#ifdef CONFIG_INFINIBAND_NES_DEBUG
+/*
+ * nes_debug - print a message when @level is enabled in nes_debug_level.
+ *
+ * Wrapped in do { } while (0) so the expansion is a single statement: a bare
+ * "if" would silently capture the "else" of an enclosing if/else.
+ */
+#define nes_debug(level, fmt, args...) \
+do { \
+	if ((level) & nes_debug_level) \
+		printk(KERN_ERR PFX "%s[%u]: " fmt, __FUNCTION__, __LINE__, ##args); \
+} while (0)
+
+/* assert - log a failed expression; execution continues afterwards. */
+#define assert(expr) \
+do { \
+	if (!(expr)) { \
+		printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \
+			   #expr, __FILE__, __FUNCTION__, __LINE__); \
+	} \
+} while (0)
+
+#define NES_EVENT_TIMEOUT   1200000
+#else
+/* Debug support compiled out: both macros expand to nothing useful. */
+#define nes_debug(level, fmt, args...)
+#define assert(expr)          do {} while (0)
+
+#define NES_EVENT_TIMEOUT   100000
+#endif
+
+#include "nes_hw.h"
+#include "nes_verbs.h"
+#include "nes_context.h"
+#include "nes_user.h"
+#include "nes_cm.h"
+
+/*
+ * Driver-wide variables shared between the source files of the driver.
+ * Only the declarations live here; the definitions are in the .c files.
+ */
+extern int max_mtu;
+extern int nics_per_function;
+/* Largest frame length: the configured MTU plus the Ethernet header. */
+#define max_frame_len (max_mtu+ETH_HLEN)
+extern int interrupt_mod_interval;
+extern int nes_if_count;
+extern int mpa_version;
+extern int disable_mpa_crc;
+extern unsigned int send_first;
+extern unsigned int nes_drv_opt;
+/* Bit mask of NES_DBG_* levels that nes_debug() will print. */
+extern unsigned int nes_debug_level;
+
+extern struct list_head nes_adapter_list;
+/* List of struct nes_device, linked through their "list" member. */
+extern struct list_head nes_dev_list;
+
+extern struct nes_cm_core *g_cm_core;
+
+/* Counters; presumably exported as statistics - TODO confirm where. */
+extern atomic_t cm_connects;
+extern atomic_t cm_accepts;
+extern atomic_t cm_disconnects;
+extern atomic_t cm_closes;
+extern atomic_t cm_connecteds;
+extern atomic_t cm_connect_reqs;
+extern atomic_t cm_rejects;
+extern atomic_t mod_qp_timouts;
+extern atomic_t qps_created;
+extern atomic_t qps_destroyed;
+extern atomic_t sw_qps_destroyed;
+extern u32 mh_detected;
+extern u32 mh_pauses_sent;
+extern u32 cm_packets_sent;
+extern u32 cm_packets_bounced;
+extern u32 cm_packets_created;
+extern u32 cm_packets_received;
+extern u32 cm_packets_dropped;
+extern u32 cm_packets_retrans;
+extern u32 cm_listens_created;
+extern u32 cm_listens_destroyed;
+extern u32 cm_backlog_drops;
+extern atomic_t cm_loopbacks;
+extern atomic_t cm_nodes_created;
+extern atomic_t cm_nodes_destroyed;
+extern atomic_t cm_accel_dropped_pkts;
+extern atomic_t cm_resets_recvd;
+
+extern u32 crit_err_count;
+extern u32 int_mod_timer_init;
+extern u32 int_mod_cq_depth_256;
+extern u32 int_mod_cq_depth_128;
+extern u32 int_mod_cq_depth_32;
+extern u32 int_mod_cq_depth_24;
+extern u32 int_mod_cq_depth_16;
+extern u32 int_mod_cq_depth_4;
+extern u32 int_mod_cq_depth_1;
+
+extern atomic_t cqp_reqs_allocated;
+extern atomic_t cqp_reqs_freed;
+extern atomic_t cqp_reqs_dynallocated;
+extern atomic_t cqp_reqs_dynfreed;
+extern atomic_t cqp_reqs_queued;
+extern atomic_t cqp_reqs_redriven;
+
+
+/*
+ * struct nes_device - driver state for one PCI device.
+ *
+ * nes_remove() retrieves it with pci_get_drvdata() and frees it with
+ * kfree().  Instances are linked into nes_dev_list through @list.
+ */
+struct nes_device {
+	struct nes_adapter	   *nesadapter;
+	/* Base of the register mapping; released with iounmap() in nes_remove(). */
+	void __iomem           *regs;
+	/* Index/data register pair: index at +0, data at +4 (see nes_read_indexed()). */
+	void __iomem           *index_reg;
+	struct pci_dev         *pcidev;
+	struct net_device      *netdev[NES_NIC_MAX_NICS];
+	u64                    link_status_interrupts;
+	struct tasklet_struct  dpc_tasklet;
+	/* Held across each index write + data access on index_reg. */
+	spinlock_t             indexed_regs_lock;
+	unsigned long          csr_start;
+	unsigned long          doorbell_region;
+	unsigned long          doorbell_start;
+	unsigned long          mac_tx_errors;
+	unsigned long          mac_pause_frames_sent;
+	unsigned long          mac_pause_frames_received;
+	unsigned long          mac_rx_errors;
+	unsigned long          mac_rx_crc_errors;
+	unsigned long          mac_rx_symbol_err_frames;
+	unsigned long          mac_rx_jabber_frames;
+	unsigned long          mac_rx_oversized_frames;
+	unsigned long          mac_rx_short_frames;
+	unsigned long          port_rx_discards;
+	unsigned long          port_tx_discards;
+	unsigned int           mac_index;
+	unsigned int           nes_stack_start;
+
+	/* Control Structures */
+	void                   *cqp_vbase;
+	dma_addr_t             cqp_pbase;
+	u32                    cqp_mem_size;
+	u8                     ceq_index;
+	u8                     nic_ceq_index;
+	struct nes_hw_cqp      cqp;
+	struct nes_hw_cq       ccq;
+	struct list_head       cqp_avail_reqs;
+	struct list_head       cqp_pending_reqs;
+	struct nes_cqp_request *nes_cqp_requests;
+
+	u32                    int_req;
+	u32                    int_stat;
+	u32                    timer_int_req;
+	u32                    timer_only_int_count;
+	u32                    intf_int_req;
+	u32                    last_mac_tx_pauses;
+	u32                    last_used_chunks_tx;
+	/* Entry in nes_dev_list. */
+	struct list_head       list;
+
+	u16                    base_doorbell_index;
+	u16                    currcq_count;
+	u16                    deepcq_count;
+	/* Non-zero when MSI is in use; nes_remove() then calls pci_disable_msi(). */
+	u8                     msi_enabled;
+	u8                     netdev_count;
+	u8                     napi_isr_ran;
+	u8                     disable_rx_flow_control;
+	u8                     disable_tx_flow_control;
+};
+
+
+/**
+ * set_wqe_64bit_value - store a 64-bit value in two consecutive WQE words
+ * @wqe_words: little-endian WQE word array
+ * @index: index of the low word; the high word is stored at @index + 1
+ * @value: value to store
+ *
+ * The value is split as a u64.  It must not be narrowed through
+ * unsigned long first: that type is 32 bits wide on 32-bit kernels, which
+ * would always store 0 in the high word.
+ */
+static inline void
+set_wqe_64bit_value(__le32 *wqe_words, u32 index, u64 value)
+{
+	wqe_words[index]     = cpu_to_le32((u32) value);
+	wqe_words[index + 1] = cpu_to_le32(upper_32_bits(value));
+}
+
+/* Store @value, converted to little-endian, in WQE word @index. */
+static inline void
+set_wqe_32bit_value(__le32 *wqe_words, u32 index, u32 value)
+{
+	wqe_words[index] = cpu_to_le32(value);
+}
+
+/**
+ * nes_fill_init_cqp_wqe - put a CQP WQE into its initial state
+ * @cqp_wqe: WQE to initialize
+ * @nesdev: device whose cqp member address becomes the completion context
+ *
+ * Stores the address of @nesdev->cqp in the two words starting at
+ * NES_CQP_WQE_COMP_CTX_LOW_IDX and zeroes the scratch and STAG words below.
+ */
+static inline void
+nes_fill_init_cqp_wqe(struct nes_hw_cqp_wqe *cqp_wqe, struct nes_device *nesdev)
+{
+	__le32 *words = cqp_wqe->wqe_words;
+	u64 comp_ctx = (u64)((unsigned long) &nesdev->cqp);
+
+	set_wqe_64bit_value(words, NES_CQP_WQE_COMP_CTX_LOW_IDX, comp_ctx);
+
+	words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX]   = 0;
+	words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX]  = 0;
+	words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = 0;
+	words[NES_CQP_STAG_WQE_PBL_LEN_IDX]       = 0;
+	words[NES_CQP_STAG_WQE_LEN_LOW_IDX]       = 0;
+	words[NES_CQP_STAG_WQE_PA_LOW_IDX]        = 0;
+	words[NES_CQP_STAG_WQE_PA_HIGH_IDX]       = 0;
+}
+
+/**
+ * nes_fill_init_qp_wqe - store the completion context of a QP WQE
+ * @wqe: WQE to fill
+ * @nesqp: QP whose address is used as the completion context
+ * @head: value OR-ed into the low 32 bits of the context
+ *
+ * Writes the upper 32 bits of the @nesqp address to the COMP_CTX high word
+ * and the lower 32 bits, OR-ed with @head, to the COMP_CTX low word.
+ */
+static inline void
+nes_fill_init_qp_wqe(struct nes_hw_qp_wqe *wqe, struct nes_qp *nesqp, u32 head)
+{
+	unsigned long qp_addr = (unsigned long) nesqp;
+	__le32 *words = wqe->wqe_words;
+
+	set_wqe_32bit_value(words, NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX,
+			(u32)(upper_32_bits(qp_addr)));
+	set_wqe_32bit_value(words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
+			((u32) qp_addr) | head);
+}
+
+/* Read from memory-mapped device */
+/**
+ * nes_read_indexed - read a register through the index/data register pair
+ * @nesdev: device to access
+ * @reg_index: value written to the index register
+ *
+ * Writes @reg_index at index_reg and reads the result from index_reg + 4,
+ * both under indexed_regs_lock with interrupts disabled.
+ */
+static inline u32 nes_read_indexed(struct nes_device *nesdev, u32 reg_index)
+{
+	void __iomem *index_port = nesdev->index_reg;
+	void __iomem *data_port = index_port + 4;
+	unsigned long irqflags;
+	u32 data;
+
+	spin_lock_irqsave(&nesdev->indexed_regs_lock, irqflags);
+	writel(reg_index, index_port);
+	data = readl(data_port);
+	spin_unlock_irqrestore(&nesdev->indexed_regs_lock, irqflags);
+
+	return data;
+}
+
+/* 32-bit register read: thin wrapper around readl(). */
+static inline u32 nes_read32(const void __iomem *addr)
+{
+	return readl(addr);
+}
+
+/* 16-bit register read: thin wrapper around readw(). */
+static inline u16 nes_read16(const void __iomem *addr)
+{
+	return readw(addr);
+}
+
+/* 8-bit register read: thin wrapper around readb(). */
+static inline u8 nes_read8(const void __iomem *addr)
+{
+	return readb(addr);
+}
+
+/* Write to memory-mapped device */
+/**
+ * nes_write_indexed - write a register through the index/data register pair
+ * @nesdev: device to access
+ * @reg_index: value written to the index register
+ * @val: value written to the data register
+ *
+ * Writes @reg_index at index_reg and @val at index_reg + 4, both under
+ * indexed_regs_lock with interrupts disabled.
+ */
+static inline void nes_write_indexed(struct nes_device *nesdev, u32 reg_index, u32 val)
+{
+	void __iomem *index_port = nesdev->index_reg;
+	void __iomem *data_port = index_port + 4;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&nesdev->indexed_regs_lock, irqflags);
+	writel(reg_index, index_port);
+	writel(val, data_port);
+	spin_unlock_irqrestore(&nesdev->indexed_regs_lock, irqflags);
+}
+
+/* 32-bit register write: thin wrapper around writel(); note the (addr, val) order. */
+static inline void nes_write32(void __iomem *addr, u32 val)
+{
+	writel(val, addr);
+}
+
+/* 16-bit register write: thin wrapper around writew(); note the (addr, val) order. */
+static inline void nes_write16(void __iomem *addr, u16 val)
+{
+	writew(val, addr);
+}
+
+/* 8-bit register write: thin wrapper around writeb(); note the (addr, val) order. */
+static inline void nes_write8(void __iomem *addr, u8 val)
+{
+	writeb(val, addr);
+}
+
+
+
+/**
+ * nes_alloc_resource - claim a free entry in a resource bitmap
+ * @nesadapter: adapter whose resource_lock protects the bitmap
+ * @resource_array: bitmap with one bit per resource
+ * @max_resources: number of valid bits in @resource_array
+ * @req_resource_num: receives the number of the claimed resource
+ * @next: search start hint; updated to the bit after the claimed one,
+ *        wrapping to 0 at @max_resources
+ *
+ * Searches from *@next to the end of the bitmap and, if nothing is free
+ * there, again from bit 0.  Returns 0 on success or -EMFILE when every bit
+ * is set.
+ */
+static inline int nes_alloc_resource(struct nes_adapter *nesadapter,
+		unsigned long *resource_array, u32 max_resources,
+		u32 *req_resource_num, u32 *next)
+{
+	unsigned long irqflags;
+	u32 bit;
+
+	spin_lock_irqsave(&nesadapter->resource_lock, irqflags);
+
+	bit = find_next_zero_bit(resource_array, max_resources, *next);
+	if (bit >= max_resources)
+		bit = find_first_zero_bit(resource_array, max_resources);
+
+	if (bit >= max_resources) {
+		printk(KERN_ERR PFX "%s: No available resourcess.\n", __FUNCTION__);
+		spin_unlock_irqrestore(&nesadapter->resource_lock, irqflags);
+		return -EMFILE;
+	}
+
+	set_bit(bit, resource_array);
+	*next = (bit + 1 == max_resources) ? 0 : bit + 1;
+
+	spin_unlock_irqrestore(&nesadapter->resource_lock, irqflags);
+	*req_resource_num = bit;
+
+	return 0;
+}
+
+/**
+ * nes_is_resource_allocated - test whether a resource bit is set
+ * @nesadapter: adapter whose resource_lock protects the bitmap
+ * @resource_array: bitmap with one bit per resource
+ * @resource_num: bit to test
+ *
+ * Returns the test_bit() result (non-zero when allocated), sampled under
+ * resource_lock.
+ */
+static inline int nes_is_resource_allocated(struct nes_adapter *nesadapter,
+		unsigned long *resource_array, u32 resource_num)
+{
+	unsigned long irqflags;
+	int allocated;
+
+	spin_lock_irqsave(&nesadapter->resource_lock, irqflags);
+	allocated = test_bit(resource_num, resource_array);
+	nes_debug(NES_DBG_HW, "resource_num %u is%s allocated.\n",
+			resource_num, (allocated ? "": " not"));
+	spin_unlock_irqrestore(&nesadapter->resource_lock, irqflags);
+
+	return allocated;
+}
+
+/*
+ * Release resource @resource_num by clearing its bit in @resource_array,
+ * under the adapter's resource_lock.
+ */
+static inline void nes_free_resource(struct nes_adapter *nesadapter,
+		unsigned long *resource_array, u32 resource_num)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&nesadapter->resource_lock, flags);
+	clear_bit(resource_num, resource_array);
+	spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
+}
+
+/* Return the nesvnic pointer of the struct nes_ib_device that embeds @ibdev. */
+static inline struct nes_vnic *to_nesvnic(struct ib_device *ibdev)
+{
+	return container_of(ibdev, struct nes_ib_device, ibdev)->nesvnic;
+}
+
+/* Return the struct nes_pd that embeds @ibpd as its ibpd member. */
+static inline struct nes_pd *to_nespd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct nes_pd, ibpd);
+}
+
+/* Return the struct nes_ucontext that embeds @ibucontext as its ibucontext member. */
+static inline struct nes_ucontext *to_nesucontext(struct ib_ucontext *ibucontext)
+{
+	return container_of(ibucontext, struct nes_ucontext, ibucontext);
+}
+
+/* Return the struct nes_mr that embeds @ibmr as its ibmr member. */
+static inline struct nes_mr *to_nesmr(struct ib_mr *ibmr)
+{
+	return container_of(ibmr, struct nes_mr, ibmr);
+}
+
+/* Return the struct nes_mr that embeds @ibfmr as its ibfmr member. */
+static inline struct nes_mr *to_nesmr_from_ibfmr(struct ib_fmr *ibfmr)
+{
+	return container_of(ibfmr, struct nes_mr, ibfmr);
+}
+
+/* Return the struct nes_mr that embeds @ibmw as its ibmw member. */
+static inline struct nes_mr *to_nesmw(struct ib_mw *ibmw)
+{
+	return container_of(ibmw, struct nes_mr, ibmw);
+}
+
+/* Return the struct nes_fmr that embeds @nesmr as its nesmr member. */
+static inline struct nes_fmr *to_nesfmr(struct nes_mr *nesmr)
+{
+	return container_of(nesmr, struct nes_fmr, nesmr);
+}
+
+/* Return the struct nes_cq that embeds @ibcq as its ibcq member. */
+static inline struct nes_cq *to_nescq(struct ib_cq *ibcq)
+{
+	return container_of(ibcq, struct nes_cq, ibcq);
+}
+
+/* Return the struct nes_qp that embeds @ibqp as its ibqp member. */
+static inline struct nes_qp *to_nesqp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct nes_qp, ibqp);
+}
+
+
+
+/*
+ * Prototypes of functions shared between the driver's source files, grouped
+ * under the file named in each heading comment.
+ */
+/* nes.c */
+void nes_add_ref(struct ib_qp *);
+void nes_rem_ref(struct ib_qp *);
+struct ib_qp *nes_get_qp(struct ib_device *, int);
+
+
+/* nes_hw.c */
+struct nes_adapter *nes_init_adapter(struct nes_device *, u8);
+void  nes_nic_init_timer_defaults(struct nes_device *, u8);
+unsigned int nes_reset_adapter_ne020(struct nes_device *, u8 *);
+int nes_init_serdes(struct nes_device *, u8, u8, u8);
+void nes_init_csr_ne020(struct nes_device *, u8, u8);
+void nes_destroy_adapter(struct nes_adapter *);
+int nes_init_cqp(struct nes_device *);
+int nes_init_phy(struct nes_device *);
+int nes_init_nic_qp(struct nes_device *, struct net_device *);
+void nes_destroy_nic_qp(struct nes_vnic *);
+int nes_napi_isr(struct nes_device *);
+void nes_dpc(unsigned long);
+void nes_process_ceq(struct nes_device *, struct nes_hw_ceq *);
+void nes_process_aeq(struct nes_device *, struct nes_hw_aeq *);
+void nes_process_mac_intr(struct nes_device *, u32);
+void nes_nic_napi_ce_handler(struct nes_device *, struct nes_hw_nic_cq *);
+void nes_nic_ce_handler(struct nes_device *, struct nes_hw_nic_cq *);
+void nes_cqp_ce_handler(struct nes_device *, struct nes_hw_cq *);
+void nes_process_iwarp_aeqe(struct nes_device *, struct nes_hw_aeqe *);
+void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *);
+int nes_destroy_cqp(struct nes_device *);
+/* NOTE(review): nes_nic_cm_xmit is declared again under nes_nic.c below. */
+int nes_nic_cm_xmit(struct sk_buff *, struct net_device *);
+
+/* nes_nic.c */
+void nes_netdev_set_multicast_list(struct net_device *);
+void nes_netdev_exit(struct nes_vnic *);
+struct net_device *nes_netdev_init(struct nes_device *, void __iomem *);
+void nes_netdev_destroy(struct net_device *);
+int nes_nic_cm_xmit(struct sk_buff *, struct net_device *);
+
+/* nes_cm.c */
+void *nes_cm_create(struct net_device *);
+int nes_cm_recv(struct sk_buff *, struct net_device *);
+void nes_update_arp(unsigned char *, u32, u32, u16, u16);
+void nes_manage_arp_cache(struct net_device *, unsigned char *, u32, u32);
+void nes_sock_release(struct nes_qp *, unsigned long *);
+struct nes_cm_core *nes_cm_alloc_core(void);
+void flush_wqes(struct nes_device *nesdev, struct nes_qp *, u32, u32);
+int nes_manage_apbvt(struct nes_vnic *, u32, u32, u32);
+int nes_cm_disconn(struct nes_qp *);
+void nes_cm_disconn_worker(void *);
+
+/* nes_verbs.c */
+int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32);
+int nes_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
+struct nes_ib_device *nes_init_ofa_device(struct net_device *);
+void nes_destroy_ofa_device(struct nes_ib_device *);
+int nes_register_ofa_device(struct nes_ib_device *);
+void nes_unregister_ofa_device(struct nes_ib_device *);
+
+/* nes_util.c */
+int nes_read_eeprom_values(struct nes_device *, struct nes_adapter *);
+void nes_write_1G_phy_reg(struct nes_device *, u8, u8, u16);
+void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *);
+void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16);
+void nes_read_10G_phy_reg(struct nes_device *, u16, u8);
+struct nes_cqp_request *nes_get_cqp_request(struct nes_device *);
+void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *, int);
+int nes_arp_table(struct nes_device *, u32, u8 *, u32);
+void nes_mh_fix(unsigned long);
+void nes_clc(unsigned long);
+void nes_dump_mem(unsigned int, void *, int);
+u32 nes_crc32(u32, u32, u32, u32, u8 *, u32, u32, u32);
+
+#endif	/* __NES_H */
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
new file mode 100644
index 0000000..bd5cfea
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -0,0 +1,3088 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#define TCPOPT_TIMESTAMP 8
+
+#include <asm/atomic.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/if_arp.h>
+#include <linux/notifier.h>
+#include <linux/net.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/time.h>
+#include <linux/delay.h>
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+#include <linux/random.h>
+#include <linux/list.h>
+#include <linux/threads.h>
+
+#include <net/neighbour.h>
+#include <net/route.h>
+#include <net/ip_fib.h>
+
+#include "nes.h"
+
+u32 cm_packets_sent;		/* frames for which nes_nic_cm_xmit() returned NETDEV_TX_OK */
+u32 cm_packets_bounced;		/* retransmit attempts refused by nes_nic_cm_xmit() */
+u32 cm_packets_dropped;		/* send entries discarded once a retry counter hit zero */
+u32 cm_packets_retrans;		/* retransmit attempts started by nes_cm_timer_tick() */
+u32 cm_packets_created;		/* frames built by form_cm_frame() */
+u32 cm_packets_received;
+u32 cm_listens_created;
+u32 cm_listens_destroyed;	/* listeners freed by mini_cm_dec_refcnt_listen() */
+u32 cm_backlog_drops;
+atomic_t cm_loopbacks;
+atomic_t cm_nodes_created;	/* nodes successfully returned by make_cm_node() */
+atomic_t cm_nodes_destroyed;
+atomic_t cm_accel_dropped_pkts;
+atomic_t cm_resets_recvd;
+
+static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *);
+static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *,
+		struct nes_vnic *, struct nes_cm_info *);
+static int add_ref_cm_node(struct nes_cm_node *);
+static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *);
+static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *);
+
+
+/* External CM API interface: the table of entry points handed to CM clients. */
+/* It is initialised positionally, so the order must match struct nes_cm_ops. */
+/* The address of this instance is set in cm_core->cm_ops at cm_core alloc. */
+static struct nes_cm_ops nes_cm_api = {
+	mini_cm_accelerated,
+	mini_cm_listen,
+	mini_cm_del_listen,
+	mini_cm_connect,
+	mini_cm_close,
+	mini_cm_accept,
+	mini_cm_reject,
+	mini_cm_recv_pkt,
+	mini_cm_dealloc_core,
+	mini_cm_get,
+	mini_cm_set
+};
+
+struct nes_cm_core *g_cm_core;	/* the core instance serviced by nes_cm_timer_tick() */
+
+atomic_t cm_connects;
+atomic_t cm_accepts;
+atomic_t cm_disconnects;
+atomic_t cm_closes;
+atomic_t cm_connecteds;
+atomic_t cm_connect_reqs;
+atomic_t cm_rejects;
+
+
+/**
+ * create_event - build a CM event describing @cm_node and post it
+ * @cm_node: node whose address/port quad and cm_id are copied into the event
+ * @type: event type stored in the new event
+ *
+ * Returns the posted event, or NULL (nothing posted) when the node has no
+ * cm_id or the GFP_ATOMIC allocation fails.
+ */
+static struct nes_cm_event *create_event(struct nes_cm_node *cm_node,
+		enum nes_cm_event_type type)
+{
+	struct nes_cm_event *evt;
+
+	if (!cm_node->cm_id)
+		return NULL;
+	evt = kzalloc(sizeof(*evt), GFP_ATOMIC);
+	if (!evt)
+		return NULL;
+
+	evt->cm_node = cm_node;
+	evt->type = type;
+	evt->cm_info.cm_id = cm_node->cm_id;
+	evt->cm_info.loc_addr = cm_node->loc_addr;
+	evt->cm_info.loc_port = cm_node->loc_port;
+	evt->cm_info.rem_addr = cm_node->rem_addr;
+	evt->cm_info.rem_port = cm_node->rem_port;
+
+	nes_debug(NES_DBG_CM, "Created event=%p, type=%u, dst_addr=%08x[%x],"
+			" src_addr=%08x[%x]\n", evt, type,
+			evt->cm_info.loc_addr, evt->cm_info.loc_port,
+			evt->cm_info.rem_addr, evt->cm_info.rem_port);
+	nes_cm_post_event(evt);
+	return evt;
+}
+
+
+/**
+ * send_mpa_request - send the node's stored MPA frame on an ACK segment
+ * @cm_node: node whose mpa_frame / mpa_frame_size supply the payload
+ *
+ * Returns 0 once schedule_nes_timer() has accepted the frame without a
+ * negative result, -1 if no packet buffer is available, or the negative
+ * value reported by schedule_nes_timer().
+ */
+int send_mpa_request(struct nes_cm_node *cm_node)
+{
+	int status;
+	struct sk_buff *pkt = get_free_pkt(cm_node);
+
+	if (!pkt) {
+		nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
+		return -1;
+	}
+
+	/* the MPA request travels as TCP payload with the ACK flag set */
+	form_cm_frame(pkt, cm_node, NULL, 0, &cm_node->mpa_frame,
+			cm_node->mpa_frame_size, SET_ACK);
+	status = schedule_nes_timer(cm_node, pkt, NES_TIMER_TYPE_SEND, 1, 0);
+
+	return (status < 0) ? status : 0;
+}
+
+
+/**
+ * parse_mpa - validate a received IETF MPA frame held in @buffer (@len bytes)
+ * and store its private data in @cm_node; returns 0 on success, -1 on bad size
+ */
+static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 len)
+{
+	struct ietf_mpa_frame *mpa_frame;
+
+	/* the frame is expected to start at @buffer; a full header must be present */
+	if (len < sizeof(struct ietf_mpa_frame)) {
+		nes_debug(NES_DBG_CM, "The received ietf buffer was too small (%x)\n", len);
+		return -1;
+	}
+
+	mpa_frame = (struct ietf_mpa_frame *)buffer;
+	cm_node->mpa_frame_size = ntohs(mpa_frame->priv_data_len);
+
+	if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) {
+		nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
+				" complete (%x + %x != %x)\n",
+				cm_node->mpa_frame_size, (u32)sizeof(struct ietf_mpa_frame), len);
+		return -1;
+	}
+
+	/* copy only the private data that follows the header; NOTE(review): no bound against the size of mpa_frame_buf is visible here - confirm */
+	memcpy(cm_node->mpa_frame_buf, buffer + sizeof(struct ietf_mpa_frame),
+			cm_node->mpa_frame_size);
+
+	return 0;
+}
+
+
+/**
+ * handle_exception_pkt - handle a software-path segment for a node
+ * @cm_node: node the segment belongs to
+ * @skb: received segment; only the FIN bit of its TCP header is examined
+ *
+ * A FIN is ACKed; a client node then moves to CLOSING, any other node sends
+ * its own FIN right away and ends up in LAST_ACK.  Returns -EINVAL for a
+ * segment without FIN, send_fin()'s result on the non-client path, else 0.
+ */
+static int handle_exception_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb)
+{
+	struct tcphdr *tcph = tcp_hdr(skb);
+	int ret;
+
+	/* only FIN segments are handled here */
+	if (!tcph->fin)
+		return -EINVAL;
+
+	/* acknowledge the FIN before changing state */
+	send_ack(cm_node);
+
+	if (cm_node->tcp_cntxt.client) {
+		cm_node->state = NES_CM_STATE_CLOSING;
+		return 0;
+	}
+
+	/* server side: no application to wait for, so our FIN goes out at once */
+	cm_node->state = NES_CM_STATE_CLOSE_WAIT;
+	ret = send_fin(cm_node, NULL);
+	cm_node->state = NES_CM_STATE_LAST_ACK;
+	return ret;
+}
+
+
+/**
+ * form_cm_frame - fill the caller-supplied @skb with an Ethernet/IPv4/TCP
+ * frame built from @cm_node, plus optional TCP options and payload; returns @skb
+ */
+struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm_node,
+		void *options, u32 optionsize, void *data, u32 datasize, u8 flags)
+{
+	struct tcphdr *tcph;
+	struct iphdr *iph;
+	struct ethhdr *ethh;
+	u8 *buf;
+	u16 packetsize = sizeof(*iph);
+
+	packetsize += sizeof(*tcph);
+	packetsize +=  optionsize + datasize;	/* IP total length; excludes the Ethernet header */
+
+	memset(skb->data, 0x00, ETH_HLEN + sizeof(*iph) + sizeof(*tcph));	/* clear all three headers */
+
+	skb->len = 0;
+	buf = skb_put(skb, packetsize + ETH_HLEN);
+
+	ethh = (struct ethhdr *) buf;
+	buf += ETH_HLEN;
+
+	iph = (struct iphdr *)buf;
+	buf += sizeof(*iph);
+	tcph = (struct tcphdr *)buf;
+	skb_reset_mac_header(skb);
+	skb_set_network_header(skb, ETH_HLEN);
+	skb_set_transport_header(skb, ETH_HLEN+sizeof(*iph));
+	buf += sizeof(*tcph);	/* buf now points just past the fixed TCP header */
+
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	skb->protocol = htons(0x800);	/* IPv4 ethertype */
+	skb->data_len = 0;
+	skb->mac_len = ETH_HLEN;
+
+	memcpy(ethh->h_dest, cm_node->rem_mac, ETH_ALEN);
+	memcpy(ethh->h_source, cm_node->loc_mac, ETH_ALEN);
+	ethh->h_proto = htons(0x0800);
+
+	iph->version = IPVERSION;
+	iph->ihl = 5;		/* IP header length: 5 * 4-byte words */
+	iph->tos = 0;
+	iph->tot_len = htons(packetsize);
+	iph->id = htons(++cm_node->tcp_cntxt.loc_id);	/* per-node IP id, bumped for every frame */
+
+	iph->frag_off = htons(0x4000);	/* don't-fragment flag, offset 0 */
+	iph->ttl = 0x40;
+	iph->protocol = 0x06;	/* IPPROTO_TCP */
+
+	iph->saddr = htonl(cm_node->loc_addr);
+	iph->daddr = htonl(cm_node->rem_addr);
+
+	tcph->source = htons(cm_node->loc_port);
+	tcph->dest = htons(cm_node->rem_port);
+	tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
+
+	if (flags & SET_ACK) {
+		cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
+		tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
+		tcph->ack = 1;
+	} else
+		tcph->ack_seq = 0;
+
+	if (flags & SET_SYN) {
+		cm_node->tcp_cntxt.loc_seq_num++;	/* a SYN advances the local sequence number by one */
+		tcph->syn = 1;
+	} else
+		cm_node->tcp_cntxt.loc_seq_num += datasize;	/* payload bytes only (no headers) */
+
+	if (flags & SET_FIN)
+		tcph->fin = 1;
+
+	if (flags & SET_RST)
+		tcph->rst = 1;
+
+	tcph->doff = (u16)((sizeof(*tcph) + optionsize + 3) >> 2);	/* header + options in 32-bit words, rounded up */
+	tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd);
+	tcph->urg_ptr = 0;
+	if (optionsize)
+		memcpy(buf, options, optionsize);
+	buf += optionsize;
+	if (datasize)
+		memcpy(buf, data, datasize);
+
+	skb_shinfo(skb)->nr_frags = 0;
+	cm_packets_created++;
+
+	return skb;
+}
+
+
+/**
+ * print_core - emit a debug dump of a CM core
+ * @core: core to describe; only the banner and the pointer are logged when NULL
+ */
+static void print_core(struct nes_cm_core *core)
+{
+	nes_debug(NES_DBG_CM, "---------------------------------------------\n");
+	nes_debug(NES_DBG_CM, "CM Core  -- (core = %p )\n", core);
+	if (core) {
+		nes_debug(NES_DBG_CM, "---------------------------------------------\n");
+		nes_debug(NES_DBG_CM, "Session ID    : %u \n", atomic_read(&core->session_id));
+		nes_debug(NES_DBG_CM, "State         : %u \n",  core->state);
+
+		/* free-packet queue depth and node counters */
+		nes_debug(NES_DBG_CM, "Tx Free cnt   : %u \n", skb_queue_len(&core->tx_free_list));
+		nes_debug(NES_DBG_CM, "Listen Nodes  : %u \n", atomic_read(&core->listen_node_cnt));
+		nes_debug(NES_DBG_CM, "Active Nodes  : %u \n", atomic_read(&core->node_cnt));
+
+		nes_debug(NES_DBG_CM, "core          : %p \n", core);
+		nes_debug(NES_DBG_CM, "-------------- end core ---------------\n");
+	}
+}
+
+
+/**
+ * schedule_nes_timer - queue a timer entry on @cm_node and arm the core timer
+ * @type: SEND entries are transmitted immediately, RECV/CLOSE are only queued
+ * @send_retrans: keep a successfully sent SEND entry queued for retransmit
+ * @close_when_complete: drop a node reference after a one-shot SEND is sent
+ * note - cm_node needs to be protected (referenced) by the caller.
+ * Returns -EINVAL for a NULL node, -1 if the entry cannot be allocated,
+ * otherwise 0 or, for SEND entries, the nes_nic_cm_xmit() result.
+ */
+int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
+		enum nes_timer_type type, int send_retrans,
+		int close_when_complete)
+{
+	unsigned long  flags;
+	struct nes_cm_core *cm_core;
+	struct nes_timer_entry *new_send;
+	int ret = 0;
+
+	/* validate before allocating so the early return cannot leak new_send */
+	if (!cm_node)
+		return -EINVAL;
+	new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
+	if (!new_send)
+		return -1;
+
+	new_send->retrycount = NES_DEFAULT_RETRYS;
+	new_send->retranscount = NES_DEFAULT_RETRANS;
+	new_send->skb = skb;
+	new_send->timetosend = jiffies;
+	new_send->type = type;
+	new_send->netdev = cm_node->netdev;
+	new_send->send_retrans = send_retrans;
+	new_send->close_when_complete = close_when_complete;
+
+	if (type == NES_TIMER_TYPE_CLOSE) {
+		new_send->timetosend += (HZ/2);	/* TODO: decide on the correct value here */
+		spin_lock_irqsave(&cm_node->recv_list_lock, flags);
+		list_add_tail(&new_send->list, &cm_node->recv_list);
+		spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
+	}
+
+	if (type == NES_TIMER_TYPE_SEND) {
+		new_send->seq_num = ntohl(tcp_hdr(skb)->seq);	/* wire order -> host order */
+		atomic_inc(&new_send->skb->users);	/* keep the skb alive across the transmit */
+		ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev);
+		if (ret != NETDEV_TX_OK) {
+			nes_debug(NES_DBG_CM, "Error sending packet %p (jiffies = %lu)\n",
+					new_send, jiffies);
+			atomic_dec(&new_send->skb->users);
+			new_send->timetosend = jiffies;
+		} else {
+			cm_packets_sent++;
+			if (!send_retrans) {
+				if (close_when_complete)
+					rem_ref_cm_node(cm_node->cm_core, cm_node);
+				dev_kfree_skb_any(new_send->skb);
+				kfree(new_send);
+				return ret;
+			}
+			new_send->timetosend = jiffies + NES_RETRY_TIMEOUT;
+		}
+		spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+		list_add_tail(&new_send->list, &cm_node->retrans_list);
+		spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+	}
+	if (type == NES_TIMER_TYPE_RECV) {
+		new_send->seq_num = ntohl(tcp_hdr(skb)->seq);
+		new_send->timetosend = jiffies;
+		spin_lock_irqsave(&cm_node->recv_list_lock, flags);
+		list_add_tail(&new_send->list, &cm_node->recv_list);
+		spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
+	}
+
+	cm_core = cm_node->cm_core;
+	if (!timer_pending(&cm_core->tcp_timer)) {
+		cm_core->tcp_timer.expires = new_send->timetosend;
+		add_timer(&cm_core->tcp_timer);
+	}
+
+	return ret;
+}
+
+
+/**
+ * nes_cm_timer_tick - core timer handler: service every connected node's
+ * recv_list and retrans_list, then re-arm the timer if needed (@pass is unused)
+ */
+void nes_cm_timer_tick(unsigned long pass)
+{
+	unsigned long flags, qplockflags;
+	unsigned long nexttimeout = jiffies + NES_LONG_TIME;
+	struct iw_cm_id *cm_id;
+	struct nes_cm_node *cm_node;
+	struct nes_timer_entry *send_entry, *recv_entry;
+	struct list_head *list_core, *list_core_temp;
+	struct list_head *list_node, *list_node_temp;
+	struct nes_cm_core *cm_core = g_cm_core;
+	struct nes_qp *nesqp;
+	struct sk_buff *skb;
+	u32 settimer = 0;
+	int ret = NETDEV_TX_OK;
+	int    node_done;
+
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+
+	list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {	/* NOTE(review): ht_lock is dropped inside this walk - confirm the saved next entry stays valid */
+		cm_node = container_of(list_node, struct nes_cm_node, list);
+		add_ref_cm_node(cm_node);	/* hold the node while ht_lock is released */
+		spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+		spin_lock_irqsave(&cm_node->recv_list_lock, flags);
+		list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) {
+			recv_entry = container_of(list_core, struct nes_timer_entry, list);
+			if ((time_after(recv_entry->timetosend, jiffies)) &&
+					(recv_entry->type == NES_TIMER_TYPE_CLOSE)) {
+				if (nexttimeout > recv_entry->timetosend || !settimer) {
+					nexttimeout = recv_entry->timetosend;
+					settimer = 1;
+				}
+				continue;	/* CLOSE entry not due yet: leave it queued */
+			}
+			list_del(&recv_entry->list);
+			cm_id = cm_node->cm_id;
+			spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
+			if (recv_entry->type == NES_TIMER_TYPE_CLOSE) {
+				nesqp = (struct nes_qp *)recv_entry->skb;	/* CLOSE entries carry a nes_qp in the skb field */
+				spin_lock_irqsave(&nesqp->lock, qplockflags);
+				if (nesqp->cm_id) {
+					nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d: "
+							"****** HIT A NES_TIMER_TYPE_CLOSE"
+							" with something to do!!! ******\n",
+							nesqp->hwqp.qp_id, cm_id,
+							atomic_read(&nesqp->refcount));
+					nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+					nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
+					nesqp->ibqp_state = IB_QPS_ERR;
+					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+					nes_cm_disconn(nesqp);
+				} else {
+					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+					nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d:"
+							" ****** HIT A NES_TIMER_TYPE_CLOSE"
+							" with nothing to do!!! ******\n",
+							nesqp->hwqp.qp_id, cm_id,
+							atomic_read(&nesqp->refcount));
+					nes_rem_ref(&nesqp->ibqp);
+				}
+				if (cm_id)
+					cm_id->rem_ref(cm_id);
+			}
+			kfree(recv_entry);
+			spin_lock_irqsave(&cm_node->recv_list_lock, flags);
+		}
+		spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
+
+		spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+		node_done = 0;
+		list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) {
+			if (node_done) {
+				break;
+			}
+			send_entry = container_of(list_core, struct nes_timer_entry, list);
+			if (time_after(send_entry->timetosend, jiffies)) {
+				if (cm_node->state != NES_CM_STATE_TSA) {
+					if ((nexttimeout > send_entry->timetosend) || !settimer) {
+						nexttimeout = send_entry->timetosend;
+						settimer = 1;
+					}
+					node_done = 1;	/* entry not due yet: stop scanning this node */
+					continue;
+				} else {
+					list_del(&send_entry->list);	/* node is in TSA state: drop the pending entry */
+					skb = send_entry->skb;
+					spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+					dev_kfree_skb_any(skb);
+					kfree(send_entry);
+					spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+					continue;
+				}
+			}
+			if (send_entry->type == NES_TIMER_NODE_CLEANUP) {
+				list_del(&send_entry->list);
+				spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+				kfree(send_entry);
+				spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+				continue;
+			}
+			if ((send_entry->seq_num < cm_node->tcp_cntxt.rem_ack_num) ||	/* NOTE(review): plain '<' does not handle sequence wrap - confirm */
+					(cm_node->state == NES_CM_STATE_TSA) ||
+					(cm_node->state == NES_CM_STATE_CLOSED)) {
+				skb = send_entry->skb;
+				list_del(&send_entry->list);
+				spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+				kfree(send_entry);
+				dev_kfree_skb_any(skb);
+				spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+				continue;
+			}
+
+			if (!send_entry->retranscount || !send_entry->retrycount) {
+				cm_packets_dropped++;
+				skb = send_entry->skb;
+				list_del(&send_entry->list);
+				spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+				dev_kfree_skb_any(skb);
+				kfree(send_entry);
+				if (cm_node->state == NES_CM_STATE_SYN_RCVD) {
+					/* this node never even generated an indication up to the cm */
+					rem_ref_cm_node(cm_core, cm_node);
+				} else {
+					cm_node->state = NES_CM_STATE_CLOSED;
+					create_event(cm_node, NES_CM_EVENT_ABORTED);
+				}
+				spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+				continue;
+			}
+			/* NOTE(review): retrans_list_lock is released just before the transmit */
+			/* below, so send_entry is used without the lock until it is re-taken. */
+			atomic_inc(&send_entry->skb->users);
+			cm_packets_retrans++;
+			nes_debug(NES_DBG_CM, "Retransmitting send_entry %p for node %p,"
+					" jiffies = %lu, time to send =  %lu, retranscount = %u, "
+					"send_entry->seq_num = 0x%08X, cm_node->tcp_cntxt.rem_ack_num = 0x%08X\n",
+					send_entry, cm_node, jiffies, send_entry->timetosend, send_entry->retranscount,
+					send_entry->seq_num, cm_node->tcp_cntxt.rem_ack_num);
+
+			spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+			ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev);
+			if (ret != NETDEV_TX_OK) {
+				cm_packets_bounced++;
+				atomic_dec(&send_entry->skb->users);
+				send_entry->retrycount--;
+				nexttimeout = jiffies + NES_SHORT_TIME;	/* try again soon */
+				settimer = 1;
+				node_done = 1;
+				spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+				continue;
+			} else {
+				cm_packets_sent++;
+			}
+			spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+			list_del(&send_entry->list);
+			nes_debug(NES_DBG_CM, "Packet Sent: retrans count = %u, retry count = %u.\n",
+					send_entry->retranscount, send_entry->retrycount);
+			if (send_entry->send_retrans) {
+				send_entry->retranscount--;
+				send_entry->timetosend = jiffies + NES_RETRY_TIMEOUT;
+				if (nexttimeout > send_entry->timetosend || !settimer) {
+					nexttimeout = send_entry->timetosend;
+					settimer = 1;
+				}
+				list_add(&send_entry->list, &cm_node->retrans_list);	/* requeue at the head for the next retry */
+				continue;
+			} else {
+				int close_when_complete;
+				skb = send_entry->skb;
+				close_when_complete = send_entry->close_when_complete;
+				spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+				if (close_when_complete) {
+					BUG_ON(atomic_read(&cm_node->ref_count) == 1);
+					rem_ref_cm_node(cm_core, cm_node);
+				}
+				dev_kfree_skb_any(skb);
+				kfree(send_entry);
+				spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+				continue;
+			}
+		}
+		spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+
+		rem_ref_cm_node(cm_core, cm_node);	/* drop the reference taken at the top of the loop */
+
+		spin_lock_irqsave(&cm_core->ht_lock, flags);
+		if (ret != NETDEV_TX_OK)
+			break;	/* stop walking nodes after a failed transmit */
+	}
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+	if (settimer) {
+		if (!timer_pending(&cm_core->tcp_timer)) {
+			cm_core->tcp_timer.expires  = nexttimeout;
+			add_timer(&cm_core->tcp_timer);
+		}
+	}
+}
+
+
+/**
+ * send_syn - build and queue a SYN (SYN+ACK if @sendack) with TCP options
+ * @cm_node: node supplying the MSS, receive window scale and addressing
+ * @sendack: non-zero to set ACK and, unless NES_DRV_OPT_SUPRESS_OPTION_BC is
+ *           set in nes_drv_opt, append the WRITE0 option plus two pad bytes
+ * Returns -EINVAL (NULL node), -1 (no packet) or schedule_nes_timer()'s result.
+ */
+int send_syn(struct nes_cm_node *cm_node, u32 sendack)
+{
+	int flags = SET_SYN;
+	struct sk_buff *skb;
+	/* worst case: MSS + window scale + WRITE0, two pad bytes and the end byte */
+	char optionsbuffer[sizeof(struct option_mss) +
+			sizeof(struct option_windowscale) +
+			sizeof(struct option_base) + 3];
+	int optionssize = 0;
+	union all_known_options *options;
+
+	if (!cm_node)
+		return -EINVAL;
+
+	/* MSS option */
+	options = (union all_known_options *)&optionsbuffer[optionssize];
+	options->as_mss.optionnum = OPTION_NUMBER_MSS;
+	options->as_mss.length = sizeof(struct option_mss);
+	options->as_mss.mss = htons(cm_node->tcp_cntxt.mss);
+	optionssize += sizeof(struct option_mss);
+
+	/* window scale option */
+	options = (union all_known_options *)&optionsbuffer[optionssize];
+	options->as_windowscale.optionnum = OPTION_NUMBER_WINDOW_SCALE;
+	options->as_windowscale.length = sizeof(struct option_windowscale);
+	options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale;
+	optionssize += sizeof(struct option_windowscale);
+
+	if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt)) {
+		options = (union all_known_options *)&optionsbuffer[optionssize];
+		options->as_base.optionnum = OPTION_NUMBER_WRITE0;
+		options->as_base.length = sizeof(struct option_base);
+		optionssize += sizeof(struct option_base);
+		/* we need the size to be a multiple of 4 */
+		options = (union all_known_options *)&optionsbuffer[optionssize];
+		options->as_end = 1;
+		optionssize += 1;
+		options = (union all_known_options *)&optionsbuffer[optionssize];
+		options->as_end = 1;
+		optionssize += 1;
+	}
+
+	options = (union all_known_options *)&optionsbuffer[optionssize];
+	options->as_end = OPTION_NUMBER_END;
+	optionssize += 1;
+
+	skb = get_free_pkt(cm_node);
+	if (!skb) {
+		nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
+		return -1;
+	}
+
+	if (sendack)
+		flags |= SET_ACK;
+
+	form_cm_frame(skb, cm_node, optionsbuffer, optionssize, NULL, 0, flags);
+	return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
+}
+
+
+/**
+ * send_reset - send a one-shot RST|ACK segment for @cm_node
+ *
+ * Takes an extra node reference and asks schedule_nes_timer() to drop one
+ * when the send completes.  Returns -1 if no packet buffer is available,
+ * otherwise the result of schedule_nes_timer().
+ */
+int send_reset(struct nes_cm_node *cm_node)
+{
+	struct sk_buff *pkt;
+
+	pkt = get_free_pkt(cm_node);
+	if (!pkt) {
+		nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
+		return -1;
+	}
+	add_ref_cm_node(cm_node);
+	form_cm_frame(pkt, cm_node, NULL, 0, NULL, 0, SET_RST | SET_ACK);
+	return schedule_nes_timer(cm_node, pkt, NES_TIMER_TYPE_SEND, 0, 1);
+}
+
+
+/**
+ * send_ack - send a bare ACK segment for @cm_node
+ *
+ * Returns -1 if no packet buffer is available, otherwise the result of
+ * schedule_nes_timer().
+ */
+int send_ack(struct nes_cm_node *cm_node)
+{
+	struct sk_buff *pkt;
+
+	pkt = get_free_pkt(cm_node);
+	if (!pkt) {
+		nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
+		return -1;
+	}
+	form_cm_frame(pkt, cm_node, NULL, 0, NULL, 0, SET_ACK);
+	return schedule_nes_timer(cm_node, pkt, NES_TIMER_TYPE_SEND, 0, 0);
+}
+
+
+/**
+ * send_fin - send a FIN|ACK segment for @cm_node, queued for retransmission
+ * @cm_node: node the FIN is sent for
+ * @skb: buffer to build the frame in, or NULL to take one from the free list
+ *
+ * Returns -1 if no buffer is available, else schedule_nes_timer()'s result.
+ */
+int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb)
+{
+	struct sk_buff *pkt = skb;
+
+	/* fall back to the free list when the caller supplied no buffer */
+	if (!pkt)
+		pkt = get_free_pkt(cm_node);
+	if (!pkt) {
+		nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
+		return -1;
+	}
+	form_cm_frame(pkt, cm_node, NULL, 0, NULL, 0, SET_ACK | SET_FIN);
+	return schedule_nes_timer(cm_node, pkt, NES_TIMER_TYPE_SEND, 1, 0);
+}
+
+
+/**
+ * get_free_pkt - take a tx buffer from the core's free list
+ * @cm_node: node whose cm_core owns the free list
+ *
+ * Tops the list up to free_tx_pkt_max when it is below the low watermark.
+ * An skb allocation failure only stops the refill (it no longer BUG()s);
+ * returns NULL when nothing is left on the list, which callers check for.
+ */
+struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node)
+{
+	struct nes_cm_core *cm_core = cm_node->cm_core;
+	struct sk_buff *new_skb;
+
+	if (skb_queue_len(&cm_core->tx_free_list) < NES_CM_FREE_PKT_LO_WATERMARK) {
+		while (skb_queue_len(&cm_core->tx_free_list) < cm_core->free_tx_pkt_max) {
+			new_skb = dev_alloc_skb(cm_core->mtu);
+			if (!new_skb)
+				break;	/* out of memory: use whatever is already queued */
+			skb_queue_head(&cm_core->tx_free_list, new_skb);
+		}
+	}
+	return skb_dequeue(&cm_core->tx_free_list);
+}
+
+
+/**
+ * make_hashkey - fold a connection quad into a hash-table index
+ *
+ * Returns the sum of both addresses and both ports, truncated to 32 bits,
+ * modulo NES_CM_HASHTABLE_SIZE.
+ */
+static inline int make_hashkey(u16 loc_port, nes_addr_t loc_addr, u16 rem_port,
+		nes_addr_t rem_addr)
+{
+	u32 sum = loc_addr + rem_addr + loc_port + rem_port;
+
+	return sum % NES_CM_HASHTABLE_SIZE;
+}
+
+
+/**
+ * find_node - look up the connected node matching a local/remote quad
+ * @cm_core: core whose connected_nodes list is searched
+ * @rem_port: remote port to match
+ * @rem_addr: remote address to match
+ * @loc_port: local port to match
+ * @loc_addr: local address to match
+ *
+ * Returns the node with an extra reference taken under ht_lock, or NULL.
+ */
+static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
+		u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr)
+{
+	unsigned long flags;
+	u32 hashkey;
+	struct list_head *pos;
+	struct list_head *hte = &cm_core->connected_nodes;
+	struct nes_cm_node *cm_node;
+	struct nes_cm_node *match = NULL;
+
+	/* the key is computed but every node lives on the one connected_nodes list */
+	hashkey = make_hashkey(loc_port, loc_addr, rem_port, rem_addr);
+	nes_debug(NES_DBG_CM, "Searching for an owner node:%x:%x from core %p->%p\n",
+			loc_addr, loc_port, cm_core, hte);
+
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+	list_for_each(pos, hte) {
+		cm_node = container_of(pos, struct nes_cm_node, list);
+		nes_debug(NES_DBG_CM, "finding node %x:%x =? %x:%x ^ %x:%x =? %x:%x\n",
+				cm_node->loc_addr, cm_node->loc_port, loc_addr, loc_port,
+				cm_node->rem_addr, cm_node->rem_port, rem_addr, rem_port);
+		if (cm_node->loc_addr != loc_addr || cm_node->loc_port != loc_port ||
+				cm_node->rem_addr != rem_addr || cm_node->rem_port != rem_port)
+			continue;
+		/* quad matches: hand the node back with its own reference */
+		add_ref_cm_node(cm_node);
+		match = cm_node;
+		break;
+	}
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+	return match;
+}
+
+
+/**
+ * find_listener - look up a listener bound to an address/port pair
+ * @cm_core: core whose listen list is searched
+ * @dst_addr: local address wanted; listeners bound to address 0 match any
+ * @dst_port: local port wanted
+ * @listener_state: mask of acceptable listener states
+ * Returns the first match with its ref_count raised, or NULL.
+ */
+static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
+		nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state)
+{
+	unsigned long flags;
+	struct list_head *pos;
+	struct nes_cm_listener *cur;
+
+	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+	list_for_each(pos, &cm_core->listen_list.list) {
+		cur = container_of(pos, struct nes_cm_listener, list);
+		if (cur->loc_port != dst_port)
+			continue;
+		if (cur->loc_addr != dst_addr && cur->loc_addr != 0x00000000)
+			continue;
+		if (!(listener_state & cur->listener_state))
+			continue;
+		atomic_inc(&cur->ref_count);
+		spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+		return cur;
+	}
+	spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+	nes_debug(NES_DBG_CM, "Unable to find listener- %x:%x\n", dst_addr, dst_port);
+	return NULL;
+}
+
+
+/**
+ * add_hte_node - append a cm node to the core's connected_nodes list
+ * @cm_core: core owning the list and its ht_lock
+ * @cm_node: node to add; its hashkey field is filled in from its quad
+ *
+ * Returns 0, or -EINVAL if either argument is NULL.
+ */
+static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
+{
+	unsigned long flags;
+	u32 key;
+
+	if (!cm_node || !cm_core)
+		return -EINVAL;
+
+	nes_debug(NES_DBG_CM, "Adding Node to Active Connection HT\n");
+
+	/* remember the key derived from this node's quad */
+	key = make_hashkey(cm_node->loc_port, cm_node->loc_addr,
+			cm_node->rem_port, cm_node->rem_addr);
+	cm_node->hashkey = key;
+
+	/* every node goes onto the single connected_nodes list */
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+	list_add_tail(&cm_node->list, &cm_core->connected_nodes);
+	atomic_inc(&cm_core->ht_node_cnt);
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+	return 0;
+}
+
+
+/**
+ * mini_cm_dec_refcnt_listen - drop a listener reference, destroying the
+ * listener (unlink, APBVT delete, kfree) when it was the last one
+ * @free_hanging_nodes: currently unused
+ * Returns 0 if the listener was destroyed, 1 if references remain.
+ */
+static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
+		struct nes_cm_listener *listener, int free_hanging_nodes)
+{
+	int ret = 1;
+	unsigned long flags;
+
+	/* report pending accepts now, while our reference still pins the listener */
+	if (atomic_read(&listener->pend_accepts_cnt) > 0)
+		nes_debug(NES_DBG_CM, "destroying listener (%p)"
+				" with non-zero pending accepts=%u\n",
+				listener, atomic_read(&listener->pend_accepts_cnt));
+
+	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+	if (!atomic_dec_return(&listener->ref_count)) {
+		list_del(&listener->list);
+		atomic_dec(&cm_core->listen_node_cnt);
+		spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+		if (listener->nesvnic) {
+			nes_manage_apbvt(listener->nesvnic, listener->loc_port,
+					PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL);
+		}
+
+		nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener);
+		kfree(listener);	/* listener must not be dereferenced past this point */
+		ret = 0;
+		cm_listens_destroyed++;
+	} else {
+		spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+	}
+
+	return ret;
+}
+
+
+/**
+ * mini_cm_del_listen - detach a listener from its cm_id and drop a reference
+ */
+static int mini_cm_del_listen(struct nes_cm_core *cm_core,
+		struct nes_cm_listener *listener)
+{
+	listener->cm_id = NULL; /* going to be destroyed pretty soon */
+	listener->listener_state = NES_CM_LISTENER_PASSIVE_STATE;
+	return mini_cm_dec_refcnt_listen(cm_core, listener, 1);
+}
+
+
+/**
+ * mini_cm_accelerated - mark a node accelerated, settle its pending accept
+ * on the listener and make sure the core timer is armed
+ * Always returns 0.
+ */
+static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
+		struct nes_cm_node *cm_node)
+{
+	cm_node->accelerated = 1;
+
+	if (cm_node->accept_pend) {
+		BUG_ON(!cm_node->listener);
+		atomic_dec(&cm_node->listener->pend_accepts_cnt);
+		BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0);
+	}
+
+	/* schedule a tick soon unless one is already pending */
+	if (timer_pending(&cm_core->tcp_timer))
+		return 0;
+	cm_core->tcp_timer.expires = jiffies + NES_SHORT_TIME;
+	add_timer(&cm_core->tcp_timer);
+	return 0;
+}
+
+
+/**
+ * nes_addr_send_arp - trigger neighbour resolution for an IPv4 address
+ * @dst_ip: destination address in host byte order
+ */
+static void nes_addr_send_arp(u32 dst_ip)
+{
+	struct flowi fl;
+	struct rtable *rt;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.nl_u.ip4_u.daddr = htonl(dst_ip);
+	if (ip_route_output_key(&init_net, &rt, &fl) != 0) {
+		printk("%s: ip_route_output_key failed for 0x%08X\n", __func__, dst_ip);
+		return;
+	}
+	/* route found: poke its neighbour entry, then release the route */
+	neigh_event_send(rt->u.dst.neighbour, NULL);
+	ip_rt_put(rt);
+}
+
+
+/**
+ * make_cm_node - allocate and initialise a cm node and add it to the core
+ * @cm_core: core the node will belong to
+ * @nesvnic: vnic supplying the netdev, local MAC and max frame size
+ * @cm_info: local/remote address and port, cm_id and connection type
+ * @listener: listener recorded in the node
+ * Returns NULL on allocation failure or if the remote MAC is not yet resolved.
+ */
+static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
+		struct nes_vnic *nesvnic, struct nes_cm_info *cm_info,
+		struct nes_cm_listener *listener)
+{
+	struct nes_cm_node *cm_node;
+	struct timespec ts;
+	int arpindex = 0;
+	struct nes_device *nesdev;
+	struct nes_adapter *nesadapter;
+
+	/* create an hte and cm_node for this instance */
+	cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC);
+	if (!cm_node)
+		return NULL;
+
+	/* set our node specific transport info */
+	cm_node->loc_addr = cm_info->loc_addr;
+	cm_node->rem_addr = cm_info->rem_addr;
+	cm_node->loc_port = cm_info->loc_port;
+	cm_node->rem_port = cm_info->rem_port;
+	cm_node->send_write0 = send_first;
+	nes_debug(NES_DBG_CM, "Make node addresses : loc = %x:%x, rem = %x:%x\n",
+			cm_node->loc_addr, cm_node->loc_port, cm_node->rem_addr, cm_node->rem_port);
+	cm_node->listener = listener;
+	cm_node->netdev = nesvnic->netdev;
+	cm_node->cm_id = cm_info->cm_id;
+	memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN);
+
+	nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n",
+			cm_node->listener, cm_node->cm_id);
+	INIT_LIST_HEAD(&cm_node->retrans_list);
+	spin_lock_init(&cm_node->retrans_list_lock);
+	INIT_LIST_HEAD(&cm_node->recv_list);
+	spin_lock_init(&cm_node->recv_list_lock);
+
+	cm_node->loopbackpartner = NULL;
+	atomic_set(&cm_node->ref_count, 1);
+	/* associate our parent CM core */
+	cm_node->cm_core = cm_core;
+	cm_node->tcp_cntxt.loc_id = NES_CM_DEF_LOCAL_ID;
+	cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
+	cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >>
+			NES_CM_DEFAULT_RCV_WND_SCALE;
+	ts = current_kernel_time();
+	cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec);
+	cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) -
+			sizeof(struct tcphdr) - ETH_HLEN;
+	cm_node->tcp_cntxt.rcv_nxt = 0;
+	/* get a unique session ID , add thread_id to an upcounter to handle race */
+	atomic_inc(&cm_core->session_id);
+	cm_node->session_id = (u32)(atomic_read(&cm_core->session_id) + current->tgid);
+	cm_node->conn_type = cm_info->conn_type;
+	cm_node->apbvt_set = 0;
+	cm_node->accept_pend = 0;
+	cm_node->nesvnic = nesvnic;
+	/* get some device handles, for arp lookup */
+	nesdev = nesvnic->nesdev;
+	nesadapter = nesdev->nesadapter;
+
+	/* get the mac addr for the remote node */
+	arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
+	if (arpindex < 0) {
+		kfree(cm_node);
+		nes_addr_send_arp(cm_info->rem_addr);
+		return NULL;
+	}
+	/* copy the mac addr to node context */
+	memcpy(cm_node->rem_mac, nesadapter->arp_table[arpindex].mac_addr, ETH_ALEN);
+	nes_debug(NES_DBG_CM, "Remote mac addr from arp table:%02x,"
+			" %02x, %02x, %02x, %02x, %02x\n",
+			cm_node->rem_mac[0], cm_node->rem_mac[1],
+			cm_node->rem_mac[2], cm_node->rem_mac[3],
+			cm_node->rem_mac[4], cm_node->rem_mac[5]);
+	/* count the node only now, so the ARP-failure return leaves node_cnt alone */
+	atomic_inc(&cm_core->node_cnt);
+	add_hte_node(cm_core, cm_node);
+	atomic_inc(&cm_nodes_created);
+	return cm_node;
+}
+
+
+/**
+ * add_ref_cm_node - take an additional reference on a cm node; always returns 0
+ */
+static int add_ref_cm_node(struct nes_cm_node *cm_node)
+{
+	atomic_inc(&cm_node->ref_count);
+	return 0;
+}
+
+
+/**
+ * rem_ref_cm_node - drop a reference on a cm node; the final put destroys it
+ */
+static int rem_ref_cm_node(struct nes_cm_core *cm_core,
+		struct nes_cm_node *cm_node)
+{
+	unsigned long flags, qplockflags;
+	struct nes_timer_entry *send_entry;
+	struct nes_timer_entry *recv_entry;
+	struct iw_cm_id *cm_id;
+	struct list_head *list_core, *list_node_temp;
+	struct nes_qp *nesqp;
+
+	if (!cm_node)
+		return -EINVAL;
+
+	spin_lock_irqsave(&cm_node->cm_core->ht_lock, flags);
+	if (atomic_dec_return(&cm_node->ref_count)) {	/* still referenced: nothing to tear down */
+		spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
+		return 0;
+	}
+	list_del(&cm_node->list);
+	atomic_dec(&cm_core->ht_node_cnt);
+	spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
+
+	/* node dies with an accept still pending: release its pend_accepts_cnt slot */
+	if (!cm_node->accelerated && cm_node->accept_pend) {
+		BUG_ON(!cm_node->listener);
+		atomic_dec(&cm_node->listener->pend_accepts_cnt);
+		BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0);
+	}
+
+	spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+	list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) {
+		send_entry = container_of(list_core, struct nes_timer_entry, list);
+		list_del(&send_entry->list);
+		spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);	/* freed with the lock dropped */
+		dev_kfree_skb_any(send_entry->skb);
+		kfree(send_entry);
+		spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+		continue;
+	}
+	spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+
+	spin_lock_irqsave(&cm_node->recv_list_lock, flags);
+	list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) {
+		recv_entry = container_of(list_core, struct nes_timer_entry, list);
+		list_del(&recv_entry->list);
+		cm_id = cm_node->cm_id;
+		spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
+		if (recv_entry->type == NES_TIMER_TYPE_CLOSE) {
+			nesqp = (struct nes_qp *)recv_entry->skb;	/* CLOSE entries keep a nes_qp in the skb slot */
+			spin_lock_irqsave(&nesqp->lock, qplockflags);
+			if (nesqp->cm_id) {
+				nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE"
+						" with something to do!!! ******\n",
+						nesqp->hwqp.qp_id, cm_id);
+				nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+				nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
+				nesqp->ibqp_state = IB_QPS_ERR;
+				spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+				nes_cm_disconn(nesqp);
+			} else {
+				spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+				nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE"
+						" with nothing to do!!! ******\n",
+						nesqp->hwqp.qp_id, cm_id);
+				nes_rem_ref(&nesqp->ibqp);
+			}
+			cm_id->rem_ref(cm_id);	/* NOTE(review): cm_id is not checked for NULL here */
+		} else if (recv_entry->type == NES_TIMER_TYPE_RECV) {
+			dev_kfree_skb_any(recv_entry->skb);
+		}
+		kfree(recv_entry);
+		spin_lock_irqsave(&cm_node->recv_list_lock, flags);
+	}
+	spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
+
+	if (cm_node->listener) {
+		mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0);
+	} else {
+		if (cm_node->apbvt_set && cm_node->nesvnic) {
+			nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port,
+					PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn),
+					NES_MANAGE_APBVT_DEL);
+		}
+	}
+
+	kfree(cm_node);
+	atomic_dec(&cm_core->node_cnt);
+	atomic_inc(&cm_nodes_destroyed);
+
+	return 0;
+}
+
+
+/**
+ * process_options - parse the TCP options of a segment; returns 1 on a bad MSS option
+ */
+static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 optionsize, u32 syn_packet)
+{
+	u32 tmp, offset = 0;
+	union all_known_options *all_options;
+	char got_mss_option = 0;
+
+	while (offset < optionsize) {
+		all_options = (union all_known_options *)(optionsloc + offset);
+		if (all_options->as_base.optionnum == OPTION_NUMBER_END)
+			break;
+		if (all_options->as_base.optionnum == OPTION_NUMBER_NONE) {
+			offset += 1;
+			continue;
+		}
+		/* stop on a bad length: 0 would loop forever, too large overruns the options */
+		if ((optionsize - offset < 2) || (all_options->as_base.length < 2) ||
+				(all_options->as_base.length > optionsize - offset))
+			break;
+		switch (all_options->as_base.optionnum) {
+			case OPTION_NUMBER_MSS:
+				nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d Size: %d\n",
+						__FUNCTION__, all_options->as_mss.length, offset, optionsize);
+				got_mss_option = 1;
+				if (all_options->as_mss.length != 4)
+					return 1;
+				tmp = ntohs(all_options->as_mss.mss);
+				if (tmp > 0 && tmp < cm_node->tcp_cntxt.mss)
+					cm_node->tcp_cntxt.mss = tmp;
+				break;
+			case OPTION_NUMBER_WINDOW_SCALE:
+				cm_node->tcp_cntxt.snd_wscale = all_options->as_windowscale.shiftcount;
+				break;
+			case OPTION_NUMBER_WRITE0:
+				cm_node->send_write0 = 1;
+				break;
+			default:
+				nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n",
+						all_options->as_base.optionnum);
+				break;
+		}
+		offset += all_options->as_base.length;
+	}
+	if ((!got_mss_option) && (syn_packet))
+		cm_node->tcp_cntxt.mss = NES_CM_DEFAULT_MSS;
+	return 0;
+}
+
+
+/**
+ * process_packet - run one received TCP segment through the cm node state machine
+ *
+ * May drop references on cm_node through rem_ref_cm_node(). Returns 0, a negative
+ * value, or the result of the last send_*()/parse_mpa() helper that was called.
+ */
+int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
+		struct nes_cm_core *cm_core)
+{
+	int optionsize;
+	int datasize;
+	int ret = 0;
+	struct tcphdr *tcph = tcp_hdr(skb);
+	u32 inc_sequence;
+
+	/* check the header pointer before the first dereference of tcph */
+	if ((!tcph) || (cm_node->state == NES_CM_STATE_TSA)) {
+		BUG_ON(!tcph);
+		atomic_inc(&cm_accel_dropped_pkts);
+		return -1;
+	}
+
+	if (cm_node->state == NES_CM_STATE_SYN_SENT && tcph->syn) {
+		inc_sequence = ntohl(tcph->seq);
+		cm_node->tcp_cntxt.rcv_nxt = inc_sequence;
+	}
+
+	if (tcph->rst) {
+		atomic_inc(&cm_resets_recvd);
+		nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u. refcnt=%d\n",
+				cm_node, cm_node->state, atomic_read(&cm_node->ref_count));
+		switch (cm_node->state) {
+			case NES_CM_STATE_LISTENING:
+				rem_ref_cm_node(cm_core, cm_node);
+				break;
+			case NES_CM_STATE_TSA:
+			case NES_CM_STATE_CLOSED:
+				break;
+			case NES_CM_STATE_SYN_RCVD:
+				nes_debug(NES_DBG_CM, "Received a reset for local 0x%08X:%04X,"
+						" remote 0x%08X:%04X, node state = %u\n",
+						cm_node->loc_addr, cm_node->loc_port,
+						cm_node->rem_addr, cm_node->rem_port,
+						cm_node->state);
+				rem_ref_cm_node(cm_core, cm_node);
+				break;
+			case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
+			case NES_CM_STATE_ESTABLISHED:
+			case NES_CM_STATE_MPAREQ_SENT:
+			default:
+				nes_debug(NES_DBG_CM, "Received a reset for local 0x%08X:%04X,"
+						" remote 0x%08X:%04X, node state = %u refcnt=%d\n",
+						cm_node->loc_addr, cm_node->loc_port,
+						cm_node->rem_addr, cm_node->rem_port,
+						cm_node->state, atomic_read(&cm_node->ref_count));
+				/* close the node and report the abort upwards */
+				cm_node->state = NES_CM_STATE_CLOSED;
+
+				create_event(cm_node, NES_CM_EVENT_ABORTED);
+				break;
+		}
+		return -1;
+	}
+
+	optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+
+	skb_pull(skb, ip_hdr(skb)->ihl << 2);
+	skb_pull(skb, tcph->doff << 2);
+
+	datasize = skb->len;
+	inc_sequence = ntohl(tcph->seq);
+	nes_debug(NES_DBG_CM, "datasize = %u, sequence = 0x%08X, ack_seq = 0x%08X,"
+			" rcv_nxt = 0x%08X Flags: %s %s.\n",
+			datasize, inc_sequence, ntohl(tcph->ack_seq),
+			cm_node->tcp_cntxt.rcv_nxt, (tcph->syn ? "SYN":""),
+			(tcph->ack ? "ACK":""));
+
+	if (!tcph->syn && (inc_sequence != cm_node->tcp_cntxt.rcv_nxt)) {
+		nes_debug(NES_DBG_CM, "dropping packet, datasize = %u, sequence = 0x%08X,"
+				" ack_seq = 0x%08X, rcv_nxt = 0x%08X Flags: %s.\n",
+				datasize, inc_sequence, ntohl(tcph->ack_seq),
+				cm_node->tcp_cntxt.rcv_nxt, (tcph->ack ? "ACK":""));
+		if (cm_node->state == NES_CM_STATE_LISTENING) {
+			rem_ref_cm_node(cm_core, cm_node);
+		}
+		return -1;
+	}
+
+	cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+
+	if (optionsize) {
+		u8 *optionsloc = (u8 *)&tcph[1];
+		if (process_options(cm_node, optionsloc, optionsize, (u32)tcph->syn)) {
+			nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", __FUNCTION__, cm_node);
+			send_reset(cm_node);
+			if (cm_node->state != NES_CM_STATE_SYN_SENT)
+				rem_ref_cm_node(cm_core, cm_node);
+			return 0;
+		}
+	} else if (tcph->syn)
+		cm_node->tcp_cntxt.mss = NES_CM_DEFAULT_MSS;
+
+	cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) <<
+			cm_node->tcp_cntxt.snd_wscale;
+
+	if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
+		cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
+
+	if (tcph->ack) {
+		cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+		switch (cm_node->state) {
+			case NES_CM_STATE_SYN_RCVD:
+			case NES_CM_STATE_SYN_SENT:
+				/* read and stash current sequence number */
+				if (cm_node->tcp_cntxt.rem_ack_num != cm_node->tcp_cntxt.loc_seq_num) {
+					nes_debug(NES_DBG_CM, "ERROR - cm_node->tcp_cntxt.rem_ack_num !="
+							" cm_node->tcp_cntxt.loc_seq_num\n");
+					send_reset(cm_node);
+					return 0;
+				}
+				if (cm_node->state == NES_CM_STATE_SYN_SENT)
+					cm_node->state = NES_CM_STATE_ONE_SIDE_ESTABLISHED;
+				else
+					cm_node->state = NES_CM_STATE_ESTABLISHED;
+				break;
+			case NES_CM_STATE_LAST_ACK:
+				cm_node->state = NES_CM_STATE_CLOSED;
+				break;
+			case NES_CM_STATE_FIN_WAIT1:
+				cm_node->state = NES_CM_STATE_FIN_WAIT2;
+				break;
+			case NES_CM_STATE_CLOSING:
+				cm_node->state = NES_CM_STATE_TIME_WAIT;
+				/* need to schedule this to happen in 2MSL timeouts */
+				cm_node->state = NES_CM_STATE_CLOSED;
+				break;
+			case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
+			case NES_CM_STATE_ESTABLISHED:
+			case NES_CM_STATE_MPAREQ_SENT:
+			case NES_CM_STATE_CLOSE_WAIT:
+			case NES_CM_STATE_TIME_WAIT:
+			case NES_CM_STATE_CLOSED:
+				break;
+			case NES_CM_STATE_LISTENING:
+				nes_debug(NES_DBG_CM, "Received an ACK on a listening port (SYN %d)\n", tcph->syn);
+				cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+				send_reset(cm_node);
+				/* send_reset bumps refcount, this should have been a new node */
+				rem_ref_cm_node(cm_core, cm_node);
+				return -1;
+				break;
+			case NES_CM_STATE_TSA:
+				nes_debug(NES_DBG_CM, "Received a packet with the ack bit set while in TSA state\n");
+				break;
+			case NES_CM_STATE_UNKNOWN:
+			case NES_CM_STATE_INITED:
+			case NES_CM_STATE_ACCEPTING:
+			case NES_CM_STATE_FIN_WAIT2:
+			default:
+				nes_debug(NES_DBG_CM, "Received ack from unknown state: %x\n",
+						cm_node->state);
+				send_reset(cm_node);
+				break;
+		}
+	}
+
+	if (tcph->syn) {
+		if (cm_node->state == NES_CM_STATE_LISTENING) {
+			/* do not exceed backlog */
+			atomic_inc(&cm_node->listener->pend_accepts_cnt);
+			if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
+					cm_node->listener->backlog) {
+				nes_debug(NES_DBG_CM, "drop syn due to backlog pressure \n");
+				cm_backlog_drops++;
+				atomic_dec(&cm_node->listener->pend_accepts_cnt);
+				rem_ref_cm_node(cm_core, cm_node);
+				return 0;
+			}
+			cm_node->accept_pend = 1;
+
+		}
+		if (datasize == 0)
+			cm_node->tcp_cntxt.rcv_nxt ++;
+
+		if (cm_node->state == NES_CM_STATE_LISTENING) {
+			cm_node->state = NES_CM_STATE_SYN_RCVD;
+			send_syn(cm_node, 1);
+		}
+		if (cm_node->state == NES_CM_STATE_ONE_SIDE_ESTABLISHED) {
+			cm_node->state = NES_CM_STATE_ESTABLISHED;
+			/* send final handshake ACK */
+			ret = send_ack(cm_node);
+			if (ret < 0)
+				return ret;
+
+			cm_node->state = NES_CM_STATE_MPAREQ_SENT;
+			ret = send_mpa_request(cm_node);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	if (tcph->fin) {
+		cm_node->tcp_cntxt.rcv_nxt++;
+		switch (cm_node->state) {
+			case NES_CM_STATE_SYN_RCVD:
+			case NES_CM_STATE_SYN_SENT:
+			case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
+			case NES_CM_STATE_ESTABLISHED:
+			case NES_CM_STATE_ACCEPTING:
+			case NES_CM_STATE_MPAREQ_SENT:
+				cm_node->state = NES_CM_STATE_CLOSE_WAIT;
+				cm_node->state = NES_CM_STATE_LAST_ACK;
+				ret = send_fin(cm_node, NULL);
+				break;
+			case NES_CM_STATE_FIN_WAIT1:
+				cm_node->state = NES_CM_STATE_CLOSING;
+				ret = send_ack(cm_node);
+				break;
+			case NES_CM_STATE_FIN_WAIT2:
+				cm_node->state = NES_CM_STATE_TIME_WAIT;
+				cm_node->tcp_cntxt.loc_seq_num ++;
+				ret = send_ack(cm_node);
+				/* need to schedule this to happen in 2MSL timeouts */
+				cm_node->state = NES_CM_STATE_CLOSED;
+				break;
+			case NES_CM_STATE_CLOSE_WAIT:
+			case NES_CM_STATE_LAST_ACK:
+			case NES_CM_STATE_CLOSING:
+			case NES_CM_STATE_TSA:
+			default:
+				nes_debug(NES_DBG_CM, "Received a fin while in %x state\n",
+						cm_node->state);
+				ret = -EINVAL;
+				break;
+		}
+	}
+
+	if (datasize) {
+		u8 *dataloc = skb->data;
+		/* figure out what state we are in and handle transition to next state */
+		switch (cm_node->state) {
+			case NES_CM_STATE_LISTENING:
+			case NES_CM_STATE_SYN_RCVD:
+			case NES_CM_STATE_SYN_SENT:
+			case NES_CM_STATE_FIN_WAIT1:
+			case NES_CM_STATE_FIN_WAIT2:
+			case NES_CM_STATE_CLOSE_WAIT:
+			case NES_CM_STATE_LAST_ACK:
+			case NES_CM_STATE_CLOSING:
+				break;
+			case  NES_CM_STATE_MPAREQ_SENT:
+				/* recv the mpa res frame, ret=frame len (incl priv data) */
+				ret = parse_mpa(cm_node, dataloc, datasize);
+				if (ret < 0)
+					break;
+				/* set the req frame payload len in skb */
+				/* we are done handling this state, set node to a TSA state */
+				cm_node->state = NES_CM_STATE_TSA;
+				send_ack(cm_node);
+				create_event(cm_node, NES_CM_EVENT_CONNECTED);
+				break;
+
+			case  NES_CM_STATE_ESTABLISHED:
+				/* we are expecting an MPA req frame */
+				ret = parse_mpa(cm_node, dataloc, datasize);
+				if (ret < 0) {
+					break;
+				}
+				cm_node->state = NES_CM_STATE_TSA;
+				send_ack(cm_node);
+				/* we got a valid MPA request, create an event */
+				create_event(cm_node, NES_CM_EVENT_MPA_REQ);
+				break;
+			case  NES_CM_STATE_TSA:
+				handle_exception_pkt(cm_node, skb);
+				break;
+			case NES_CM_STATE_UNKNOWN:
+			case NES_CM_STATE_INITED:
+			default:
+				ret = -1;
+		}
+	}
+
+	return ret;
+}
+
+
+/**
+ * mini_cm_listen - create, or re-activate, the listener for a local addr/port
+ *
+ * Returns the listener, or NULL if an active one already exists or allocation fails.
+ */
+static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
+		struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
+{
+	struct nes_cm_listener *listener;
+	unsigned long flags;
+
+	nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n",
+		cm_info->loc_addr, cm_info->loc_port);
+
+	/* cannot have multiple matching listeners */
+	listener = find_listener(cm_core, htonl(cm_info->loc_addr),
+			htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE);
+	if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) {
+		/* drop the reference find_listener() is expected to have taken */
+		atomic_dec(&listener->ref_count);
+		nes_debug(NES_DBG_CM, "Not creating listener since it already exists\n");
+		return NULL;
+	}
+
+	if (!listener) {
+		/* create a CM listen node (1/2 node to compare incoming traffic to) */
+		listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
+		if (!listener) {
+			nes_debug(NES_DBG_CM, "Not creating listener memory allocation failed\n");
+			return NULL;
+		}
+
+		/* kzalloc() already zeroed the listener, so no memset is needed */
+		listener->loc_addr = htonl(cm_info->loc_addr);
+		listener->loc_port = htons(cm_info->loc_port);
+		listener->reused_node = 0;
+
+		atomic_set(&listener->ref_count, 1);
+	} else {
+		/* passive case: the lookup above is expected to hold the reference */
+		listener->reused_node = 1;
+	}
+
+	listener->cm_id = cm_info->cm_id;
+	atomic_set(&listener->pend_accepts_cnt, 0);
+	listener->cm_core = cm_core;
+	listener->nesvnic = nesvnic;
+	atomic_inc(&cm_core->node_cnt);
+	atomic_inc(&cm_core->session_id);
+
+	listener->session_id = (u32)(atomic_read(&cm_core->session_id) + current->tgid);
+	listener->conn_type = cm_info->conn_type;
+	listener->backlog = cm_info->backlog;
+	listener->listener_state = NES_CM_LISTENER_ACTIVE_STATE;
+
+	if (!listener->reused_node) {
+		spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+		list_add(&listener->list, &cm_core->listen_list.list);
+		spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+		atomic_inc(&cm_core->listen_node_cnt);
+	}
+
+	nes_debug(NES_DBG_CM, "Api - listen(): addr=0x%08X, port=0x%04x,"
+			" listener = %p, backlog = %d, cm_id = %p.\n",
+			cm_info->loc_addr, cm_info->loc_port,
+			listener, listener->backlog, listener->cm_id);
+
+	return listener;
+}
+
+
+/**
+ * mini_cm_connect - make a connection node with params; NULL if no node could be made
+ */
+struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
+		struct nes_vnic *nesvnic, struct ietf_mpa_frame *mpa_frame,
+		struct nes_cm_info *cm_info)
+{
+	struct nes_cm_node *cm_node;
+	struct nes_cm_listener *loopbackremotelistener;
+	struct nes_cm_node *loopbackremotenode;
+	struct nes_cm_info loopback_cm_info;
+	u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + ntohs(mpa_frame->priv_data_len);
+
+	cm_info->loc_addr = htonl(cm_info->loc_addr);
+	cm_info->rem_addr = htonl(cm_info->rem_addr);
+	cm_info->loc_port = htons(cm_info->loc_port);
+	cm_info->rem_port = htons(cm_info->rem_port);
+
+	/* create a CM connection node */
+	cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL);
+	if (!cm_node)
+		return NULL;
+
+	// set our node side to client (active) side
+	cm_node->tcp_cntxt.client = 1;
+	cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
+
+	if (cm_info->loc_addr == cm_info->rem_addr) {
+		loopbackremotelistener = find_listener(cm_core, cm_node->rem_addr,
+				cm_node->rem_port, NES_CM_LISTENER_ACTIVE_STATE);
+		if (loopbackremotelistener == NULL) {
+			create_event(cm_node, NES_CM_EVENT_ABORTED);
+		} else {
+			atomic_inc(&cm_loopbacks);
+			loopback_cm_info = *cm_info;
+			loopback_cm_info.loc_port = cm_info->rem_port;
+			loopback_cm_info.rem_port = cm_info->loc_port;
+			loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
+			loopbackremotenode = make_cm_node(cm_core, nesvnic, &loopback_cm_info,
+					loopbackremotelistener);
+			if (!loopbackremotenode) {
+				atomic_dec(&loopbackremotelistener->ref_count);
+				rem_ref_cm_node(cm_core, cm_node);
+				return NULL;
+			}
+			loopbackremotenode->loopbackpartner = cm_node;
+			loopbackremotenode->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
+			cm_node->loopbackpartner = loopbackremotenode;
+			/* only the private data is copied, so size the copy to match */
+			loopbackremotenode->mpa_frame_size = mpa_frame_size -
+					sizeof(struct ietf_mpa_frame);
+			memcpy(loopbackremotenode->mpa_frame_buf, &mpa_frame->priv_data,
+					loopbackremotenode->mpa_frame_size);
+
+			// we are done handling this state, set node to a TSA state
+			cm_node->state = NES_CM_STATE_TSA;
+			cm_node->tcp_cntxt.rcv_nxt = loopbackremotenode->tcp_cntxt.loc_seq_num;
+			loopbackremotenode->tcp_cntxt.rcv_nxt = cm_node->tcp_cntxt.loc_seq_num;
+			cm_node->tcp_cntxt.max_snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd;
+			loopbackremotenode->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
+			cm_node->tcp_cntxt.snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd;
+			loopbackremotenode->tcp_cntxt.snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
+			cm_node->tcp_cntxt.snd_wscale = loopbackremotenode->tcp_cntxt.rcv_wscale;
+			loopbackremotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale;
+
+			create_event(loopbackremotenode, NES_CM_EVENT_MPA_REQ);
+		}
+		return cm_node;
+	}
+
+	/* init our MPA frame ptr */
+	memcpy(&cm_node->mpa_frame, mpa_frame, mpa_frame_size);
+	cm_node->mpa_frame_size = mpa_frame_size;
+
+	/* send a syn and goto syn sent state */
+	cm_node->state = NES_CM_STATE_SYN_SENT;
+	send_syn(cm_node, 0);
+	nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X, port=0x%04x,"
+			" cm_node=%p, cm_id = %p.\n",
+			cm_node->rem_addr, cm_node->rem_port, cm_node, cm_node->cm_id);
+
+	return cm_node;
+}
+
+
+/**
+ * mini_cm_accept - accept a connection (stub: ignores its arguments, returns 0)
+ * This function is never called
+ */
+int mini_cm_accept(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mpa_frame,
+		struct nes_cm_node *cm_node)
+{
+	return 0;
+}
+
+
+/**
+ * mini_cm_reject - reject and teardown a connection
+ *
+ * Returns -1 when no packet buffer is available, else the send_fin() result.
+ */
+int mini_cm_reject(struct nes_cm_core *cm_core,
+		struct ietf_mpa_frame *mpa_frame,
+		struct nes_cm_node *cm_node)
+{
+	u16 frame_len = sizeof(struct ietf_mpa_frame) + ntohs(mpa_frame->priv_data_len);
+	struct sk_buff *reject_skb;
+	int status;
+
+	reject_skb = get_free_pkt(cm_node);
+	if (reject_skb == NULL) {
+		nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
+		return -1;
+	}
+
+	/* build the caller's MPA frame with ACK|FIN set and hand it to the send timer */
+	form_cm_frame(reject_skb, cm_node, NULL, 0, mpa_frame, frame_len, SET_ACK | SET_FIN);
+	/* NOTE(review): the schedule_nes_timer() result is not acted upon */
+	schedule_nes_timer(cm_node, reject_skb, NES_TIMER_TYPE_SEND, 1, 0);
+
+	/* the node is marked closed before the FIN goes out */
+	cm_node->state = NES_CM_STATE_CLOSED;
+	status = send_fin(cm_node, NULL);
+	if (status < 0)
+		printk(KERN_INFO PFX "failed to send MPA Reply (reject)\n");
+
+	return status;
+}
+
+
+/**
+ * mini_cm_close - close a node according to its state; may free the node
+ */
+int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
+{
+	int ret = 0;
+
+	if (!cm_core || !cm_node)
+		return -EINVAL;
+
+	switch (cm_node->state) {
+		/* connection or handshake still open on our side: send a FIN */
+		/* NOTE(review): the send_fin() results below are not checked */
+		case NES_CM_STATE_SYN_RCVD:
+		case NES_CM_STATE_SYN_SENT:
+		case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
+		case NES_CM_STATE_ESTABLISHED:
+		case NES_CM_STATE_ACCEPTING:
+		case NES_CM_STATE_MPAREQ_SENT:
+			cm_node->state = NES_CM_STATE_FIN_WAIT1;
+			send_fin(cm_node, NULL);
+			break;
+		case NES_CM_STATE_CLOSE_WAIT:
+			cm_node->state = NES_CM_STATE_LAST_ACK;
+			send_fin(cm_node, NULL);
+			break;
+		case NES_CM_STATE_FIN_WAIT1:
+		case NES_CM_STATE_FIN_WAIT2:
+		case NES_CM_STATE_LAST_ACK:
+		case NES_CM_STATE_TIME_WAIT:
+		case NES_CM_STATE_CLOSING:
+			ret = -1;
+			break;
+		case NES_CM_STATE_LISTENING:
+		case NES_CM_STATE_UNKNOWN:
+		case NES_CM_STATE_INITED:
+		case NES_CM_STATE_CLOSED:
+		case NES_CM_STATE_TSA:
+			/* this may free cm_node, so it must not be written to afterwards */
+			return rem_ref_cm_node(cm_core, cm_node);
+	}
+	cm_node->cm_id = NULL;
+	return ret;
+}
+
+
+/**
+ * mini_cm_recv_pkt - recv an IP/TCP packet, and process it through the CM
+ * node state machine; the skb is always freed before returning
+ */
+int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic,
+		struct sk_buff *skb)
+{
+	struct nes_cm_node *cm_node = NULL;
+	struct nes_cm_listener *listener = NULL;
+	struct iphdr *iph;
+	struct tcphdr *tcph;
+	struct nes_cm_info nfo;
+	int ret = 0;
+
+	if (!skb || skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	iph = (struct iphdr *)skb->data;
+	tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr));
+	skb_reset_network_header(skb);
+	skb_set_transport_header(skb, sizeof(*iph));	/* TCP header follows the IP header */
+	skb->len = ntohs(iph->tot_len);
+
+	nfo.loc_addr = ntohl(iph->daddr);
+	nfo.loc_port = ntohs(tcph->dest);
+	nfo.rem_addr = ntohl(iph->saddr);
+	nfo.rem_port = ntohs(tcph->source);
+
+	nes_debug(NES_DBG_CM, "Received packet: dest=0x%08X:0x%04X src=0x%08X:0x%04X\n",
+			iph->daddr, tcph->dest, iph->saddr, tcph->source);
+
+	/* note: this call is going to increment cm_node ref count */
+	cm_node = find_node(cm_core,
+			nfo.rem_port, nfo.rem_addr,
+			nfo.loc_port, nfo.loc_addr);
+
+	if (!cm_node) {
+		listener = find_listener(cm_core, nfo.loc_addr, nfo.loc_port,
+				NES_CM_LISTENER_ACTIVE_STATE);
+		if (listener) {
+			nfo.cm_id = listener->cm_id;
+			nfo.conn_type = listener->conn_type;
+		} else {
+			nfo.cm_id = NULL;
+			nfo.conn_type = 0;
+		}
+
+		cm_node = make_cm_node(cm_core, nesvnic, &nfo, listener);
+		if (!cm_node) {
+			nes_debug(NES_DBG_CM, "Unable to allocate node\n");
+			if (listener) {
+				nes_debug(NES_DBG_CM, "unable to allocate node and decrementing listener refcount\n");
+				atomic_dec(&listener->ref_count);
+			}
+			ret = -1;
+			goto out;
+		}
+		if (!listener) {
+			nes_debug(NES_DBG_CM, "Packet found for unknown port %x refcnt=%d\n",
+					nfo.loc_port, atomic_read(&cm_node->ref_count));
+			if (!tcph->rst) {
+				nes_debug(NES_DBG_CM, "Packet found for unknown port=%d"
+						" rem_port=%d refcnt=%d\n",
+						nfo.loc_port, nfo.rem_port, atomic_read(&cm_node->ref_count));
+
+				cm_node->tcp_cntxt.rcv_nxt = ntohl(tcph->seq);
+				cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+				send_reset(cm_node);
+			}
+			rem_ref_cm_node(cm_core, cm_node);
+			ret = -1;
+			goto out;
+		}
+		add_ref_cm_node(cm_node);	/* extra reference, dropped again after process_packet() */
+		cm_node->state = NES_CM_STATE_LISTENING;
+	}
+
+	nes_debug(NES_DBG_CM, "Processing Packet for node %p, data = (%p):\n",
+			cm_node, skb->data);
+	process_packet(cm_node, skb, cm_core);
+
+	rem_ref_cm_node(cm_core, cm_node);
+out:
+	if (skb)
+		dev_kfree_skb_any(skb);
+	return ret;
+}
+
+
+/**
+ * nes_cm_alloc_core - allocate a top level instance of a cm core; NULL on failure
+ */
+struct nes_cm_core *nes_cm_alloc_core(void)
+{
+	int i;
+	struct nes_cm_core *cm_core;
+	struct sk_buff *skb = NULL;
+
+	/* setup the CM core: alloc top level core control structure */
+	cm_core = kzalloc(sizeof(*cm_core), GFP_KERNEL);
+	if (!cm_core)
+		return NULL;
+
+	INIT_LIST_HEAD(&cm_core->connected_nodes);
+	init_timer(&cm_core->tcp_timer);
+	cm_core->tcp_timer.function = nes_cm_timer_tick;
+
+	cm_core->mtu   = NES_CM_DEFAULT_MTU;
+	cm_core->state = NES_CM_STATE_INITED;
+	cm_core->free_tx_pkt_max = NES_CM_DEFAULT_FREE_PKTS;
+
+	atomic_set(&cm_core->session_id, 0);
+	atomic_set(&cm_core->events_posted, 0);
+
+	/* init the packet lists */
+	skb_queue_head_init(&cm_core->tx_free_list);
+
+	for (i = 0; i < NES_CM_DEFAULT_FRAME_CNT; i++) {
+		skb = dev_alloc_skb(cm_core->mtu);
+		if (!skb) {
+			/* release the frames queued by earlier iterations */
+			skb_queue_purge(&cm_core->tx_free_list);
+			kfree(cm_core);
+			return NULL;
+		}
+		/* add 'raw' skb to free frame list */
+		skb_queue_head(&cm_core->tx_free_list, skb);
+	}
+
+	cm_core->api = &nes_cm_api;
+
+	spin_lock_init(&cm_core->ht_lock);
+	spin_lock_init(&cm_core->listen_list_lock);
+
+	INIT_LIST_HEAD(&cm_core->listen_list.list);
+
+	nes_debug(NES_DBG_CM, "Init CM Core completed -- cm_core=%p\n", cm_core);
+	/* NOTE(review): neither workqueue creation below is checked for failure */
+	nes_debug(NES_DBG_CM, "Enable QUEUE EVENTS\n");
+	cm_core->event_wq = create_singlethread_workqueue("nesewq");
+	cm_core->post_event = nes_cm_post_event;
+	nes_debug(NES_DBG_CM, "Enable QUEUE DISCONNECTS\n");
+	cm_core->disconn_wq = create_singlethread_workqueue("nesdwq");
+
+	print_core(cm_core);
+	return cm_core;
+}
+
+
+/**
+ * mini_cm_dealloc_core - deallocate a top level instance of a cm core
+ */
+int mini_cm_dealloc_core(struct nes_cm_core *cm_core)
+{
+	nes_debug(NES_DBG_CM, "De-Alloc CM Core (%p)\n", cm_core);
+
+	if (!cm_core)
+		return -EINVAL;
+
+	barrier();
+
+	del_timer(&cm_core->tcp_timer);	/* harmless when the timer is not pending */
+
+	destroy_workqueue(cm_core->event_wq);
+	destroy_workqueue(cm_core->disconn_wq);
+	/* free whatever frames are still parked on the free list */
+	skb_queue_purge(&cm_core->tx_free_list);
+	nes_debug(NES_DBG_CM, "\n");
+	kfree(cm_core);
+
+	return 0;
+}
+
+
+/**
+ * mini_cm_get - return the state field of the cm core
+ */
+int mini_cm_get(struct nes_cm_core *cm_core)
+{
+	return cm_core->state;
+}
+
+
+/**
+ * mini_cm_set - update one tunable of the cm core
+ *
+ * Returns 0 on success, -EINVAL when @type is not a known option.
+ */
+int mini_cm_set(struct nes_cm_core *cm_core, u32 type, u32 value)
+{
+	/* each recognised option maps straight onto one cm_core field */
+	if (type == NES_CM_SET_PKT_SIZE) {
+		cm_core->mtu = value;
+		return 0;
+	}
+
+	if (type == NES_CM_SET_FREE_PKT_Q_SIZE) {
+		cm_core->free_tx_pkt_max = value;
+		return 0;
+	}
+
+	/* unknown set option */
+	return -EINVAL;
+}
+
+
+/**
+ * nes_cm_init_tsa_conn - load the QP context from the cm node's TCP state and
+ * mark the node TSA; MPA frames must be successfully exchanged when this is called
+ */
+static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_node)
+{
+	int ret = 0;
+
+	if (!nesqp)
+		return -EINVAL;
+
+	nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_IPV4 |
+			NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG |
+			NES_QPCONTEXT_MISC_DROS);
+
+	if (cm_node->tcp_cntxt.snd_wscale || cm_node->tcp_cntxt.rcv_wscale)
+		nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WSCALE);
+
+	nesqp->nesqp_context->misc2 |= cpu_to_le32(64 << NES_QPCONTEXT_MISC2_TTL_SHIFT);
+
+	nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16);
+
+	nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32(
+			(u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT);
+
+	nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32(
+			(cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) &
+			NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK);
+
+	nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32(
+			(cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) &
+			NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK);
+
+	nesqp->nesqp_context->keepalive = cpu_to_le32(0x80);
+	nesqp->nesqp_context->ts_recent = 0;
+	nesqp->nesqp_context->ts_age = 0;
+	nesqp->nesqp_context->snd_nxt = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+	nesqp->nesqp_context->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd);
+	nesqp->nesqp_context->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
+	nesqp->nesqp_context->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd <<
+			cm_node->tcp_cntxt.rcv_wscale);
+	nesqp->nesqp_context->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+	nesqp->nesqp_context->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+	nesqp->nesqp_context->srtt = 0;
+	nesqp->nesqp_context->rttvar = cpu_to_le32(0x6);
+	nesqp->nesqp_context->ssthresh = cpu_to_le32(0x3FFFC000);
+	nesqp->nesqp_context->cwnd = cpu_to_le32(2*cm_node->tcp_cntxt.mss);
+	nesqp->nesqp_context->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
+	nesqp->nesqp_context->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+	nesqp->nesqp_context->max_snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd);
+
+	nes_debug(NES_DBG_CM, "QP%u: rcv_nxt = 0x%08X, snd_nxt = 0x%08X,"
+			" Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n",
+			nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
+			le32_to_cpu(nesqp->nesqp_context->snd_nxt),
+			cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale),
+			le32_to_cpu(nesqp->nesqp_context->rcv_wnd),
+			le32_to_cpu(nesqp->nesqp_context->misc));
+	nes_debug(NES_DBG_CM, "  snd_wnd  = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->snd_wnd));
+	nes_debug(NES_DBG_CM, "  snd_cwnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->cwnd));
+	nes_debug(NES_DBG_CM, "  max_swnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->max_snd_wnd));
+
+	nes_debug(NES_DBG_CM, "Change cm_node state to TSA\n");
+	cm_node->state = NES_CM_STATE_TSA;
+
+	return ret;
+}
+
+
+/**
+ * nes_cm_disconn - queue disconnect work for a QP unless one is already pending
+ */
+int nes_cm_disconn(struct nes_qp *nesqp)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&nesqp->lock, irqflags);
+	if (nesqp->disconn_pending != 0) {
+		/* a disconnect is already pending: only drop a QP reference */
+		spin_unlock_irqrestore(&nesqp->lock, irqflags);
+		nes_rem_ref(&nesqp->ibqp);
+		return 0;
+	}
+	nesqp->disconn_pending++;
+	spin_unlock_irqrestore(&nesqp->lock, irqflags);
+
+	/* hand the work to nes_disconnect_worker() on the disconnect workqueue */
+	INIT_WORK(&nesqp->disconn_work, nes_disconnect_worker);
+	queue_work(g_cm_core->disconn_wq, &nesqp->disconn_work);
+
+	return 0;
+}
+
+
+/**
+ * nes_disconnect_worker - work handler that runs nes_cm_disconn_true() for a QP
+ */
+void nes_disconnect_worker(struct work_struct *work)
+{
+	struct nes_qp *nesqp = container_of(work, struct nes_qp, disconn_work);
+
+	nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n",
+			nesqp->last_aeq, nesqp->hwqp.qp_id);
+	nes_cm_disconn_true(nesqp);
+}
+
+
+/**
+ * nes_cm_disconn_true - send DISCONNECT and/or CLOSE events for a QP to its cm_id and tear the connection down
+ */
+int nes_cm_disconn_true(struct nes_qp *nesqp)
+{
+	unsigned long flags;
+	int ret = 0;
+	struct iw_cm_id *cm_id;
+	struct iw_cm_event cm_event;
+	struct nes_vnic *nesvnic;
+	u16 last_ae;
+	u8 original_hw_tcp_state;
+	u8 original_ibqp_state;
+	u8 issued_disconnect_reset = 0;
+
+	if (!nesqp) {
+		nes_debug(NES_DBG_CM, "disconnect_worker nesqp is NULL\n");
+		return -1;
+	}
+
+	spin_lock_irqsave(&nesqp->lock, flags);
+	cm_id = nesqp->cm_id;
+	/* make sure we haven't already closed this connection */
+	if (!cm_id) {
+		nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n",
+				nesqp->hwqp.qp_id);
+		spin_unlock_irqrestore(&nesqp->lock, flags);
+		nes_rem_ref(&nesqp->ibqp);
+		return -1;
+	}
+
+	nesvnic = to_nesvnic(nesqp->ibqp.device);
+	nes_debug(NES_DBG_CM, "Disconnecting QP%u\n", nesqp->hwqp.qp_id);
+
+	original_hw_tcp_state = nesqp->hw_tcp_state;
+	original_ibqp_state   = nesqp->ibqp_state;
+	last_ae = nesqp->last_aeq;
+
+
+	nes_debug(NES_DBG_CM, "set ibqp_state=%u\n", nesqp->ibqp_state);
+
+	if ((nesqp->cm_id) && (cm_id->event_handler)) {
+		if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
+				((original_ibqp_state == IB_QPS_RTS) &&
+				(last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+			atomic_inc(&cm_disconnects);
+			cm_event.event = IW_CM_EVENT_DISCONNECT;
+			if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) {
+				issued_disconnect_reset = 1;
+				cm_event.status = IW_CM_EVENT_STATUS_RESET;
+				nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event (status reset) for "
+						" QP%u, cm_id = %p. \n",
+						nesqp->hwqp.qp_id, cm_id);
+			} else {
+				cm_event.status = IW_CM_EVENT_STATUS_OK;
+			}
+
+			cm_event.local_addr = cm_id->local_addr;
+			cm_event.remote_addr = cm_id->remote_addr;
+			cm_event.private_data = NULL;
+			cm_event.private_data_len = 0;
+
+			nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event for "
+					" QP%u, SQ Head = %u, SQ Tail = %u. cm_id = %p, refcount = %u.\n",
+					nesqp->hwqp.qp_id,
+					nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail, cm_id,
+					atomic_read(&nesqp->refcount));
+
+			spin_unlock_irqrestore(&nesqp->lock, flags); /* the upcall below runs without the QP lock */
+			ret = cm_id->event_handler(cm_id, &cm_event);
+			if (ret)
+				nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
+			spin_lock_irqsave(&nesqp->lock, flags);
+		}
+
+		nesqp->disconn_pending = 0;
+		/* There might have been another AE while the lock was released */
+		original_hw_tcp_state = nesqp->hw_tcp_state;
+		original_ibqp_state   = nesqp->ibqp_state;
+		last_ae = nesqp->last_aeq;
+
+		if ((issued_disconnect_reset == 0) && (nesqp->cm_id) &&
+				((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
+				 (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) ||
+				 (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) ||
+				 (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+			atomic_inc(&cm_closes);
+			nesqp->cm_id = NULL; /* the local cm_id copy is still used for the CLOSE upcall below */
+			nesqp->in_disconnect = 0;
+			spin_unlock_irqrestore(&nesqp->lock, flags);
+			nes_disconnect(nesqp, 1);
+
+			cm_id->provider_data = nesqp;
+			/* Send up the close complete event */
+			cm_event.event = IW_CM_EVENT_CLOSE;
+			cm_event.status = IW_CM_EVENT_STATUS_OK;
+			cm_event.provider_data = cm_id->provider_data;
+			cm_event.local_addr = cm_id->local_addr;
+			cm_event.remote_addr = cm_id->remote_addr;
+			cm_event.private_data = NULL;
+			cm_event.private_data_len = 0;
+
+			ret = cm_id->event_handler(cm_id, &cm_event);
+			if (ret) {
+				nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
+			}
+
+			cm_id->rem_ref(cm_id);
+
+			spin_lock_irqsave(&nesqp->lock, flags);
+			if (nesqp->flush_issued == 0) {
+				nesqp->flush_issued = 1;
+				spin_unlock_irqrestore(&nesqp->lock, flags);
+				flush_wqes(nesvnic->nesdev, nesqp, NES_CQP_FLUSH_RQ, 1);
+			} else {
+				spin_unlock_irqrestore(&nesqp->lock, flags);
+			}
+
+			/* This reference is from either ModifyQP or the AE processing,
+					there is still a race here with modifyqp */
+			nes_rem_ref(&nesqp->ibqp);
+
+		} else {
+			cm_id = nesqp->cm_id;
+			spin_unlock_irqrestore(&nesqp->lock, flags);
+			/* check to see if the inbound reset beat the outbound reset */
+			if ((!cm_id) && (last_ae==NES_AEQE_AEID_RESET_SENT)) {
+				nes_debug(NES_DBG_CM, "QP%u: Decing refcount due to inbound reset"
+						" beating the outbound reset.\n",
+						nesqp->hwqp.qp_id);
+				nes_rem_ref(&nesqp->ibqp);
+			}
+		}
+	} else {
+		nesqp->disconn_pending = 0;
+		spin_unlock_irqrestore(&nesqp->lock, flags);
+	}
+	nes_rem_ref(&nesqp->ibqp);
+
+	return 0;
+}
+
+
+/**
+ * nes_disconnect - free a non-active connection's MPA frame buffer and close the QP's CM node (@abrupt is not used here)
+ */
+int nes_disconnect(struct nes_qp *nesqp, int abrupt)
+{
+	int ret = 0;
+	struct nes_vnic *nesvnic;
+	struct nes_device *nesdev;
+
+	nesvnic = to_nesvnic(nesqp->ibqp.device);
+	if (!nesvnic)
+		return -EINVAL;
+
+	nesdev = nesvnic->nesdev;
+
+	nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
+			atomic_read(&nesvnic->netdev->refcnt));
+
+	if (nesqp->active_conn) {
+
+		/* indicate this connection is NOT active */
+		nesqp->active_conn = 0;
+	} else {
+		/* Need to free the Last Streaming Mode Message (same size expression as the allocation in nes_accept) */
+		if (nesqp->ietf_frame) {
+			pci_free_consistent(nesdev->pcidev,
+					nesqp->private_data_len+sizeof(struct ietf_mpa_frame),
+					nesqp->ietf_frame, nesqp->ietf_frame_pbase);
+		}
+	}
+
+	/* close the CM node down if it is still active */
+	if (nesqp->cm_node) {
+		nes_debug(NES_DBG_CM, "Call close API\n");
+
+		g_cm_core->api->close(g_cm_core, nesqp->cm_node);
+		nesqp->cm_node = NULL;
+	}
+
+	return ret;
+}
+
+
+/**
+ * nes_accept - accept a connect request: build the MPA reply frame, program the QP context, move the QP to RTS and report ESTABLISHED
+ */
+int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+	u64 u64temp;
+	struct ib_qp *ibqp;
+	struct nes_qp *nesqp;
+	struct nes_vnic *nesvnic;
+	struct nes_device *nesdev;
+	struct nes_cm_node *cm_node;
+	struct nes_adapter *adapter;
+	struct ib_qp_attr attr;
+	struct iw_cm_event cm_event;
+	struct nes_hw_qp_wqe *wqe;
+	struct nes_v4_quad nes_quad;
+	int ret;
+
+	ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
+	if (!ibqp)
+		return -EINVAL;
+
+	/* get all our handles */
+	nesqp = to_nesqp(ibqp);
+	nesvnic = to_nesvnic(nesqp->ibqp.device);
+	nesdev = nesvnic->nesdev;
+	adapter = nesdev->nesadapter;
+
+	nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n",
+			nesvnic, nesvnic->netdev, nesvnic->netdev->name);
+
+	/* since this is from a listen, we were able to put node handle into cm_id */
+	cm_node = (struct nes_cm_node *)cm_id->provider_data;
+
+	/* associate the node with the QP */
+	nesqp->cm_node = (void *)cm_node;
+
+	nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu\n",
+			nesqp->hwqp.qp_id, cm_node, jiffies);
+	atomic_inc(&cm_accepts);
+
+	nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
+			atomic_read(&nesvnic->netdev->refcnt));
+
+		/* allocate the ietf frame and space for private data */
+		nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev,
+				sizeof(struct ietf_mpa_frame) + conn_param->private_data_len,
+				&nesqp->ietf_frame_pbase);
+
+		if (!nesqp->ietf_frame) {
+			nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n");
+			return -ENOMEM;
+		}
+
+
+		/* setup the MPA frame */
+		nesqp->private_data_len = conn_param->private_data_len;
+		memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
+
+		memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data,
+				conn_param->private_data_len);
+
+		nesqp->ietf_frame->priv_data_len = cpu_to_be16(conn_param->private_data_len);
+		nesqp->ietf_frame->rev = mpa_version;
+		nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC;
+
+		/* setup our first outgoing iWarp send WQE (the IETF frame response) */
+		wqe = &nesqp->hwqp.sq_vbase[0];
+
+		if (cm_id->remote_addr.sin_addr.s_addr != cm_id->local_addr.sin_addr.s_addr) {
+			u64temp = (unsigned long)nesqp;
+			u64temp |= NES_SW_CONTEXT_ALIGN>>1;
+			set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
+					    u64temp);
+			wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
+					cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | NES_IWARP_SQ_WQE_WRPDU);
+			wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] =
+					cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame));
+			wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] =
+					cpu_to_le32((u32)nesqp->ietf_frame_pbase);
+			wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] =
+					cpu_to_le32((u32)((u64)nesqp->ietf_frame_pbase >> 32));
+			wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] =
+					cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame));
+			wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
+
+			nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(
+					NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | NES_QPCONTEXT_ORDIRD_WRPDU);
+		} else {
+			nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
+					NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM));
+		}
+		nesqp->skip_lsmm = 1;
+
+
+	/* Cache the cm_id in the qp */
+	nesqp->cm_id = cm_id;
+	cm_node->cm_id = cm_id;
+
+	/*  nesqp->cm_node = (void *)cm_id->provider_data; */
+	cm_id->provider_data = nesqp;
+	nesqp->active_conn   = 0;
+
+	nes_cm_init_tsa_conn(nesqp, cm_node);
+
+	nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
+	nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
+	nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
+
+	nesqp->nesqp_context->misc2 |= cpu_to_le32(
+			(u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
+
+	nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32(
+			nes_arp_table(nesdev, le32_to_cpu(nesqp->nesqp_context->ip0), NULL,
+			NES_ARP_RESOLVE) << 16);
+
+	nesqp->nesqp_context->ts_val_delta = cpu_to_le32(
+			jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
+
+	nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id);
+
+	nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(
+			((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT));
+	nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord);
+
+	memset(&nes_quad, 0, sizeof(nes_quad));
+	nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
+	nes_quad.SrcIpadr      = cm_id->remote_addr.sin_addr.s_addr;
+	nes_quad.TcpPorts[0]   = cm_id->remote_addr.sin_port;
+	nes_quad.TcpPorts[1]   = cm_id->local_addr.sin_port;
+
+	/* Produce hash key: inverted crc32c of the quad, masked with the adapter's hte_index_mask */
+	nesqp->hte_index = cpu_to_be32(
+			crc32c(~0, (void *)&nes_quad, sizeof(nes_quad)) ^ 0xffffffff);
+	nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n",
+			nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask);
+
+	nesqp->hte_index &= adapter->hte_index_mask;
+	nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
+
+	cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
+
+	nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = 0x%08X:0x%04X,"
+			" rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + private data length=%zu.\n",
+			nesqp->hwqp.qp_id,
+			ntohl(cm_id->remote_addr.sin_addr.s_addr),
+			ntohs(cm_id->remote_addr.sin_port),
+			ntohl(cm_id->local_addr.sin_addr.s_addr),
+			ntohs(cm_id->local_addr.sin_port),
+			le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
+			le32_to_cpu(nesqp->nesqp_context->snd_nxt),
+			conn_param->private_data_len+sizeof(struct ietf_mpa_frame));
+
+	attr.qp_state = IB_QPS_RTS;
+	nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
+
+	/* notify OF layer that accept event was successful */
+	cm_id->add_ref(cm_id);
+
+	cm_event.event = IW_CM_EVENT_ESTABLISHED;
+	cm_event.status = IW_CM_EVENT_STATUS_ACCEPTED;
+	cm_event.provider_data = (void *)nesqp;
+	cm_event.local_addr = cm_id->local_addr;
+	cm_event.remote_addr = cm_id->remote_addr;
+	cm_event.private_data = NULL;
+	cm_event.private_data_len = 0;
+	ret = cm_id->event_handler(cm_id, &cm_event);
+	if (cm_node->loopbackpartner) {
+		cm_node->loopbackpartner->mpa_frame_size = nesqp->private_data_len;
+		/* copy the MPA private data into the loopback partner's frame buffer */
+		memcpy(cm_node->loopbackpartner->mpa_frame_buf, nesqp->ietf_frame->priv_data,
+			   nesqp->private_data_len);
+		create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED);
+	}
+	if (ret)
+		printk("%s[%u] OFA CM event_handler returned, ret=%d\n",
+				__FUNCTION__, __LINE__, ret);
+
+	return 0;
+}
+
+
+/**
+ * nes_reject - reject a connect request by handing the CM core an MPA reply frame flagged as a reject
+ */
+int nes_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+	struct nes_cm_node *cm_node;
+	struct nes_cm_core *cm_core;
+
+	atomic_inc(&cm_rejects);
+	cm_node = (struct nes_cm_node *) cm_id->provider_data;
+	cm_core = cm_node->cm_core;
+	cm_node->mpa_frame_size = sizeof(struct ietf_mpa_frame) + pdata_len;
+	/* copy exactly IETF_MPA_KEY_SIZE key bytes, as nes_accept() does; strcpy() would also write a NUL past them */
+	memcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
+	memcpy(&cm_node->mpa_frame.priv_data, pdata, pdata_len);
+
+	cm_node->mpa_frame.priv_data_len = cpu_to_be16(pdata_len);
+	cm_node->mpa_frame.rev = mpa_version;
+	cm_node->mpa_frame.flags = IETF_MPA_FLAGS_CRC | IETF_MPA_FLAGS_REJECT;
+
+	cm_core->api->reject(cm_core, &cm_node->mpa_frame, cm_node);
+
+	return 0;
+}
+
+
+/**
+ * nes_connect - build the MPA request frame for @cm_id and start an active connection through the CM core
+ * returns 0 on success, -EINVAL if the QP or device cannot be resolved, -ENOMEM on allocation or connect failure
+ */
+int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+	struct ib_qp *ibqp;
+	struct nes_qp *nesqp;
+	struct nes_vnic *nesvnic;
+	struct nes_device *nesdev;
+	struct nes_cm_node *cm_node;
+	struct nes_cm_info cm_info;
+
+	ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
+	if (!ibqp)
+		return -EINVAL;
+	nesqp = to_nesqp(ibqp);
+	if (!nesqp)
+		return -EINVAL;
+	nesvnic = to_nesvnic(nesqp->ibqp.device);
+	if (!nesvnic)
+		return -EINVAL;
+	nesdev  = nesvnic->nesdev;
+	if (!nesdev)
+		return -EINVAL;
+
+	atomic_inc(&cm_connects);
+
+	nesqp->ietf_frame = kzalloc(sizeof(struct ietf_mpa_frame) +
+			conn_param->private_data_len, GFP_KERNEL);
+	if (!nesqp->ietf_frame)
+		return -ENOMEM;
+
+	/* set qp as having an active connection */
+	nesqp->active_conn = 1;
+
+	nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = 0x%08X:0x%04X.\n",
+			nesqp->hwqp.qp_id,
+			ntohl(cm_id->remote_addr.sin_addr.s_addr),
+			ntohs(cm_id->remote_addr.sin_port),
+			ntohl(cm_id->local_addr.sin_addr.s_addr),
+			ntohs(cm_id->local_addr.sin_port));
+
+	/* cache the cm_id in the qp */
+	nesqp->cm_id = cm_id;
+
+	cm_id->provider_data = nesqp;
+
+	/* copy the private data */
+	if (conn_param->private_data_len) {
+		memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data,
+				conn_param->private_data_len);
+	}
+
+	nesqp->private_data_len = conn_param->private_data_len;
+	nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord);
+	nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord);
+	nes_debug(NES_DBG_CM, "mpa private data len =%u\n", conn_param->private_data_len);
+	/* copy exactly IETF_MPA_KEY_SIZE key bytes, as nes_accept() does; strcpy() would also write a NUL past them */
+	memcpy(&nesqp->ietf_frame->key[0], IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
+	nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC;
+	nesqp->ietf_frame->rev = IETF_MPA_VERSION;
+	nesqp->ietf_frame->priv_data_len = htons(conn_param->private_data_len);
+
+	if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr)
+		nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
+				PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
+
+	/* set up the connection params for the node */
+	cm_info.loc_addr = (cm_id->local_addr.sin_addr.s_addr);
+	cm_info.loc_port = (cm_id->local_addr.sin_port);
+	cm_info.rem_addr = (cm_id->remote_addr.sin_addr.s_addr);
+	cm_info.rem_port = (cm_id->remote_addr.sin_port);
+	cm_info.cm_id = cm_id;
+	cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
+
+	cm_id->add_ref(cm_id);
+	nes_add_ref(&nesqp->ibqp);
+
+	/* create a connect CM node connection */
+	cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, nesqp->ietf_frame, &cm_info);
+	if (!cm_node) {
+		if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr)
+			nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
+					PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL);
+		nes_rem_ref(&nesqp->ibqp);
+		kfree(nesqp->ietf_frame);
+		nesqp->ietf_frame = NULL;
+		cm_id->rem_ref(cm_id);
+		return -ENOMEM;
+	}
+
+	cm_node->apbvt_set = 1;
+	nesqp->cm_node = cm_node;
+
+	return 0;
+}
+
+
+/**
+ * nes_create_listen - create a CM listener for @cm_id and, unless the listener node is reused, add its port via nes_manage_apbvt()
+ */
+int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+	struct nes_vnic *nesvnic;
+	struct nes_cm_listener *cm_node;
+	struct nes_cm_info cm_info;
+	struct nes_adapter *adapter;
+	int err;
+
+
+	nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n",
+			cm_id, ntohs(cm_id->local_addr.sin_port));
+
+	nesvnic = to_nesvnic(cm_id->device);
+	if (!nesvnic)
+		return -EINVAL;
+	adapter = nesvnic->nesdev->nesadapter;
+	nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n",
+			nesvnic, nesvnic->netdev, nesvnic->netdev->name);
+
+	nes_debug(NES_DBG_CM, "nesvnic->local_ipaddr=0x%08x, sin_addr.s_addr=0x%08x\n",
+			nesvnic->local_ipaddr, cm_id->local_addr.sin_addr.s_addr);
+
+	/* setup listen params in our api call struct; the address comes from the vnic, the port from cm_id */
+	cm_info.loc_addr = nesvnic->local_ipaddr;
+	cm_info.loc_port = cm_id->local_addr.sin_port;
+	cm_info.backlog = backlog;
+	cm_info.cm_id = cm_id;
+
+	cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
+
+
+	cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info);
+	if (!cm_node) {
+		printk("%s[%u] Error returned from listen API call\n",
+				__FUNCTION__, __LINE__);
+		return -ENOMEM;
+	}
+
+	cm_id->provider_data = cm_node;
+
+	if (!cm_node->reused_node) {
+		err = nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
+				PCI_FUNC(nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
+		if (err) {
+			printk("nes_manage_apbvt call returned %d.\n", err);
+			g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node);
+			return err;
+		}
+		cm_listens_created++;
+	}
+
+	cm_id->add_ref(cm_id);
+	cm_id->provider_data = (void *)cm_node; /* same value as already stored above */
+
+
+	return 0;
+}
+
+
+/**
+ * nes_destroy_listen - stop the listener stored in cm_id->provider_data (if any) and drop a cm_id reference
+ */
+int nes_destroy_listen(struct iw_cm_id *cm_id)
+{
+	if (cm_id->provider_data)
+		g_cm_core->api->stop_listener(g_cm_core, cm_id->provider_data);
+	else
+		nes_debug(NES_DBG_CM, "cm_id->provider_data was NULL\n");
+
+	cm_id->rem_ref(cm_id);
+
+	return 0;
+}
+
+
+/**
+ * nes_cm_recv - count a received packet and pass it to the CM core's recv_pkt handler; always returns 0
+ */
+int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice)
+{
+	cm_packets_received++;
+	if (!g_cm_core || !g_cm_core->api) {
+		nes_debug(NES_DBG_CM, "Unable to process packet for CM,"
+				" cm is not setup properly.\n");
+		return 0;
+	}
+
+	g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb);
+	return 0;
+}
+
+
+/**
+ * nes_cm_start - allocate the global CM core
+ * returns 0 on success or -ENOMEM when the core cannot be allocated
+ */
+int nes_cm_start(void)
+{
+	nes_debug(NES_DBG_CM, "\n");
+
+	/* the handle is kept in g_cm_core, which the other CM entry points in this file use */
+	g_cm_core = nes_cm_alloc_core();
+	if (!g_cm_core)
+		return -ENOMEM;
+
+	return 0;
+}
+
+
+/**
+ * nes_cm_stop - destroy the global CM core through its destroy_cm_core hook
+ * g_cm_core is dereferenced without a NULL check; always returns 0
+ */
+int nes_cm_stop(void)
+{
+	g_cm_core->api->destroy_cm_core(g_cm_core);
+	return 0;
+}
+
+
+/**
+ * cm_event_connected - handle NES_CM_EVENT_CONNECTED for a cm_node
+ * programs the QP context, moves the QP to RTS and sends CONNECT_REPLY (accepted) to the cm_id's event handler
+ */
+void cm_event_connected(struct nes_cm_event *event)
+{
+	u64 u64temp;
+	struct nes_qp *nesqp;
+	struct nes_vnic *nesvnic;
+	struct nes_device *nesdev;
+	struct nes_cm_node *cm_node;
+	struct nes_adapter *nesadapter;
+	struct ib_qp_attr attr;
+	struct iw_cm_id *cm_id;
+	struct iw_cm_event cm_event;
+	struct nes_hw_qp_wqe *wqe;
+	struct nes_v4_quad nes_quad;
+	int ret;
+
+	/* get all our handles */
+	cm_node = event->cm_node;
+	cm_id = cm_node->cm_id;
+	nes_debug(NES_DBG_CM, "cm_event_connected - %p - cm_id = %p\n", cm_node, cm_id);
+	nesqp = (struct nes_qp *)cm_id->provider_data;
+	nesvnic = to_nesvnic(nesqp->ibqp.device);
+	nesdev = nesvnic->nesdev;
+	nesadapter = nesdev->nesadapter;
+
+	if (nesqp->destroyed) {
+		return; /* NOTE(review): skips the nes_rem_ref() done at the end of this function - confirm intended */
+	}
+	atomic_inc(&cm_connecteds);
+	nes_debug(NES_DBG_CM, "QP%u attempting to connect to  0x%08X:0x%04X on"
+			" local port 0x%04X. jiffies = %lu.\n",
+			nesqp->hwqp.qp_id,
+			ntohl(cm_id->remote_addr.sin_addr.s_addr),
+			ntohs(cm_id->remote_addr.sin_port),
+			ntohs(cm_id->local_addr.sin_port),
+			jiffies);
+
+	nes_cm_init_tsa_conn(nesqp, cm_node);
+
+	/* set the QP tsa context */
+	nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
+	nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
+	nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
+
+	nesqp->nesqp_context->misc2 |= cpu_to_le32(
+			(u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
+	nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32(
+			nes_arp_table(nesdev, le32_to_cpu(nesqp->nesqp_context->ip0),
+			NULL, NES_ARP_RESOLVE) << 16);
+	nesqp->nesqp_context->ts_val_delta = cpu_to_le32(
+			jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
+	nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id);
+	nesqp->nesqp_context->ird_ord_sizes |=
+			cpu_to_le32((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT);
+
+	/* Adjust tail for not having a LSMM */
+	nesqp->hwqp.sq_tail = 1;
+
+#if defined(NES_SEND_FIRST_WRITE)
+		if (cm_node->send_write0) {
+			nes_debug(NES_DBG_CM, "Sending first write.\n");
+			wqe = &nesqp->hwqp.sq_vbase[0];
+			u64temp = (unsigned long)nesqp;
+			u64temp |= NES_SW_CONTEXT_ALIGN>>1;
+			set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
+					    u64temp);
+			wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(NES_IWARP_SQ_OP_RDMAW);
+			wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0;
+			wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0;
+			wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0;
+			wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
+			wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
+
+			/* use the reserved spot on the WQ for the extra first WQE */
+			nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
+					NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM));
+			nesqp->skip_lsmm = 1;
+			nesqp->hwqp.sq_tail = 0;
+			nes_write32(nesdev->regs + NES_WQE_ALLOC,
+					(1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
+		}
+#endif
+
+	memset(&nes_quad, 0, sizeof(nes_quad));
+
+	nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
+	nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
+	nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
+	nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
+
+	/* Produce hash key: inverted crc32c of the quad, masked with the adapter's hte_index_mask */
+	nesqp->hte_index = cpu_to_be32(
+			crc32c(~0, (void *)&nes_quad, sizeof(nes_quad)) ^ 0xffffffff);
+	nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n",
+			nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask);
+
+	nesqp->hte_index &= nesadapter->hte_index_mask;
+	nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
+
+	nesqp->ietf_frame = &cm_node->mpa_frame;
+	nesqp->private_data_len = (u8) cm_node->mpa_frame_size;
+	cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
+
+	/* modify QP state to rts */
+	attr.qp_state = IB_QPS_RTS;
+	nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
+
+	/* notify OF layer we successfully created the requested connection */
+	cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
+	cm_event.status = IW_CM_EVENT_STATUS_ACCEPTED;
+	cm_event.provider_data = cm_id->provider_data;
+	cm_event.local_addr.sin_family = AF_INET;
+	cm_event.local_addr.sin_port = cm_id->local_addr.sin_port;
+	cm_event.remote_addr = cm_id->remote_addr;
+
+		cm_event.private_data = (void *)event->cm_node->mpa_frame_buf;
+		cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size;
+
+	cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr; /* NOTE(review): local address is taken from cm_info.rem_addr - confirm */
+	ret = cm_id->event_handler(cm_id, &cm_event);
+	nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
+
+	if (ret)
+		printk("%s[%u] OFA CM event_handler returned, ret=%d\n",
+				__FUNCTION__, __LINE__, ret);
+	nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = %lu\n",
+			nesqp->hwqp.qp_id, jiffies );
+
+	nes_rem_ref(&nesqp->ibqp);
+
+	return;
+}
+
+
+/**
+ * cm_event_connect_error - detach the QP from its cm_id and report CONNECT_REPLY with rejected status
+ */
+void cm_event_connect_error(struct nes_cm_event *event)
+{
+	struct nes_qp *nesqp;
+	struct iw_cm_id *cm_id;
+	struct iw_cm_event cm_event;
+	/* struct nes_cm_info cm_info; */
+	int ret;
+
+	if (!event->cm_node)
+		return;
+
+	cm_id = event->cm_node->cm_id;
+	if (!cm_id) {
+		return;
+	}
+
+	nes_debug(NES_DBG_CM, "cm_node=%p, cm_id=%p\n", event->cm_node, cm_id);
+	nesqp = cm_id->provider_data;
+
+	if (!nesqp) {
+		return;
+	}
+
+	/* notify OF layer about this connection error event */
+	/* cm_id->rem_ref(cm_id); */
+	nesqp->cm_id = NULL;
+	cm_id->provider_data = NULL;
+	cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
+	cm_event.status = IW_CM_EVENT_STATUS_REJECTED;
+	cm_event.provider_data = cm_id->provider_data; /* NULL: cleared just above */
+	cm_event.local_addr = cm_id->local_addr;
+	cm_event.remote_addr = cm_id->remote_addr;
+	cm_event.private_data = NULL;
+	cm_event.private_data_len = 0;
+
+	nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, remove_addr=%08x\n",
+			cm_event.local_addr.sin_addr.s_addr, cm_event.remote_addr.sin_addr.s_addr);
+
+	ret = cm_id->event_handler(cm_id, &cm_event);
+	nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
+	if (ret)
+		printk("%s[%u] OFA CM event_handler returned, ret=%d\n",
+				__FUNCTION__, __LINE__, ret);
+	nes_rem_ref(&nesqp->ibqp);
+		cm_id->rem_ref(cm_id);
+
+	return;
+}
+
+
+/**
+ * cm_event_reset - clear the QP's cm_id and report DISCONNECT with reset status to the cm_id's event handler
+ */
+void cm_event_reset(struct nes_cm_event *event)
+{
+	struct nes_qp *nesqp;
+	struct iw_cm_id *cm_id;
+	struct iw_cm_event cm_event;
+	/* struct nes_cm_info cm_info; */
+	int ret;
+
+	if (!event->cm_node)
+		return;
+
+	if (!event->cm_node->cm_id)
+		return;
+
+	cm_id = event->cm_node->cm_id;
+
+	nes_debug(NES_DBG_CM, "%p - cm_id = %p\n", event->cm_node, cm_id);
+	nesqp = cm_id->provider_data; /* NOTE(review): not NULL-checked here, unlike cm_event_connect_error() - confirm */
+
+	nesqp->cm_id = NULL;
+	/* cm_id->provider_data = NULL; */
+	cm_event.event = IW_CM_EVENT_DISCONNECT;
+	cm_event.status = IW_CM_EVENT_STATUS_RESET;
+	cm_event.provider_data = cm_id->provider_data;
+	cm_event.local_addr = cm_id->local_addr;
+	cm_event.remote_addr = cm_id->remote_addr;
+	cm_event.private_data = NULL;
+	cm_event.private_data_len = 0;
+
+	ret = cm_id->event_handler(cm_id, &cm_event);
+	nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
+
+
+	/* the reset has been reported above; drop a cm_id reference */
+	cm_id->rem_ref(cm_id);
+
+	return;
+}
+
+
+/**
+ * cm_event_mpa_req - send IW_CM_EVENT_CONNECT_REQUEST to the node's cm_id, with the node's mpa_frame_buf as private data
+ */
+void cm_event_mpa_req(struct nes_cm_event *event)
+{
+	struct iw_cm_id   *cm_id;
+	struct iw_cm_event cm_event;
+	int ret;
+	struct nes_cm_node *cm_node;
+
+	cm_node = event->cm_node;
+	if (!cm_node)
+		return;
+	cm_id = cm_node->cm_id;
+
+	atomic_inc(&cm_connect_reqs);
+	nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n",
+			cm_node, cm_id, jiffies);
+
+	cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
+	cm_event.status = IW_CM_EVENT_STATUS_OK;
+	cm_event.provider_data = (void *)cm_node; /* nes_accept() and nes_reject() read the node back from provider_data */
+
+	cm_event.local_addr.sin_family = AF_INET;
+	cm_event.local_addr.sin_port = htons(event->cm_info.loc_port);
+	cm_event.local_addr.sin_addr.s_addr = htonl(event->cm_info.loc_addr);
+
+	cm_event.remote_addr.sin_family = AF_INET;
+	cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port);
+	cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr);
+
+		cm_event.private_data                = cm_node->mpa_frame_buf;
+		cm_event.private_data_len            = (u8) cm_node->mpa_frame_size;
+
+	ret = cm_id->event_handler(cm_id, &cm_event);
+	if (ret)
+		printk("%s[%u] OFA CM event_handler returned, ret=%d\n",
+				__FUNCTION__, __LINE__, ret);
+
+	return;
+}
+
+
+static void nes_cm_event_handler(struct work_struct *);
+
+/**
+ * nes_cm_post_event - queue @event on the CM core's event workqueue for nes_cm_event_handler()
+ * takes a cm_node reference and a cm_id reference first; nes_cm_event_handler() drops both after dispatch
+ */
+int nes_cm_post_event(struct nes_cm_event *event)
+{
+	atomic_inc(&event->cm_node->cm_core->events_posted);
+	add_ref_cm_node(event->cm_node);
+	event->cm_info.cm_id->add_ref(event->cm_info.cm_id);
+	INIT_WORK(&event->event_work, nes_cm_event_handler);
+	nes_debug(NES_DBG_CM, "queue_work, event=%p\n", event);
+
+	queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
+
+	nes_debug(NES_DBG_CM, "Exit\n");
+	return 0;
+}
+
+
+/**
+ * nes_cm_event_handler - work handler that dispatches a queued nes_cm_event by its type
+ * after dispatch it decrements events_posted, drops the cm_id and cm_node references
+ * and frees the nes_cm_event with kfree()
+ */
+static void nes_cm_event_handler(struct work_struct *work)
+{
+	struct nes_cm_event *event = container_of(work, struct nes_cm_event, event_work);
+	struct nes_cm_core *cm_core;
+
+	if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core)) {
+		return; /* NOTE(review): this path does not kfree(event) or drop the references released below - confirm */
+	}
+	cm_core = event->cm_node->cm_core;
+	nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n",
+			event, event->type, atomic_read(&cm_core->events_posted));
+
+	switch (event->type) {
+		case NES_CM_EVENT_MPA_REQ:
+			cm_event_mpa_req(event);
+			nes_debug(NES_DBG_CM, "CM Event: MPA REQUEST\n");
+			break;
+		case NES_CM_EVENT_RESET:
+			nes_debug(NES_DBG_CM, "CM Event: RESET\n");
+			cm_event_reset(event);
+			break;
+		case NES_CM_EVENT_CONNECTED:
+			if ((!event->cm_node->cm_id) ||
+				(event->cm_node->state != NES_CM_STATE_TSA)) {
+				break;
+			}
+			cm_event_connected(event);
+			nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n");
+			break;
+		case NES_CM_EVENT_ABORTED:
+			if ((!event->cm_node->cm_id) || (event->cm_node->state == NES_CM_STATE_TSA)) {
+				break;
+			}
+			cm_event_connect_error(event);
+			nes_debug(NES_DBG_CM, "CM Event: ABORTED\n");
+			break;
+		case NES_CM_EVENT_DROPPED_PKT:
+			nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n");
+			break;
+		default:
+			nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n");
+			break;
+	}
+
+	atomic_dec(&cm_core->events_posted);
+	event->cm_info.cm_id->rem_ref(event->cm_info.cm_id);
+	rem_ref_cm_node(cm_core, event->cm_node);
+	kfree(event);
+
+	return;
+}
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
new file mode 100644
index 0000000..a59f0a7
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef NES_CM_H
+#define NES_CM_H
+
+#define QUEUE_EVENTS
+
+#define NES_MANAGE_APBVT_DEL 0
+#define NES_MANAGE_APBVT_ADD 1
+
+/* IETF MPA -- defines, enums, structs ("IEFT_" in the two key macros is a misspelling of IETF) */
+#define IEFT_MPA_KEY_REQ  "MPA ID Req Frame"	/* 16 characters, i.e. IETF_MPA_KEY_SIZE without the NUL */
+#define IEFT_MPA_KEY_REP  "MPA ID Rep Frame"	/* 16 characters, i.e. IETF_MPA_KEY_SIZE without the NUL */
+#define IETF_MPA_KEY_SIZE 16
+#define IETF_MPA_VERSION  1
+
+enum ietf_mpa_flags {
+	IETF_MPA_FLAGS_MARKERS = 0x80,	/* receive Markers */
+	IETF_MPA_FLAGS_CRC     = 0x40,	/* use CRC */
+	IETF_MPA_FLAGS_REJECT  = 0x20,	/* Reject */
+};
+
+struct ietf_mpa_frame {	/* fixed MPA header followed by variable-length private data */
+	u8 key[IETF_MPA_KEY_SIZE];	/* 16 bytes; same length as the IEFT_MPA_KEY_REQ/REP strings */
+	u8 flags;	/* presumably a mask of enum ietf_mpa_flags -- TODO confirm against users */
+	u8 rev;
+	__be16 priv_data_len;	/* stored big-endian (__be16) */
+	u8 priv_data[0];	/* zero-length trailing array: adds nothing to sizeof, names the bytes after the header */
+};
+
+#define ietf_mpa_req_resp_frame ietf_mpa_frame
+
+struct nes_v4_quad {
+	u32 rsvd0;
+	__le32 DstIpAdrIndex;	/* Only most significant 5 bits are valid */
+	__be32 SrcIpadr;
+	__be16 TcpPorts[2];		/* src is low, dest is high */
+};
+
+struct nes_cm_node;
+enum nes_timer_type {
+	NES_TIMER_TYPE_SEND,
+	NES_TIMER_TYPE_RECV,
+	NES_TIMER_NODE_CLEANUP,
+	NES_TIMER_TYPE_CLOSE,
+};
+
+#define MAX_NES_IFS 4
+
+#define SET_ACK 1
+#define SET_SYN 2
+#define SET_FIN 4
+#define SET_RST 8
+
+struct option_base {
+	u8 optionnum;
+	u8 length;
+};
+
+enum option_numbers {
+	OPTION_NUMBER_END,
+	OPTION_NUMBER_NONE,
+	OPTION_NUMBER_MSS,
+	OPTION_NUMBER_WINDOW_SCALE,
+	OPTION_NUMBER_SACK_PERM,
+	OPTION_NUMBER_SACK,
+	OPTION_NUMBER_WRITE0 = 0xbc
+};
+
+struct option_mss {
+	u8 optionnum;
+	u8 length;
+	__be16 mss;
+};
+
+struct option_windowscale {
+	u8 optionnum;
+	u8 length;
+	u8 shiftcount;
+};
+
+union all_known_options {
+	char as_end;
+	struct option_base as_base;
+	struct option_mss as_mss;
+	struct option_windowscale as_windowscale;
+};
+
+struct nes_timer_entry {
+	struct list_head list;
+	unsigned long timetosend;	/* jiffies */
+	struct sk_buff *skb;
+	u32 type;
+	u32 retrycount;
+	u32 retranscount;
+	u32 context;
+	u32 seq_num;
+	u32 send_retrans;
+	int close_when_complete;
+	struct net_device *netdev;
+};
+
+#define NES_DEFAULT_RETRYS  64
+#define NES_DEFAULT_RETRANS 8
+#ifdef CONFIG_INFINIBAND_NES_DEBUG
+#define NES_RETRY_TIMEOUT   (1000*HZ/1000)	/* 1000 ms expressed in jiffies */
+#else
+#define NES_RETRY_TIMEOUT   (3000*HZ/1000)	/* 3000 ms expressed in jiffies */
+#endif
+#define NES_SHORT_TIME      (10)	/* NOTE(review): bare count, not scaled by HZ like its neighbours -- confirm unit */
+#define NES_LONG_TIME       (2000*HZ/1000)	/* 2000 ms expressed in jiffies */
+
+#define NES_CM_HASHTABLE_SIZE         1024
+#define NES_CM_TCP_TIMER_INTERVAL     3000
+#define NES_CM_DEFAULT_MTU            1540	/* also the size of nes_cm_node.mpa_frame_buf */
+#define NES_CM_DEFAULT_FRAME_CNT      10
+#define NES_CM_THREAD_STACK_SIZE      256
+#define NES_CM_DEFAULT_RCV_WND        64240	/* before we know that window scaling is allowed */
+#define NES_CM_DEFAULT_RCV_WND_SCALED 256960  /* after we know that window scaling is allowed; equals 64240 << 2 */
+#define NES_CM_DEFAULT_RCV_WND_SCALE  2
+#define NES_CM_DEFAULT_FREE_PKTS      0x000A
+#define NES_CM_FREE_PKT_LO_WATERMARK  2
+
+#define NES_CM_DEFAULT_MSS   536
+
+#define NES_CM_DEF_SEQ       0x159bf75f
+#define NES_CM_DEF_LOCAL_ID  0x3b47
+
+#define NES_CM_DEF_SEQ2      0x18ed5740
+#define NES_CM_DEF_LOCAL_ID2 0xb807
+
+typedef u32 nes_addr_t;
+
+#define nes_cm_tsa_context nes_qp_context
+
+struct nes_qp;
+
+/* cm node transition states */
+enum nes_cm_node_state {
+	NES_CM_STATE_UNKNOWN,
+	NES_CM_STATE_INITED,
+	NES_CM_STATE_LISTENING,
+	NES_CM_STATE_SYN_RCVD,
+	NES_CM_STATE_SYN_SENT,
+	NES_CM_STATE_ONE_SIDE_ESTABLISHED,
+	NES_CM_STATE_ESTABLISHED,
+	NES_CM_STATE_ACCEPTING,
+	NES_CM_STATE_MPAREQ_SENT,
+	NES_CM_STATE_TSA,
+	NES_CM_STATE_FIN_WAIT1,
+	NES_CM_STATE_FIN_WAIT2,
+	NES_CM_STATE_CLOSE_WAIT,
+	NES_CM_STATE_TIME_WAIT,
+	NES_CM_STATE_LAST_ACK,
+	NES_CM_STATE_CLOSING,
+	NES_CM_STATE_CLOSED
+};
+
+/* type of nes connection */
+enum nes_cm_conn_type {
+	NES_CM_IWARP_CONN_TYPE,
+};
+
+/* CM context params */
+struct nes_cm_tcp_context {
+	u8  client;
+
+	u32 loc_seq_num;
+	u32 loc_ack_num;
+	u32 rem_ack_num;
+	u32 rcv_nxt;
+
+	u32 loc_id;
+	u32 rem_id;
+
+	u32 snd_wnd;
+	u32 max_snd_wnd;
+
+	u32 rcv_wnd;
+	u32 mss;
+	u8  snd_wscale;
+	u8  rcv_wscale;
+
+	struct nes_cm_tsa_context tsa_cntxt;
+	struct timeval            sent_ts;
+};
+
+
+enum nes_cm_listener_state {
+	NES_CM_LISTENER_PASSIVE_STATE=1,
+	NES_CM_LISTENER_ACTIVE_STATE=2,
+	NES_CM_LISTENER_EITHER_STATE=3
+};
+
+struct nes_cm_listener {
+	struct list_head           list;
+	u64                        session_id;
+	struct nes_cm_core         *cm_core;
+	u8                         loc_mac[ETH_ALEN];
+	nes_addr_t                 loc_addr;
+	u16                        loc_port;
+	struct iw_cm_id            *cm_id;
+	enum nes_cm_conn_type      conn_type;
+	atomic_t                   ref_count;
+	struct nes_vnic            *nesvnic;
+	atomic_t                   pend_accepts_cnt;
+	int                        backlog;
+	enum nes_cm_listener_state listener_state;
+	u32                        reused_node;
+};
+
+/* per connection node and node state information */
+struct nes_cm_node {
+	u64                       session_id;
+	u32                       hashkey;
+
+	nes_addr_t                loc_addr, rem_addr;
+	u16                       loc_port, rem_port;
+
+	u8                        loc_mac[ETH_ALEN];
+	u8                        rem_mac[ETH_ALEN];
+
+	enum nes_cm_node_state    state;	/* compared with NES_CM_STATE_TSA when CONNECTED/ABORTED events are handled */
+	struct nes_cm_tcp_context tcp_cntxt;
+	struct nes_cm_core        *cm_core;	/* event handler reaches event_wq and events_posted through this */
+	struct sk_buff_head       resend_list;
+	atomic_t                  ref_count;
+	struct net_device         *netdev;
+
+	struct nes_cm_node        *loopbackpartner;
+	struct list_head          retrans_list;
+	spinlock_t                retrans_list_lock;
+	struct list_head          recv_list;
+	spinlock_t                recv_list_lock;
+
+	int                       send_write0;
+	union {	/* MPA header view overlaid on a raw byte buffer */
+		struct ietf_mpa_frame mpa_frame;
+		u8                    mpa_frame_buf[NES_CM_DEFAULT_MTU];	/* 1540 bytes; the bytes past the header back mpa_frame.priv_data[] */
+	};
+	u16                       mpa_frame_size;
+	struct iw_cm_id           *cm_id;	/* NULL-checked before CONNECTED/ABORTED events are delivered */
+	struct list_head          list;
+	int                       accelerated;
+	struct nes_cm_listener    *listener;
+	enum nes_cm_conn_type     conn_type;
+	struct nes_vnic           *nesvnic;
+	int                       apbvt_set;
+	int                       accept_pend;
+};
+
+/* structure for client or CM to fill when making CM api calls. */
+/*	- only need to set relevant data, based on op. */
+struct nes_cm_info {
+	union {	/* both pointers occupy the same storage; only one can be valid at a time */
+		struct iw_cm_id   *cm_id;	/* the member nes_cm_event_handler() uses for rem_ref() */
+		struct net_device *netdev;
+	};
+
+	u16 loc_port;
+	u16 rem_port;
+	nes_addr_t loc_addr;	/* nes_addr_t is a u32 */
+	nes_addr_t rem_addr;
+
+	enum nes_cm_conn_type  conn_type;
+	int backlog;
+};
+
+/* CM event codes */
+enum  nes_cm_event_type {
+	NES_CM_EVENT_UNKNOWN,
+	NES_CM_EVENT_ESTABLISHED,
+	NES_CM_EVENT_MPA_REQ,
+	NES_CM_EVENT_MPA_CONNECT,
+	NES_CM_EVENT_MPA_ACCEPT,
+	NES_CM_EVENT_MPA_ESTABLISHED,
+	NES_CM_EVENT_CONNECTED,
+	NES_CM_EVENT_CLOSED,
+	NES_CM_EVENT_RESET,
+	NES_CM_EVENT_DROPPED_PKT,
+	NES_CM_EVENT_CLOSE_IMMED,
+	NES_CM_EVENT_CLOSE_HARD,
+	NES_CM_EVENT_CLOSE_CLEAN,
+	NES_CM_EVENT_ABORTED,
+	NES_CM_EVENT_SEND_FIRST
+};
+
+/* event to post to CM event handler; nes_cm_event_handler() kfree()s it after dispatch */
+struct nes_cm_event {
+	enum nes_cm_event_type type;	/* selects the branch taken in nes_cm_event_handler() */
+
+	struct nes_cm_info cm_info;	/* handler calls cm_info.cm_id->rem_ref() when done */
+	struct work_struct event_work;	/* queued on cm_core->event_wq; handler recovers the event via container_of() */
+	struct nes_cm_node *cm_node;	/* handler drops one reference via rem_ref_cm_node() */
+};
+
+struct nes_cm_core {	/* state shared by all CM nodes and listeners of one core */
+	enum nes_cm_node_state  state;
+	atomic_t                session_id;
+
+	atomic_t                listen_node_cnt;
+	struct nes_cm_node      listen_list;	/* NOTE(review): a whole nes_cm_node, MTU-sized MPA buffer included, embedded by value */
+	spinlock_t              listen_list_lock;
+
+	u32                     mtu;
+	u32                     free_tx_pkt_max;
+	u32                     rx_pkt_posted;
+	struct sk_buff_head     tx_free_list;
+	atomic_t                ht_node_cnt;
+	struct list_head        connected_nodes;
+	/* struct list_head hashtable[NES_CM_HASHTABLE_SIZE]; */
+	spinlock_t              ht_lock;
+
+	struct timer_list       tcp_timer;
+
+	struct nes_cm_ops       *api;
+
+	int (*post_event)(struct nes_cm_event *event);
+	atomic_t                events_posted;	/* decremented once per event by nes_cm_event_handler() */
+	struct workqueue_struct *event_wq;	/* nes_cm_event.event_work items are queued here */
+	struct workqueue_struct *disconn_wq;
+
+	atomic_t                node_cnt;
+	u64                     aborted_connects;
+	u32                     options;
+
+	struct nes_cm_node      *current_listen_node;
+};
+
+
+#define NES_CM_SET_PKT_SIZE        (1 << 1)
+#define NES_CM_SET_FREE_PKT_Q_SIZE (1 << 2)
+
+/* CM ops/API for client interface */
+struct nes_cm_ops {
+	int (*accelerated)(struct nes_cm_core *, struct nes_cm_node *);
+	struct nes_cm_listener * (*listen)(struct nes_cm_core *, struct nes_vnic *,
+			struct nes_cm_info *);
+	int (*stop_listener)(struct nes_cm_core *, struct nes_cm_listener *);
+	struct nes_cm_node * (*connect)(struct nes_cm_core *,
+			struct nes_vnic *, struct ietf_mpa_frame *,
+			struct nes_cm_info *);
+	int (*close)(struct nes_cm_core *, struct nes_cm_node *);
+	int (*accept)(struct nes_cm_core *, struct ietf_mpa_frame *,
+			struct nes_cm_node *);
+	int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *,
+			struct nes_cm_node *);
+	int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
+			struct sk_buff *);
+	int (*destroy_cm_core)(struct nes_cm_core *);
+	int (*get)(struct nes_cm_core *);
+	int (*set)(struct nes_cm_core *, u32, u32);
+};
+
+
+int send_mpa_request(struct nes_cm_node *);
+struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *,
+		void *, u32, void *, u32, u8);
+int schedule_nes_timer(struct nes_cm_node *, struct sk_buff *,
+		enum nes_timer_type, int, int);
+void nes_cm_timer_tick(unsigned long);
+int send_syn(struct nes_cm_node *, u32);
+int send_reset(struct nes_cm_node *);
+int send_ack(struct nes_cm_node *);
+int send_fin(struct nes_cm_node *, struct sk_buff *);
+struct sk_buff *get_free_pkt(struct nes_cm_node *);
+int process_packet(struct nes_cm_node *, struct sk_buff *, struct nes_cm_core *);
+
+struct nes_cm_node * mini_cm_connect(struct nes_cm_core *,
+		struct nes_vnic *, struct ietf_mpa_frame *, struct nes_cm_info *);
+int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *, struct nes_cm_node *);
+int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *, struct nes_cm_node *);
+int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *);
+int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *);
+struct nes_cm_core *mini_cm_alloc_core(struct nes_cm_info *);
+int mini_cm_dealloc_core(struct nes_cm_core *);
+int mini_cm_get(struct nes_cm_core *);
+int mini_cm_set(struct nes_cm_core *, u32, u32);
+
+int nes_cm_disconn(struct nes_qp *);
+void nes_disconnect_worker(struct work_struct *);
+int nes_cm_disconn_true(struct nes_qp *);
+int nes_disconnect(struct nes_qp *, int);
+
+int nes_accept(struct iw_cm_id *, struct iw_cm_conn_param *);
+int nes_reject(struct iw_cm_id *, const void *, u8);
+int nes_connect(struct iw_cm_id *, struct iw_cm_conn_param *);
+int nes_create_listen(struct iw_cm_id *, int);
+int nes_destroy_listen(struct iw_cm_id *);
+
+int nes_cm_recv(struct sk_buff *, struct net_device *);
+int nes_cm_start(void);
+int nes_cm_stop(void);
+
+/* CM event handler functions */
+void cm_event_connected(struct nes_cm_event *);
+void cm_event_connect_error(struct nes_cm_event *);
+void cm_event_reset(struct nes_cm_event *);
+void cm_event_mpa_req(struct nes_cm_event *);
+int nes_cm_post_event(struct nes_cm_event *);
+
+#endif			/* NES_CM_H */
diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h
new file mode 100644
index 0000000..da9daba
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_context.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef NES_CONTEXT_H
+#define NES_CONTEXT_H
+
+struct nes_qp_context {
+	__le32   misc;
+	__le32   cqs;
+	__le32   sq_addr_low;
+	__le32   sq_addr_high;
+	__le32   rq_addr_low;
+	__le32   rq_addr_high;
+	__le32   misc2;
+	__le16   tcpPorts[2];
+	__le32   ip0;
+	__le32   ip1;
+	__le32   ip2;
+	__le32   ip3;
+	__le32   mss;
+	__le32   arp_index_vlan;
+	__le32   tcp_state_flow_label;
+	__le32   pd_index_wscale;
+	__le32   keepalive;
+	u32   ts_recent;
+	u32   ts_age;
+	__le32   snd_nxt;
+	__le32   snd_wnd;
+	__le32   rcv_nxt;
+	__le32   rcv_wnd;
+	__le32   snd_max;
+	__le32   snd_una;
+	u32   srtt;
+	__le32   rttvar;
+	__le32   ssthresh;
+	__le32   cwnd;
+	__le32   snd_wl1;
+	__le32   snd_wl2;
+	__le32   max_snd_wnd;
+	__le32   ts_val_delta;
+	u32   retransmit;
+	u32   probe_cnt;
+	u32   hte_index;
+	__le32   q2_addr_low;
+	__le32   q2_addr_high;
+	__le32   ird_index;
+	u32   Rsvd3;
+	__le32   ird_ord_sizes;
+	u32   mrkr_offset;
+	__le32   aeq_token_low;
+	__le32   aeq_token_high;
+};
+
+/* QP Context Misc Field */
+
+#define NES_QPCONTEXT_MISC_IWARP_VER_MASK    0x00000003
+#define NES_QPCONTEXT_MISC_IWARP_VER_SHIFT   0
+#define NES_QPCONTEXT_MISC_EFB_SIZE_MASK     0x000000C0
+#define NES_QPCONTEXT_MISC_EFB_SIZE_SHIFT    6
+#define NES_QPCONTEXT_MISC_RQ_SIZE_MASK      0x00000300
+#define NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT     8
+#define NES_QPCONTEXT_MISC_SQ_SIZE_MASK      0x00000c00
+#define NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT     10
+#define NES_QPCONTEXT_MISC_PCI_FCN_MASK      0x00007000
+#define NES_QPCONTEXT_MISC_PCI_FCN_SHIFT     12
+#define NES_QPCONTEXT_MISC_DUP_ACKS_MASK     0x00070000
+#define NES_QPCONTEXT_MISC_DUP_ACKS_SHIFT    16
+
+enum nes_qp_context_misc_bits {
+	NES_QPCONTEXT_MISC_RX_WQE_SIZE         = 0x00000004,
+	NES_QPCONTEXT_MISC_IPV4                = 0x00000008,
+	NES_QPCONTEXT_MISC_DO_NOT_FRAG         = 0x00000010,
+	NES_QPCONTEXT_MISC_INSERT_VLAN         = 0x00000020,
+	NES_QPCONTEXT_MISC_DROS                = 0x00008000,
+	NES_QPCONTEXT_MISC_WSCALE              = 0x00080000,
+	NES_QPCONTEXT_MISC_KEEPALIVE           = 0x00100000,
+	NES_QPCONTEXT_MISC_TIMESTAMP           = 0x00200000,
+	NES_QPCONTEXT_MISC_SACK                = 0x00400000,
+	NES_QPCONTEXT_MISC_RDMA_WRITE_EN       = 0x00800000,
+	NES_QPCONTEXT_MISC_RDMA_READ_EN        = 0x01000000,
+	NES_QPCONTEXT_MISC_WBIND_EN            = 0x10000000,
+	NES_QPCONTEXT_MISC_FAST_REGISTER_EN    = 0x20000000,
+	NES_QPCONTEXT_MISC_PRIV_EN             = 0x40000000,
+	NES_QPCONTEXT_MISC_NO_NAGLE            = 0x80000000
+};
+
+enum nes_qp_acc_wq_sizes {
+	HCONTEXT_TSA_WQ_SIZE_4 = 0,
+	HCONTEXT_TSA_WQ_SIZE_32 = 1,
+	HCONTEXT_TSA_WQ_SIZE_128 = 2,
+	HCONTEXT_TSA_WQ_SIZE_512 = 3
+};
+
+/* QP Context Misc2 Fields (TTL/HOP_LIMIT and TOS/TRAFFIC_CLASS are two names for the same bits) */
+#define NES_QPCONTEXT_MISC2_TTL_MASK            0x000000ff
+#define NES_QPCONTEXT_MISC2_TTL_SHIFT           0
+#define NES_QPCONTEXT_MISC2_HOP_LIMIT_MASK      0x000000ff	/* identical to TTL_MASK */
+#define NES_QPCONTEXT_MISC2_HOP_LIMIT_SHIFT     0
+#define NES_QPCONTEXT_MISC2_LIMIT_MASK          0x00000300
+#define NES_QPCONTEXT_MISC2_LIMIT_SHIFT         8
+#define NES_QPCONTEXT_MISC2_NIC_INDEX_MASK      0x0000fc00
+#define NES_QPCONTEXT_MISC2_NIC_INDEX_SHIFT     10
+#define NES_QPCONTEXT_MISC2_SRC_IP_MASK         0x001f0000
+#define NES_QPCONTEXT_MISC2_SRC_IP_SHIFT        16
+#define NES_QPCONTEXT_MISC2_TOS_MASK            0xff000000
+#define NES_QPCONTEXT_MISC2_TOS_SHIFT           24
+#define NES_QPCONTEXT_MISC2_TRAFFIC_CLASS_MASK  0xff000000	/* identical to TOS_MASK */
+#define NES_QPCONTEXT_MISC2_TRAFFIC_CLASS_SHIFT 24
+
+/* QP Context Tcp State/Flow Label Fields */
+#define NES_QPCONTEXT_TCPFLOW_FLOW_LABEL_MASK   0x000fffff
+#define NES_QPCONTEXT_TCPFLOW_FLOW_LABEL_SHIFT  0
+#define NES_QPCONTEXT_TCPFLOW_TCP_STATE_MASK    0xf0000000
+#define NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT   28
+
+enum nes_qp_tcp_state {
+	NES_QPCONTEXT_TCPSTATE_CLOSED = 1,
+	NES_QPCONTEXT_TCPSTATE_EST = 5,
+	NES_QPCONTEXT_TCPSTATE_TIME_WAIT = 11,
+};
+
+/* QP Context PD Index/wscale Fields */
+#define NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK  0x0000000f
+#define NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT 0
+#define NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK  0x00000f00
+#define NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT 8
+#define NES_QPCONTEXT_PDWSCALE_PDINDEX_MASK     0xffff0000
+#define NES_QPCONTEXT_PDWSCALE_PDINDEX_SHIFT    16
+
+/* QP Context Keepalive Fields */
+#define NES_QPCONTEXT_KEEPALIVE_DELTA_MASK      0x0000ffff
+#define NES_QPCONTEXT_KEEPALIVE_DELTA_SHIFT     0
+#define NES_QPCONTEXT_KEEPALIVE_PROBE_CNT_MASK  0x00ff0000
+#define NES_QPCONTEXT_KEEPALIVE_PROBE_CNT_SHIFT 16
+#define NES_QPCONTEXT_KEEPALIVE_INTV_MASK       0xff000000
+#define NES_QPCONTEXT_KEEPALIVE_INTV_SHIFT      24
+
+/* QP Context ORD/IRD Fields */
+#define NES_QPCONTEXT_ORDIRD_ORDSIZE_MASK       0x0000007f
+#define NES_QPCONTEXT_ORDIRD_ORDSIZE_SHIFT      0
+#define NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK       0x00030000
+#define NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT      16
+#define NES_QPCONTEXT_ORDIRD_IWARP_MODE_MASK    0x30000000
+#define NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT   28
+
+enum nes_ord_ird_bits {
+	NES_QPCONTEXT_ORDIRD_WRPDU                   = 0x02000000,
+	NES_QPCONTEXT_ORDIRD_LSMM_PRESENT            = 0x04000000,
+	NES_QPCONTEXT_ORDIRD_ALSMM                   = 0x08000000,
+	NES_QPCONTEXT_ORDIRD_AAH                     = 0x40000000,
+	NES_QPCONTEXT_ORDIRD_RNMC                    = 0x80000000
+};
+
+enum nes_iwarp_qp_state {
+	NES_QPCONTEXT_IWARP_STATE_NONEXIST  = 0,
+	NES_QPCONTEXT_IWARP_STATE_IDLE      = 1,
+	NES_QPCONTEXT_IWARP_STATE_RTS       = 2,
+	NES_QPCONTEXT_IWARP_STATE_CLOSING   = 3,
+	NES_QPCONTEXT_IWARP_STATE_TERMINATE = 5,
+	NES_QPCONTEXT_IWARP_STATE_ERROR     = 6
+};
+
+
+#endif		/* NES_CONTEXT_H */
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
new file mode 100644
index 0000000..7c4c0fb
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -0,0 +1,3080 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+
+#include "nes.h"
+
+u32 crit_err_count = 0;
+u32 int_mod_timer_init;
+u32 int_mod_cq_depth_256;
+u32 int_mod_cq_depth_128;
+u32 int_mod_cq_depth_32;
+u32 int_mod_cq_depth_24;
+u32 int_mod_cq_depth_16;
+u32 int_mod_cq_depth_4;
+u32 int_mod_cq_depth_1;
+
+#include "nes_cm.h"
+
+
+#ifdef CONFIG_INFINIBAND_NES_DEBUG
+static unsigned char *nes_iwarp_state_str[] = {	/* entry order matches the enum nes_iwarp_qp_state values */
+	"Non-Existant",	/* 0: NES_QPCONTEXT_IWARP_STATE_NONEXIST */
+	"Idle",	/* 1 */
+	"RTS",	/* 2 */
+	"Closing",	/* 3 */
+	"RSVD1",	/* 4: no enum constant has this value */
+	"Terminate",	/* 5 */
+	"Error",	/* 6 */
+	"RSVD2",	/* 7: no enum constant has this value */
+};
+
+static unsigned char *nes_tcp_state_str[] = {	/* 16 entries; positions 1, 5 and 11 match enum nes_qp_tcp_state */
+	"Non-Existant",
+	"Closed",	/* 1: NES_QPCONTEXT_TCPSTATE_CLOSED */
+	"Listen",
+	"SYN Sent",
+	"SYN Rcvd",
+	"Established",	/* 5: NES_QPCONTEXT_TCPSTATE_EST */
+	"Close Wait",
+	"FIN Wait 1",
+	"Closing",
+	"Last Ack",
+	"FIN Wait 2",
+	"Time Wait",	/* 11: NES_QPCONTEXT_TCPSTATE_TIME_WAIT */
+	"RSVD1",
+	"RSVD2",
+	"RSVD3",
+	"RSVD4",
+};
+#endif
+
+
+/**
+ * nes_nic_init_timer_defaults - set tune-timer bounds and default or jumbo thresholds
+ */
+void nes_nic_init_timer_defaults(struct nes_device *nesdev, u8 jumbomode)
+{
+	struct nes_adapter *adapter = nesdev->nesadapter;
+	struct nes_hw_tune_timer *tune = &adapter->tune_timer;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&adapter->periodic_timer_lock, irqflags);
+
+	if (!jumbomode) {
+		tune->threshold_low    = DEFAULT_NES_QL_LOW;
+		tune->threshold_target = DEFAULT_NES_QL_TARGET;
+		tune->threshold_high   = DEFAULT_NES_QL_HIGH;
+	} else {
+		tune->threshold_low    = DEFAULT_JUMBO_NES_QL_LOW;
+		tune->threshold_target = DEFAULT_JUMBO_NES_QL_TARGET;
+		tune->threshold_high   = DEFAULT_JUMBO_NES_QL_HIGH;
+	}
+	tune->timer_in_use_min = NES_NIC_FAST_TIMER_LOW;
+	tune->timer_in_use_max = NES_NIC_FAST_TIMER_HIGH;
+
+	/* todo: derive the thresholds from netdev->mtu */
+	spin_unlock_irqrestore(&adapter->periodic_timer_lock, irqflags);
+}
+
+
+/**
+ * nes_nic_init_timer - reset tune-timer state when unset, then program the period
+ */
+static void nes_nic_init_timer(struct nes_device *nesdev)
+{
+	struct nes_adapter *adapter = nesdev->nesadapter;
+	struct nes_hw_tune_timer *tune = &adapter->tune_timer;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&adapter->periodic_timer_lock, irqflags);
+
+	if (!tune->timer_in_use_old) {
+		/* no previously written period recorded: restart from NES_NIC_FAST_TIMER */
+		nesdev->deepcq_count = 0;
+		tune->timer_direction_upward = 0;
+		tune->timer_direction_downward = 0;
+		tune->timer_in_use = NES_NIC_FAST_TIMER;
+		tune->timer_in_use_old = 0;
+	}
+	if (tune->timer_in_use_old != tune->timer_in_use) {
+		tune->timer_in_use_old = tune->timer_in_use;
+		nes_write32(nesdev->regs + NES_PERIODIC_CONTROL,
+			0x80000000 | ((u32)(tune->timer_in_use * 8)));
+	}
+	/* todo: derive the thresholds from netdev->mtu */
+	spin_unlock_irqrestore(&adapter->periodic_timer_lock, irqflags);
+}
+
+
+/**
+ * nes_nic_tune_timer - adjust tune_timer.timer_in_use from nesdev->currcq_count, then zero that count
+ */
+static void nes_nic_tune_timer(struct nes_device *nesdev)
+{
+	unsigned long flags;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
+	u16 cq_count = nesdev->currcq_count;
+
+	spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
+
+	if (shared_timer->cq_count_old < cq_count) {
+		if (cq_count > shared_timer->threshold_low)
+			shared_timer->cq_direction_downward=0;	/* count rose above threshold_low: downward trend reset */
+	}
+	if (shared_timer->cq_count_old >= cq_count)
+		shared_timer->cq_direction_downward++;	/* count did not rise since the previous sample */
+	shared_timer->cq_count_old = cq_count;
+	if (shared_timer->cq_direction_downward > NES_NIC_CQ_DOWNWARD_TREND) {
+		if (cq_count <= shared_timer->threshold_low) {
+			shared_timer->threshold_low = shared_timer->threshold_low/2;	/* trend persisted at or below threshold_low: halve it */
+			shared_timer->cq_direction_downward=0;
+			nesdev->currcq_count = 0;
+			spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
+			return;	/* timer_in_use is left unchanged on this path */
+		}
+	}
+
+	if (cq_count > 1) {	/* counts of 0 or 1 leave the direction counters and timer alone */
+		nesdev->deepcq_count += cq_count;
+		if (cq_count <= shared_timer->threshold_low) {       /* increase timer gently */
+			shared_timer->timer_direction_upward++;
+			shared_timer->timer_direction_downward = 0;
+		} else if (cq_count <= shared_timer->threshold_target) { /* balanced */
+			shared_timer->timer_direction_upward = 0;
+			shared_timer->timer_direction_downward = 0;
+		} else if (cq_count <= shared_timer->threshold_high) {  /* decrease timer gently */
+			shared_timer->timer_direction_downward++;
+			shared_timer->timer_direction_upward = 0;
+		} else if (cq_count <= (shared_timer->threshold_high) * 2) {	/* up to 2x threshold_high: drop by 2 now */
+			shared_timer->timer_in_use -= 2;
+			shared_timer->timer_direction_upward = 0;
+			shared_timer->timer_direction_downward++;
+		} else {	/* above 2x threshold_high: drop by 4 now */
+			shared_timer->timer_in_use -= 4;	/* NOTE(review): confirm this cannot wrap if the field is unsigned */
+			shared_timer->timer_direction_upward = 0;
+			shared_timer->timer_direction_downward++;
+		}
+
+		if (shared_timer->timer_direction_upward > 3 ) {  /* using history */
+			shared_timer->timer_in_use += 3;
+			shared_timer->timer_direction_upward = 0;
+			shared_timer->timer_direction_downward = 0;
+		}
+		if (shared_timer->timer_direction_downward > 5) { /* using history */
+			shared_timer->timer_in_use -= 4 ;
+			shared_timer->timer_direction_downward = 0;
+			shared_timer->timer_direction_upward = 0;
+		}
+	}
+
+	/* boundary checking: clamp timer_in_use to [NES_NIC_FAST_TIMER_LOW, NES_NIC_FAST_TIMER_HIGH] */
+	if (shared_timer->timer_in_use > NES_NIC_FAST_TIMER_HIGH)
+		shared_timer->timer_in_use = NES_NIC_FAST_TIMER_HIGH;
+	else if (shared_timer->timer_in_use < NES_NIC_FAST_TIMER_LOW) {
+		shared_timer->timer_in_use = NES_NIC_FAST_TIMER_LOW;
+	}
+
+	nesdev->currcq_count = 0;	/* consumed: the count starts again from zero */
+
+	spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
+}
+
+
+/**
+ * nes_init_adapter - initialize adapter
+ */
+struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
+	struct nes_adapter *nesadapter = NULL;
+	unsigned long num_pds;
+	u32 u32temp;
+	u32 port_count;
+	u16 max_rq_wrs;
+	u16 max_sq_wrs;
+	u32 max_mr;
+	u32 max_256pbl;
+	u32 max_4kpbl;
+	u32 max_qp;
+	u32 max_irrq;
+	u32 max_cq;
+	u32 hte_index_mask;
+	u32 adapter_size;
+	u32 arp_table_size;
+	u16 vendor_id;
+	u8  OneG_Mode;
+	u8  func_index;
+
+	/* search the list of existing adapters */
+	list_for_each_entry(nesadapter, &nes_adapter_list, list) {
+		nes_debug(NES_DBG_INIT, "Searching Adapter list for PCI devfn = 0x%X,"
+				" adapter PCI slot/bus = %u/%u, pci devices PCI slot/bus = %u/%u, .\n",
+				nesdev->pcidev->devfn,
+				PCI_SLOT(nesadapter->devfn),
+				nesadapter->bus_number,
+				PCI_SLOT(nesdev->pcidev->devfn),
+				nesdev->pcidev->bus->number );
+		if ((PCI_SLOT(nesadapter->devfn) == PCI_SLOT(nesdev->pcidev->devfn)) &&
+				(nesadapter->bus_number == nesdev->pcidev->bus->number)) {
+			nesadapter->ref_count++;
+			return nesadapter;
+		}
+	}
+
+	/* no adapter found */
+	num_pds = pci_resource_len(nesdev->pcidev, BAR_1) >> PAGE_SHIFT;
+	if ((hw_rev != NE020_REV) && (hw_rev != NE020_REV1)) {
+		nes_debug(NES_DBG_INIT, "NE020 driver detected unknown hardware revision 0x%x\n",
+				hw_rev);
+		return NULL;
+	}
+
+	nes_debug(NES_DBG_INIT, "Determine Soft Reset, QP_control=0x%x, CPU0=0x%x, CPU1=0x%x, CPU2=0x%x\n",
+			nes_read_indexed(nesdev, NES_IDX_QP_CONTROL + PCI_FUNC(nesdev->pcidev->devfn) * 8),
+			nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS),
+			nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS + 4),
+			nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS + 8));
+
+	nes_debug(NES_DBG_INIT, "Reset and init NE020\n");
+
+
+	if ((port_count = nes_reset_adapter_ne020(nesdev, &OneG_Mode)) == 0)
+		return NULL;
+	if (nes_init_serdes(nesdev, hw_rev, port_count, OneG_Mode))
+		return NULL;
+	nes_init_csr_ne020(nesdev, hw_rev, port_count);
+
+	max_qp = nes_read_indexed(nesdev, NES_IDX_QP_CTX_SIZE);
+	nes_debug(NES_DBG_INIT, "QP_CTX_SIZE=%u\n", max_qp);
+
+	u32temp = nes_read_indexed(nesdev, NES_IDX_QUAD_HASH_TABLE_SIZE);
+	if (max_qp > ((u32)1 << (u32temp & 0x001f))) {
+		nes_debug(NES_DBG_INIT, "Reducing Max QPs to %u due to hash table size = 0x%08X\n",
+				max_qp, u32temp);
+		max_qp = (u32)1 << (u32temp & 0x001f);
+	}
+
+	hte_index_mask = ((u32)1 << ((u32temp & 0x001f)+1))-1;
+	nes_debug(NES_DBG_INIT, "Max QP = %u, hte_index_mask = 0x%08X.\n",
+			max_qp, hte_index_mask);
+
+	u32temp = nes_read_indexed(nesdev, NES_IDX_IRRQ_COUNT);
+
+	max_irrq = 1 << (u32temp & 0x001f);
+
+	if (max_qp > max_irrq) {
+		max_qp = max_irrq;
+		nes_debug(NES_DBG_INIT, "Reducing Max QPs to %u due to Available Q1s.\n",
+				max_qp);
+	}
+
+	/* there should be no reason to allocate more pds than qps */
+	if (num_pds > max_qp)
+		num_pds = max_qp;
+
+	u32temp = nes_read_indexed(nesdev, NES_IDX_MRT_SIZE);
+	max_mr = (u32)8192 << (u32temp & 0x7);
+
+	u32temp = nes_read_indexed(nesdev, NES_IDX_PBL_REGION_SIZE);
+	max_256pbl = (u32)1 << (u32temp & 0x0000001f);
+	max_4kpbl = (u32)1 << ((u32temp >> 16) & 0x0000001f);
+	max_cq = nes_read_indexed(nesdev, NES_IDX_CQ_CTX_SIZE);
+
+	u32temp = nes_read_indexed(nesdev, NES_IDX_ARP_CACHE_SIZE);
+	arp_table_size = 1 << u32temp;
+
+	adapter_size = (sizeof(struct nes_adapter) +
+			(sizeof(unsigned long)-1)) & (~(sizeof(unsigned long)-1));
+	adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp);
+	adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr);
+	adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_cq);
+	adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(num_pds);
+	adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(arp_table_size);
+	adapter_size += sizeof(struct nes_qp **) * max_qp;
+
+	/* allocate a new adapter struct */
+	nesadapter = kzalloc(adapter_size, GFP_KERNEL);
+	if (nesadapter == NULL) {
+		return NULL;
+	}
+
+	nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n",
+			nesadapter, (u32)sizeof(struct nes_adapter), adapter_size);
+
+	/* populate the new nesadapter */
+	nesadapter->devfn = nesdev->pcidev->devfn;
+	nesadapter->bus_number = nesdev->pcidev->bus->number;
+	nesadapter->ref_count = 1;
+	nesadapter->timer_int_req = 0xffff0000;
+	nesadapter->OneG_Mode = OneG_Mode;
+	nesadapter->doorbell_start = nesdev->doorbell_region;
+
+	/* nesadapter->tick_delta = clk_divisor; */
+	nesadapter->hw_rev = hw_rev;
+	nesadapter->port_count = port_count;
+
+	nesadapter->max_qp = max_qp;
+	nesadapter->hte_index_mask = hte_index_mask;
+	nesadapter->max_irrq = max_irrq;
+	nesadapter->max_mr = max_mr;
+	nesadapter->max_256pbl = max_256pbl - 1;
+	nesadapter->max_4kpbl = max_4kpbl - 1;
+	nesadapter->max_cq = max_cq;
+	nesadapter->free_256pbl = max_256pbl - 1;
+	nesadapter->free_4kpbl = max_4kpbl - 1;
+	nesadapter->max_pd = num_pds;
+	nesadapter->arp_table_size = arp_table_size;
+
+	nesadapter->et_pkt_rate_low = NES_TIMER_ENABLE_LIMIT;
+	if (nes_drv_opt & NES_DRV_OPT_DISABLE_INT_MOD) {
+		nesadapter->et_use_adaptive_rx_coalesce = 0;
+		nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT;
+		nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval;
+	} else {
+		nesadapter->et_use_adaptive_rx_coalesce = 1;
+		nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC;
+		nesadapter->et_rx_coalesce_usecs_irq = 0;
+		printk(PFX "%s: Using Adaptive Interrupt Moderation\n", __FUNCTION__);
+	}
+	/* Setup and enable the periodic timer */
+	if (nesadapter->et_rx_coalesce_usecs_irq)
+		nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x80000000 |
+				((u32)(nesadapter->et_rx_coalesce_usecs_irq * 8)));
+	else
+		nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x00000000);
+
+	nesadapter->base_pd = 1;
+
+	nesadapter->device_cap_flags =
+			IB_DEVICE_ZERO_STAG | IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW;
+
+	nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
+			[(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
+	nesadapter->allocated_cqs = &nesadapter->allocated_qps[BITS_TO_LONGS(max_qp)];
+	nesadapter->allocated_mrs = &nesadapter->allocated_cqs[BITS_TO_LONGS(max_cq)];
+	nesadapter->allocated_pds = &nesadapter->allocated_mrs[BITS_TO_LONGS(max_mr)];
+	nesadapter->allocated_arps = &nesadapter->allocated_pds[BITS_TO_LONGS(num_pds)];
+	nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
+
+
+	/* mark the usual suspect QPs and CQs as in use */
+	for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) {
+		set_bit(u32temp, nesadapter->allocated_qps);
+		set_bit(u32temp, nesadapter->allocated_cqs);
+	}
+
+	for (u32temp = 0; u32temp < 20; u32temp++)
+		set_bit(u32temp, nesadapter->allocated_pds);
+	u32temp = nes_read_indexed(nesdev, NES_IDX_QP_MAX_CFG_SIZES);
+
+	max_rq_wrs = ((u32temp >> 8) & 3);
+	switch (max_rq_wrs) {
+		case 0:
+			max_rq_wrs = 4;
+			break;
+		case 1:
+			max_rq_wrs = 16;
+			break;
+		case 2:
+			max_rq_wrs = 32;
+			break;
+		case 3:
+			max_rq_wrs = 512;
+			break;
+	}
+
+	max_sq_wrs = (u32temp & 3);
+	switch (max_sq_wrs) {
+		case 0:
+			max_sq_wrs = 4;
+			break;
+		case 1:
+			max_sq_wrs = 16;
+			break;
+		case 2:
+			max_sq_wrs = 32;
+			break;
+		case 3:
+			max_sq_wrs = 512;
+			break;
+	}
+	nesadapter->max_qp_wr = min(max_rq_wrs, max_sq_wrs);
+	nesadapter->max_irrq_wr = (u32temp >> 16) & 3;
+
+	nesadapter->max_sge = 4;
+	nesadapter->max_cqe = 32767;
+
+	if (nes_read_eeprom_values(nesdev, nesadapter)) {
+		printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
+		kfree(nesadapter);
+		return NULL;
+	}
+
+	u32temp = nes_read_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG);
+	nes_write_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG,
+			(u32temp & 0xff000000) | (nesadapter->tcp_timer_core_clk_divisor & 0x00ffffff));
+
+	/* setup port configuration */
+	if (nesadapter->port_count == 1) {
+		u32temp = 0x00000000;
+		if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT)
+			nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000002);
+		else
+			nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003);
+	} else {
+		if (nesadapter->port_count == 2)
+			u32temp = 0x00000044;
+		else
+			u32temp = 0x000000e4;
+		nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003);
+	}
+
+	nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT, u32temp);
+	nes_debug(NES_DBG_INIT, "Probe time, LOG2PHY=%u\n",
+			nes_read_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT));
+
+	spin_lock_init(&nesadapter->resource_lock);
+	spin_lock_init(&nesadapter->phy_lock);
+	spin_lock_init(&nesadapter->pbl_lock);
+	spin_lock_init(&nesadapter->periodic_timer_lock);
+
+	INIT_LIST_HEAD(&nesadapter->nesvnic_list[0]);
+	INIT_LIST_HEAD(&nesadapter->nesvnic_list[1]);
+	INIT_LIST_HEAD(&nesadapter->nesvnic_list[2]);
+	INIT_LIST_HEAD(&nesadapter->nesvnic_list[3]);
+
+	if ((!nesadapter->OneG_Mode) && (nesadapter->port_count == 2)) {
+		u32 pcs_control_status0, pcs_control_status1;
+		u32 reset_value;
+		u32 i = 0;
+		u32 int_cnt = 0;
+		u32 ext_cnt = 0;
+		unsigned long flags;
+		u32 j = 0;
+
+		pcs_control_status0 = nes_read_indexed(nesdev,
+			NES_IDX_PHY_PCS_CONTROL_STATUS0);
+		pcs_control_status1 = nes_read_indexed(nesdev,
+			NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
+
+		for (i = 0; i < NES_MAX_LINK_CHECK; i++) {
+			pcs_control_status0 = nes_read_indexed(nesdev,
+					NES_IDX_PHY_PCS_CONTROL_STATUS0);
+			pcs_control_status1 = nes_read_indexed(nesdev,
+					NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
+			if ((0x0F000100 == (pcs_control_status0 & 0x0F000100))
+			    || (0x0F000100 == (pcs_control_status1 & 0x0F000100)))
+				int_cnt++;
+			msleep(1);
+		}
+		if (int_cnt > 1) {
+			spin_lock_irqsave(&nesadapter->phy_lock, flags);
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088);
+			mh_detected++;
+			reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
+			reset_value |= 0x0000003d;
+			nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value);
+
+			while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET)
+				& 0x00000040) != 0x00000040) && (j++ < 5000));
+			spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+
+			pcs_control_status0 = nes_read_indexed(nesdev,
+					NES_IDX_PHY_PCS_CONTROL_STATUS0);
+			pcs_control_status1 = nes_read_indexed(nesdev,
+					NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
+
+			for (i = 0; i < NES_MAX_LINK_CHECK; i++) {
+				pcs_control_status0 = nes_read_indexed(nesdev,
+					NES_IDX_PHY_PCS_CONTROL_STATUS0);
+				pcs_control_status1 = nes_read_indexed(nesdev,
+					NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
+				if ((0x0F000100 == (pcs_control_status0 & 0x0F000100))
+					|| (0x0F000100 == (pcs_control_status1 & 0x0F000100))) {
+					if (++ext_cnt > int_cnt) {
+						spin_lock_irqsave(&nesadapter->phy_lock, flags);
+						nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1,
+								0x0000F0C8);
+						mh_detected++;
+						reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
+						reset_value |= 0x0000003d;
+						nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value);
+
+						while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET)
+							& 0x00000040) != 0x00000040) && (j++ < 5000));
+						spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+						break;
+					}
+				}
+				msleep(1);
+			}
+		}
+	}
+
+	if (nesadapter->hw_rev == NE020_REV) {
+		init_timer(&nesadapter->mh_timer);
+		nesadapter->mh_timer.function = nes_mh_fix;
+		nesadapter->mh_timer.expires = jiffies + (HZ/5);  /* 1/5 second */
+		nesadapter->mh_timer.data = (unsigned long)nesdev;
+		add_timer(&nesadapter->mh_timer);
+	} else {
+		nes_write32(nesdev->regs+NES_INTF_INT_STAT, 0x0f000000);
+	}
+
+	init_timer(&nesadapter->lc_timer);
+	nesadapter->lc_timer.function = nes_clc;
+	nesadapter->lc_timer.expires = jiffies + 3600 * HZ;  /* 1 hour */
+	nesadapter->lc_timer.data = (unsigned long)nesdev;
+	add_timer(&nesadapter->lc_timer);
+
+	list_add_tail(&nesadapter->list, &nes_adapter_list);
+
+	for (func_index = 0; func_index < 8; func_index++) {
+		pci_bus_read_config_word(nesdev->pcidev->bus,
+					PCI_DEVFN(PCI_SLOT(nesdev->pcidev->devfn),
+					func_index), 0, &vendor_id);
+		if (vendor_id == 0xffff)
+			break;
+	}
+	nes_debug(NES_DBG_INIT, "%s %d functions found for %s.\n", __FUNCTION__,
+		func_index, pci_name(nesdev->pcidev));
+	nesadapter->adapter_fcn_count = func_index;
+
+	return nesadapter;
+}
+
+
+/**
+ * nes_reset_adapter_ne020 - soft reset the adapter; returns the port count, or 0 on failure
+ */
+unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode)
+{
+	u32 port_count;
+	u32 u32temp;
+	u32 i;
+
+	u32temp = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
+	port_count = ((u32temp & 0x00000300) >> 8) + 1;
+	/* TODO: assuming that both SERDES are set the same for now */
+	*OneG_Mode = (u32temp & 0x00003c00) ? 0 : 1;	/* 1 when bits 13:10 are all clear */
+	nes_debug(NES_DBG_INIT, "Initial Software Reset = 0x%08X, port_count=%u\n",
+			u32temp, port_count);
+	if (*OneG_Mode)
+		nes_debug(NES_DBG_INIT, "Running in 1G mode.\n");
+	u32temp &= 0xff00ffc0;
+	switch (port_count) {
+		case 1:
+			u32temp |= 0x00ee0000;
+			break;
+		case 2:
+			u32temp |= 0x00cc0000;
+			break;
+		case 4:
+			u32temp |= 0x00000000;
+			break;
+		default:
+			/* a port count of 3 is rejected */
+			return 0;
+	}
+
+	/* check and do full reset if needed */
+	if (nes_read_indexed(nesdev, NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))) {
+		nes_debug(NES_DBG_INIT, "Issuing Full Soft reset = 0x%08X\n", u32temp | 0xd);
+		nes_write32(nesdev->regs+NES_SOFTWARE_RESET, u32temp | 0xd);
+		/* i ends up above the limit only on timeout; i == limit means the last poll succeeded */
+		i = 0;
+		while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000)
+			mdelay(1);
+		if (i > 10000) {
+			nes_debug(NES_DBG_INIT, "Did not see full soft reset done.\n");
+			return 0;
+		}
+	}
+
+	/* port reset */
+	switch (port_count) {
+		case 1:
+			u32temp |= 0x00ee0010;
+			break;
+		case 2:
+			u32temp |= 0x00cc0030;
+			break;
+		case 4:
+			u32temp |= 0x00000030;
+			break;
+	}
+
+	nes_debug(NES_DBG_INIT, "Issuing Port Soft reset = 0x%08X\n", u32temp | 0xd);
+	nes_write32(nesdev->regs+NES_SOFTWARE_RESET, u32temp | 0xd);
+
+	i = 0;
+	while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000)
+		mdelay(1);
+	if (i > 10000) {
+		nes_debug(NES_DBG_INIT, "Did not see port soft reset done.\n");
+		return 0;
+	}
+
+	/* serdes 0 */
+	i = 0;
+	while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0)
+			& 0x0000000f)) != 0x0000000f) && i++ < 5000)
+		mdelay(1);
+	if (i > 5000) {
+		nes_debug(NES_DBG_INIT, "Serdes 0 not ready, status=%x\n", u32temp);
+		return 0;
+	}
+
+	/* serdes 1 */
+	if (port_count > 1) {
+		i = 0;
+		while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1)
+				& 0x0000000f)) != 0x0000000f) && i++ < 5000)
+			mdelay(1);
+		if (i > 5000) {
+			nes_debug(NES_DBG_INIT, "Serdes 1 not ready, status=%x\n", u32temp);
+			return 0;
+		}
+	}
+
+	/* wait for the internal CPU to report ready: its status register
+	 * must read back exactly 0x80 */
+	i = 0;
+	while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000)
+		mdelay(1);
+	if (i > 10000) {
+		printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n",
+				nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS));
+		return 0;
+	}
+
+	return port_count;
+}
+
+
+/**
+ * nes_init_serdes - program the SERDES blocks; returns 1 if serdes 0 never becomes ready, else 0
+ */
+int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, u8 OneG_Mode)
+{
+	int i;
+	u32 u32temp;
+
+	if (hw_rev != NE020_REV) {
+		/* non-NE020 revisions: only CDR control and, when not in 1G mode, TX high-Z lane mode */
+		/* init serdes 0 */
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
+		if (!OneG_Mode)
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000);
+		if (port_count > 1) {
+			/* init serdes 1 */
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF);
+			if (!OneG_Mode)
+				nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000);
+		}
+	} else {
+		/* init serdes 0 */
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008);
+		i = 0;	/* i exceeds the limit only on timeout; i == limit means the last poll succeeded */
+		while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0)
+				& 0x0000000f)) != 0x0000000f) && i++ < 5000)
+			mdelay(1);
+		if (i > 5000) {
+			nes_debug(NES_DBG_PHY, "Init: serdes 0 not ready, status=%x\n", u32temp);
+			return 1;
+		}
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x000bdef7);
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE0, 0x9ce73000);
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE0, 0x0ff00000);
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET0, 0x00000000);
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS0, 0x00000000);
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0, 0x00000000);
+		if (OneG_Mode)
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0182222);
+		else
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0042222);
+
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000ff);
+		if (port_count > 1) {
+			/* init serdes 1 */
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x00000048);
+			i = 0;
+			while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1)
+				& 0x0000000f)) != 0x0000000f) && (i++ < 5000))
+				mdelay(1);
+			if (i > 5000) {
+				printk("%s: Init: serdes 1 not ready, status=%x\n", __FUNCTION__, u32temp);
+				/* not treated as fatal: serdes 1 is still programmed below */
+			}
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x000bdef7);
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE1, 0x9ce73000);
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE1, 0x0ff00000);
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET1, 0x00000000);
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS1, 0x00000000);
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL1, 0x00000000);
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL1, 0xf0002222);
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000ff);
+		}
+	}
+	return 0;
+}
+
+
+/**
+ * nes_init_csr_ne020 - write the fixed initial values into the indexed registers
+ * Initialize registers for ne020 hardware; hw_rev and port_count select the optional writes
+ */
+void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count)
+{
+	u32 u32temp;
+
+	nes_debug(NES_DBG_INIT, "port_count=%d\n", port_count);
+
+	nes_write_indexed(nesdev, 0x000001E4, 0x00000007);
+	/* nes_write_indexed(nesdev, 0x000001E8, 0x000208C4); */
+	nes_write_indexed(nesdev, 0x000001E8, 0x00020874);
+	nes_write_indexed(nesdev, 0x000001D8, 0x00048002);
+	/* nes_write_indexed(nesdev, 0x000001D8, 0x0004B002); */
+	nes_write_indexed(nesdev, 0x000001FC, 0x00050005);
+	nes_write_indexed(nesdev, 0x00000600, 0x55555555);
+	nes_write_indexed(nesdev, 0x00000604, 0x55555555);
+
+	/* TODO: move these MAC register settings to NIC bringup */
+	nes_write_indexed(nesdev, 0x00002000, 0x00000001);
+	nes_write_indexed(nesdev, 0x00002004, 0x00000001);
+	nes_write_indexed(nesdev, 0x00002008, 0x0000FFFF);
+	nes_write_indexed(nesdev, 0x0000200C, 0x00000001);
+	nes_write_indexed(nesdev, 0x00002010, 0x000003c1);
+	nes_write_indexed(nesdev, 0x0000201C, 0x75345678);
+	if (port_count > 1) {	/* same six values at 0x2200..0x221C, plus 0x908 */
+		nes_write_indexed(nesdev, 0x00002200, 0x00000001);
+		nes_write_indexed(nesdev, 0x00002204, 0x00000001);
+		nes_write_indexed(nesdev, 0x00002208, 0x0000FFFF);
+		nes_write_indexed(nesdev, 0x0000220C, 0x00000001);
+		nes_write_indexed(nesdev, 0x00002210, 0x000003c1);
+		nes_write_indexed(nesdev, 0x0000221C, 0x75345678);
+		nes_write_indexed(nesdev, 0x00000908, 0x20000001);
+	}
+	if (port_count > 2) {	/* likewise at 0x2400 and 0x2600, plus 0x910 and 0x918 */
+		nes_write_indexed(nesdev, 0x00002400, 0x00000001);
+		nes_write_indexed(nesdev, 0x00002404, 0x00000001);
+		nes_write_indexed(nesdev, 0x00002408, 0x0000FFFF);
+		nes_write_indexed(nesdev, 0x0000240C, 0x00000001);
+		nes_write_indexed(nesdev, 0x00002410, 0x000003c1);
+		nes_write_indexed(nesdev, 0x0000241C, 0x75345678);
+		nes_write_indexed(nesdev, 0x00000910, 0x20000001);
+
+		nes_write_indexed(nesdev, 0x00002600, 0x00000001);
+		nes_write_indexed(nesdev, 0x00002604, 0x00000001);
+		nes_write_indexed(nesdev, 0x00002608, 0x0000FFFF);
+		nes_write_indexed(nesdev, 0x0000260C, 0x00000001);
+		nes_write_indexed(nesdev, 0x00002610, 0x000003c1);
+		nes_write_indexed(nesdev, 0x0000261C, 0x75345678);
+		nes_write_indexed(nesdev, 0x00000918, 0x20000001);
+	}
+
+	nes_write_indexed(nesdev, 0x00005000, 0x00018000);
+	/* nes_write_indexed(nesdev, 0x00005000, 0x00010000); */
+	nes_write_indexed(nesdev, 0x00005004, 0x00020001);
+	nes_write_indexed(nesdev, 0x00005008, 0x1F1F1F1F);
+	nes_write_indexed(nesdev, 0x00005010, 0x1F1F1F1F);
+	nes_write_indexed(nesdev, 0x00005018, 0x1F1F1F1F);
+	nes_write_indexed(nesdev, 0x00005020, 0x1F1F1F1F);
+	nes_write_indexed(nesdev, 0x00006090, 0xFFFFFFFF);
+
+	/* TODO: move this to code, get from EEPROM */
+	nes_write_indexed(nesdev, 0x00000900, 0x20000001);
+	nes_write_indexed(nesdev, 0x000060C0, 0x0000028e);
+	nes_write_indexed(nesdev, 0x000060C8, 0x00000020);
+
+	nes_write_indexed(nesdev, 0x000001EC, 0x7b2625a0);
+	/* nes_write_indexed(nesdev, 0x000001EC, 0x5f2625a0); */
+
+	if (hw_rev != NE020_REV) {	/* other revisions: read-modify-write 0x8e8 and 0x21f8 */
+		u32temp = nes_read_indexed(nesdev, 0x000008e8);
+		u32temp |= 0x80000000;	/* set bit 31 */
+		nes_write_indexed(nesdev, 0x000008e8, u32temp);
+		u32temp = nes_read_indexed(nesdev, 0x000021f8);
+		u32temp &= 0x7fffffff;	/* clear bit 31 */
+		u32temp |= 0x7fff0010;
+		nes_write_indexed(nesdev, 0x000021f8, u32temp);
+	}
+}
+
+
+/**
+ * nes_destroy_adapter - drop one reference; tear the adapter down when none remain
+ */
+void nes_destroy_adapter(struct nes_adapter *nesadapter)
+{
+	struct nes_adapter *entry;
+
+	/* debug aid only: log every adapter currently on nes_adapter_list */
+	list_for_each_entry(entry, &nes_adapter_list, list) {
+		nes_debug(NES_DBG_SHUTDOWN, "Nes Adapter list entry = 0x%p.\n", entry);
+	}
+
+	/* references remain after this drop: leave the adapter alone */
+	if (--nesadapter->ref_count)
+		return;
+
+	/* last reference gone: stop the timers, then unlink and free */
+	if (nesadapter->hw_rev == NE020_REV)
+		del_timer(&nesadapter->mh_timer);
+	del_timer(&nesadapter->lc_timer);
+	list_del(&nesadapter->list);
+	kfree(nesadapter);
+}
+
+
+/**
+ * nes_init_cqp - create the CQP, CCQ, CEQs and AEQ; returns 0, -ENOMEM, or -1 on hardware timeout
+ */
+int nes_init_cqp(struct nes_device *nesdev)
+{
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_hw_cqp_qp_context *cqp_qp_context;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	struct nes_hw_ceq *ceq;
+	struct nes_hw_ceq *nic_ceq;
+	struct nes_hw_aeq *aeq;
+	void *vmem;
+	dma_addr_t pmem;
+	u32 count = 0;
+	u32 cqp_head;
+	u64 u64temp;
+
+	/* allocate CQP memory */
+	/* Need to add max_cq to the aeq size once cq overflow checking is added back */
+	/* SQ is 512 byte aligned, others are 256 byte aligned */
+	nesdev->cqp_mem_size = 512 +
+			(sizeof(struct nes_hw_cqp_wqe) * NES_CQP_SQ_SIZE) +
+			(sizeof(struct nes_hw_cqe) * NES_CCQ_SIZE) +
+			max(((u32)sizeof(struct nes_hw_ceqe) * NES_CCEQ_SIZE), (u32)256) +
+			max(((u32)sizeof(struct nes_hw_ceqe) * NES_NIC_CEQ_SIZE), (u32)256) +
+			(sizeof(struct nes_hw_aeqe) * nesadapter->max_qp) +
+			sizeof(struct nes_hw_cqp_qp_context);
+
+	nesdev->cqp_vbase = pci_alloc_consistent(nesdev->pcidev, nesdev->cqp_mem_size,
+			&nesdev->cqp_pbase);
+	if (!nesdev->cqp_vbase) {
+		nes_debug(NES_DBG_INIT, "Unable to allocate memory for host descriptor rings\n");
+		return -ENOMEM;
+	}
+	memset(nesdev->cqp_vbase, 0, nesdev->cqp_mem_size);
+
+	/* Allocate twice the number of CQP requests as the SQ size */
+	nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) *
+			2 * NES_CQP_SQ_SIZE, GFP_KERNEL);
+	if (nesdev->nes_cqp_requests == NULL) {
+		nes_debug(NES_DBG_INIT, "Unable to allocate memory CQP request entries.\n");
+		pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp_vbase,
+				nesdev->cqp_pbase);	/* cqp.sq_vbase/sq_pbase are not assigned yet */
+		return -ENOMEM;
+	}
+
+	nes_debug(NES_DBG_INIT, "Allocated CQP structures at %p (phys = %016lX), size = %u.\n",
+			nesdev->cqp_vbase, (unsigned long)nesdev->cqp_pbase, nesdev->cqp_mem_size);
+
+	spin_lock_init(&nesdev->cqp.lock);
+	init_waitqueue_head(&nesdev->cqp.waitq);
+
+	/* Carve up the allocation: SQ (rounded up to 512 bytes), CCQ, CEQ, NIC CEQ, AEQ, QP context */
+	vmem = (void *)(((unsigned long)nesdev->cqp_vbase + (512 - 1)) &
+			~(unsigned long)(512 - 1));
+	pmem = (dma_addr_t)(((unsigned long long)nesdev->cqp_pbase + (512 - 1)) &
+			~(unsigned long long)(512 - 1));
+
+	nesdev->cqp.sq_vbase = vmem;
+	nesdev->cqp.sq_pbase = pmem;
+	nesdev->cqp.sq_size = NES_CQP_SQ_SIZE;
+	nesdev->cqp.sq_head = 0;
+	nesdev->cqp.sq_tail = 0;
+	nesdev->cqp.qp_id = PCI_FUNC(nesdev->pcidev->devfn);
+
+	vmem += (sizeof(struct nes_hw_cqp_wqe) * nesdev->cqp.sq_size);
+	pmem += (sizeof(struct nes_hw_cqp_wqe) * nesdev->cqp.sq_size);
+
+	nesdev->ccq.cq_vbase = vmem;
+	nesdev->ccq.cq_pbase = pmem;
+	nesdev->ccq.cq_size = NES_CCQ_SIZE;
+	nesdev->ccq.cq_head = 0;
+	nesdev->ccq.ce_handler = nes_cqp_ce_handler;
+	nesdev->ccq.cq_number = PCI_FUNC(nesdev->pcidev->devfn);
+
+	vmem += (sizeof(struct nes_hw_cqe) * nesdev->ccq.cq_size);
+	pmem += (sizeof(struct nes_hw_cqe) * nesdev->ccq.cq_size);
+
+	nesdev->ceq_index = PCI_FUNC(nesdev->pcidev->devfn);
+	ceq = &nesadapter->ceq[nesdev->ceq_index];
+	ceq->ceq_vbase = vmem;
+	ceq->ceq_pbase = pmem;
+	ceq->ceq_size = NES_CCEQ_SIZE;
+	ceq->ceq_head = 0;
+
+	vmem += max(((u32)sizeof(struct nes_hw_ceqe) * ceq->ceq_size), (u32)256);
+	pmem += max(((u32)sizeof(struct nes_hw_ceqe) * ceq->ceq_size), (u32)256);
+
+	nesdev->nic_ceq_index = PCI_FUNC(nesdev->pcidev->devfn) + 8;
+	nic_ceq = &nesadapter->ceq[nesdev->nic_ceq_index];
+	nic_ceq->ceq_vbase = vmem;
+	nic_ceq->ceq_pbase = pmem;
+	nic_ceq->ceq_size = NES_NIC_CEQ_SIZE;
+	nic_ceq->ceq_head = 0;
+
+	vmem += max(((u32)sizeof(struct nes_hw_ceqe) * nic_ceq->ceq_size), (u32)256);
+	pmem += max(((u32)sizeof(struct nes_hw_ceqe) * nic_ceq->ceq_size), (u32)256);
+
+	aeq = &nesadapter->aeq[PCI_FUNC(nesdev->pcidev->devfn)];
+	aeq->aeq_vbase = vmem;
+	aeq->aeq_pbase = pmem;
+	aeq->aeq_size = nesadapter->max_qp;
+	aeq->aeq_head = 0;
+
+	/* Setup QP Context */
+	vmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size);
+	pmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size);
+
+	cqp_qp_context = vmem;
+	cqp_qp_context->context_words[0] =
+			cpu_to_le32((PCI_FUNC(nesdev->pcidev->devfn) << 12) + (2 << 10));
+	cqp_qp_context->context_words[1] = 0;
+	cqp_qp_context->context_words[2] = cpu_to_le32((u32)nesdev->cqp.sq_pbase);
+	cqp_qp_context->context_words[3] = cpu_to_le32(((u64)nesdev->cqp.sq_pbase) >> 32);
+
+	/* Write the bus address of the QP context to Create CQP, high word first */
+	if ((sizeof(dma_addr_t) > 4)) {
+		nes_write_indexed(nesdev,
+				NES_IDX_CREATE_CQP_HIGH + (PCI_FUNC(nesdev->pcidev->devfn) * 8),
+				((u64)pmem) >> 32);
+	} else {
+		nes_write_indexed(nesdev,
+				NES_IDX_CREATE_CQP_HIGH + (PCI_FUNC(nesdev->pcidev->devfn) * 8), 0);
+	}
+	nes_write_indexed(nesdev,
+			NES_IDX_CREATE_CQP_LOW + (PCI_FUNC(nesdev->pcidev->devfn) * 8),
+			(u32)pmem);
+
+	INIT_LIST_HEAD(&nesdev->cqp_avail_reqs);
+	INIT_LIST_HEAD(&nesdev->cqp_pending_reqs);
+
+	for (count = 0; count < 2*NES_CQP_SQ_SIZE; count++) {
+		init_waitqueue_head(&nesdev->nes_cqp_requests[count].waitq);
+		list_add_tail(&nesdev->nes_cqp_requests[count].list, &nesdev->cqp_avail_reqs);
+	}
+
+	/* Write Create CCQ WQE */
+	cqp_head = nesdev->cqp.sq_head++;
+	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+			(NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
+			NES_CQP_CQ_CHK_OVERFLOW | ((u32)nesdev->ccq.cq_size << 16)));
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
+			(nesdev->ccq.cq_number |
+			((u32)nesdev->ceq_index << 16)));
+	u64temp = (u64)nesdev->ccq.cq_pbase;
+	set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
+	cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0;
+	u64temp = (unsigned long)&nesdev->ccq;	/* CQ context: address of the ccq struct, stored shifted right by one */
+	cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] =
+			cpu_to_le32((u32)(u64temp >> 1));
+	cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
+			cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
+	cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
+
+	/* Write Create CEQ WQE */
+	cqp_head = nesdev->cqp.sq_head++;
+	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+			(NES_CQP_CREATE_CEQ + ((u32)nesdev->ceq_index << 8)));
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX, ceq->ceq_size);
+	u64temp = (u64)ceq->ceq_pbase;
+	set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
+
+	/* Write Create AEQ WQE */
+	cqp_head = nesdev->cqp.sq_head++;
+	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+			(NES_CQP_CREATE_AEQ + ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8)));
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_AEQ_WQE_ELEMENT_COUNT_IDX, aeq->aeq_size);
+	u64temp = (u64)aeq->aeq_pbase;
+	set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
+
+	/* Write Create NIC CEQ WQE */
+	cqp_head = nesdev->cqp.sq_head++;
+	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+			(NES_CQP_CREATE_CEQ + ((u32)nesdev->nic_ceq_index << 8)));
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX, nic_ceq->ceq_size);
+	u64temp = (u64)nic_ceq->ceq_pbase;
+	set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
+
+	/* Poll until CCQP done */
+	count = 0;
+	do {
+		if (count++ > 1000) {
+			printk(KERN_ERR PFX "Error creating CQP\n");
+			kfree(nesdev->nes_cqp_requests);
+			pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size,
+					nesdev->cqp_vbase, nesdev->cqp_pbase);
+			return -1;
+		}
+		udelay(10);
+	} while (!(nes_read_indexed(nesdev,
+			NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn) * 8)) & (1 << 8)));
+
+	nes_debug(NES_DBG_INIT, "CQP Status = 0x%08X\n", nes_read_indexed(nesdev,
+			NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)));
+
+	/* Ring doorbell (4 WQEs) */
+	nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x04800000 | nesdev->cqp.qp_id);
+
+	/* wait for the CCQ, CEQ, and AEQ to get created */
+	count = 0;
+	do {
+		if (count++ > 1000) {
+			printk(KERN_ERR PFX "Error creating CCQ, CEQ, and AEQ\n");
+			kfree(nesdev->nes_cqp_requests);
+			pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size,
+					nesdev->cqp_vbase, nesdev->cqp_pbase);
+			return -1;
+		}
+		udelay(10);
+	} while (((nes_read_indexed(nesdev,
+			NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15<<8)) != (15<<8)));
+
+	/* dump the QP status value */
+	nes_debug(NES_DBG_INIT, "QP Status = 0x%08X\n", nes_read_indexed(nesdev,
+			NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)));
+
+	nesdev->cqp.sq_tail++;
+
+	return 0;
+}
+
+
+/**
+ * nes_destroy_cqp - destroy the AEQ, CEQs, CCQ and CQP and free their memory; always returns 0
+ */
+int nes_destroy_cqp(struct nes_device *nesdev)
+{
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	u32 count = 0;
+	u32 cqp_head;
+	unsigned long flags;
+
+	do {	/* bounded wait for sq_tail to catch up with sq_head */
+		if (count++ > 1000)
+			break;
+		udelay(10);
+	} while (!(nesdev->cqp.sq_head == nesdev->cqp.sq_tail));
+
+	/* Reset CCQ */
+	nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_RESET |
+			nesdev->ccq.cq_number);
+
+	/* Disable device interrupts */
+	nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff);
+
+	spin_lock_irqsave(&nesdev->cqp.lock, flags);
+
+	/* Destroy the AEQ */
+	cqp_head = nesdev->cqp.sq_head++;
+	nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
+	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_AEQ |
+			((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8));
+	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0;
+
+	/* Destroy the NIC CEQ */
+	cqp_head = nesdev->cqp.sq_head++;
+	nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
+	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ |
+			((u32)nesdev->nic_ceq_index << 8));
+
+	/* Destroy the CEQ */
+	cqp_head = nesdev->cqp.sq_head++;
+	nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
+	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ |
+			(nesdev->ceq_index << 8));
+
+	/* Destroy the CCQ */
+	cqp_head = nesdev->cqp.sq_head++;
+	nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
+	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CQ);
+	cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->ccq.cq_number |
+			((u32)nesdev->ceq_index << 16));
+
+	/* Destroy CQP */
+	cqp_head = nesdev->cqp.sq_head++;
+	nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
+	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_QP |
+			NES_CQP_QP_TYPE_CQP);
+	cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->cqp.qp_id);
+
+	barrier();
+	/* Ring doorbell (5 WQEs) */
+	nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x05800000 | nesdev->cqp.qp_id);
+
+	spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+
+	/* wait for the CCQ, CEQ, and AEQ to get destroyed */
+	count = 0;
+	do {
+		if (count++ > 1000) {
+			printk(KERN_ERR PFX "Function%d: Error destroying CCQ, CEQ, and AEQ\n",
+					PCI_FUNC(nesdev->pcidev->devfn));
+			break;
+		}
+		udelay(10);
+	} while (((nes_read_indexed(nesdev,
+			NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15 << 8)) != 0));
+
+	/* dump the QP status value */
+	nes_debug(NES_DBG_SHUTDOWN, "Function%d: QP Status = 0x%08X\n",
+			PCI_FUNC(nesdev->pcidev->devfn),
+			nes_read_indexed(nesdev,
+			NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)));
+
+	kfree(nesdev->nes_cqp_requests);
+
+	/* Free the control structures: pass the allocation base, not the 512-byte aligned SQ address */
+	pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp_vbase,
+			nesdev->cqp_pbase);
+
+	return 0;
+}
+
+
+/**
+ * nes_init_phy - configure the PHY/MDIO settings for nesdev->mac_index; always returns 0
+ */
+int nes_init_phy(struct nes_device *nesdev)
+{
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	u32 counter = 0;
+	u32 mac_index = nesdev->mac_index;
+	u32 tx_config;
+	u16 phy_data;
+
+	if (nesadapter->OneG_Mode) {
+		nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index);
+		if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) {
+			printk(PFX "%s: Programming mdc config for 1G\n", __FUNCTION__);
+			tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
+			tx_config |= 0x04;
+			nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
+		}
+
+		nes_read_1G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index], &phy_data);
+		nes_debug(NES_DBG_PHY, "Phy data from register 1 phy address %u = 0x%X.\n",
+				nesadapter->phy_index[mac_index], phy_data);
+		nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index],  0xb000);
+
+		/* Reset the PHY */
+		nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], 0x8000);
+		udelay(100);
+		counter = 0;	/* bounds the poll below to roughly 100 reads */
+		do {
+			nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
+			nes_debug(NES_DBG_PHY, "Phy data from register 0 = 0x%X.\n", phy_data);
+			if (counter++ > 100) break;
+		} while (phy_data & 0x8000);	/* loop until the reset bit written above reads back clear */
+
+		/* Setting no phy loopback */
+		phy_data &= 0xbfff;
+		phy_data |= 0x1140;
+		nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index],  phy_data);
+		nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
+		nes_debug(NES_DBG_PHY, "Phy data from register 0 = 0x%X.\n", phy_data);
+
+		nes_read_1G_phy_reg(nesdev, 0x17, nesadapter->phy_index[mac_index], &phy_data);
+		nes_debug(NES_DBG_PHY, "Phy data from register 0x17 = 0x%X.\n", phy_data);
+
+		nes_read_1G_phy_reg(nesdev, 0x1e, nesadapter->phy_index[mac_index], &phy_data);
+		nes_debug(NES_DBG_PHY, "Phy data from register 0x1e = 0x%X.\n", phy_data);
+
+		/* Setting the interrupt mask */
+		nes_read_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], &phy_data);
+		nes_debug(NES_DBG_PHY, "Phy data from register 0x19 = 0x%X.\n", phy_data);
+		nes_write_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], 0xffee);
+
+		nes_read_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], &phy_data);
+		nes_debug(NES_DBG_PHY, "Phy data from register 0x19 = 0x%X.\n", phy_data);
+
+		/* turning on flow control */
+		nes_read_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], &phy_data);
+		nes_debug(NES_DBG_PHY, "Phy data from register 0x4 = 0x%X.\n", phy_data);
+		nes_write_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index],
+				(phy_data & ~(0x03E0)) | 0xc00);
+		/* nes_write_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index],
+				phy_data | 0xc00); */
+		nes_read_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], &phy_data);
+		nes_debug(NES_DBG_PHY, "Phy data from register 0x4 = 0x%X.\n", phy_data);
+
+		nes_read_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], &phy_data);
+		nes_debug(NES_DBG_PHY, "Phy data from register 0x9 = 0x%X.\n", phy_data);
+		/* Clear Half duplex */
+		nes_write_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index],
+				phy_data & ~(0x0100));
+		nes_read_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], &phy_data);
+		nes_debug(NES_DBG_PHY, "Phy data from register 0x9 = 0x%X.\n", phy_data);
+		/* NOTE(review): 0x0300 in register 0 is presumably restart-autoneg + full duplex - confirm */
+		nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
+		nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data | 0x0300);
+	} else {
+		if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) {
+			/* setup 10G MDIO operation */
+			tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
+			tx_config |= 0x14;
+			nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
+		}
+	}
+	return 0;
+}
+
+
+/**
+ * nes_replenish_nic_rq - allocate and post receive skbs until rx_skbs_needed drops to zero
+ */
+static void nes_replenish_nic_rq(struct nes_vnic *nesvnic)
+{
+	unsigned long flags;
+	dma_addr_t bus_address;
+	struct sk_buff *skb;
+	struct nes_hw_nic_rq_wqe *nic_rqe;
+	struct nes_hw_nic *nesnic;
+	struct nes_device *nesdev;
+	u32 rx_wqes_posted = 0;
+
+	nesnic = &nesvnic->nic;
+	nesdev = nesvnic->nesdev;
+	spin_lock_irqsave(&nesnic->rq_lock, flags);
+	if (nesnic->replenishing_rq !=0) {	/* a replenish pass is already in progress */
+		if (((nesnic->rq_size-1) == atomic_read(&nesvnic->rx_skbs_needed)) &&
+				(atomic_read(&nesvnic->rx_skb_timer_running) == 0)) {
+			atomic_set(&nesvnic->rx_skb_timer_running, 1);
+			spin_unlock_irqrestore(&nesnic->rq_lock, flags);
+			nesvnic->rq_wqes_timer.expires = jiffies + (HZ/2);	/* 1/2 second */
+			add_timer(&nesvnic->rq_wqes_timer);
+		} else
+		spin_unlock_irqrestore(&nesnic->rq_lock, flags);	/* body of the else; the return below runs on both paths */
+		return;
+	}
+	nesnic->replenishing_rq = 1;
+	spin_unlock_irqrestore(&nesnic->rq_lock, flags);
+	do {
+		skb = dev_alloc_skb(nesvnic->max_frame_size);
+		if (skb) {
+			skb->dev = nesvnic->netdev;
+
+			bus_address = pci_map_single(nesdev->pcidev,
+					skb->data, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+			/* Describe the buffer (length and 64-bit bus address) in the RQ WQE at rq_head */
+			nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_head];
+			nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] =
+					cpu_to_le32(nesvnic->max_frame_size);
+			nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
+			nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] =
+					cpu_to_le32((u32)bus_address);
+			nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] =
+					cpu_to_le32((u32)((u64)bus_address >> 32));
+			nesnic->rx_skb[nesnic->rq_head] = skb;
+			nesnic->rq_head++;
+			nesnic->rq_head &= nesnic->rq_size - 1;
+			atomic_dec(&nesvnic->rx_skbs_needed);
+			barrier();
+			if (++rx_wqes_posted == 255) {	/* the count is written in bits 31:24, so flush every 255 WQEs */
+				nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id);
+				rx_wqes_posted = 0;
+			}
+		} else {
+			spin_lock_irqsave(&nesnic->rq_lock, flags);
+			if (((nesnic->rq_size-1) == atomic_read(&nesvnic->rx_skbs_needed)) &&
+					(atomic_read(&nesvnic->rx_skb_timer_running) == 0)) {
+				atomic_set(&nesvnic->rx_skb_timer_running, 1);
+				spin_unlock_irqrestore(&nesnic->rq_lock, flags);
+				nesvnic->rq_wqes_timer.expires = jiffies + (HZ/2);	/* 1/2 second */
+				add_timer(&nesvnic->rq_wqes_timer);
+			} else
+				spin_unlock_irqrestore(&nesnic->rq_lock, flags);
+			break;
+		}
+	} while (atomic_read(&nesvnic->rx_skbs_needed));
+	barrier();
+	if (rx_wqes_posted)	/* ring the doorbell for any WQEs not yet reported */
+		nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id);
+	nesnic->replenishing_rq = 0;	/* NOTE(review): cleared without rq_lock held */
+}
+
+
+/**
+ * nes_rq_wqes_timeout - RQ replenish retry timer; @parm carries the struct nes_vnic pointer
+ */
+static void nes_rq_wqes_timeout(unsigned long parm)
+{
+	struct nes_vnic *vnic = (struct nes_vnic *)parm;
+	printk("%s: Timer fired.\n", __func__);
+	atomic_set(&vnic->rx_skb_timer_running, 0);	/* the timer may be armed again from now on */
+	if (atomic_read(&vnic->rx_skbs_needed) != 0)
+		nes_replenish_nic_rq(vnic);
+}
+
+
+/**
+ * nes_init_nic_qp - allocate the NIC rings, create the NIC CQ and QP through the CQP, and post the initial receive buffers
+ */
+int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
+{
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	struct nes_hw_nic_sq_wqe *nic_sqe;
+	struct nes_hw_nic_qp_context *nic_context;
+	struct sk_buff *skb;
+	struct nes_hw_nic_rq_wqe *nic_rqe;
+	struct nes_vnic *nesvnic = netdev_priv(netdev);
+	unsigned long flags;
+	void *vmem;
+	dma_addr_t pmem;
+	u64 u64temp;
+	int ret;
+	u32 cqp_head;
+	u32 counter;
+	u32 wqe_count;
+	u8 jumbomode=0;
+	/* Returns 0 on success, -ENOMEM if ring or skb allocation fails, -EIO if the CQP create requests time out. */
+	/* Allocate fragment, SQ, RQ, and CQ; Reuse CEQ based on the PCI function */
+	nesvnic->nic_mem_size = 256 +
+			(NES_NIC_WQ_SIZE * sizeof(struct nes_first_frag)) +
+			(NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe)) +
+			(NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe)) +
+			(NES_NIC_WQ_SIZE * 2 * sizeof(struct nes_hw_nic_cqe)) +
+			sizeof(struct nes_hw_nic_qp_context);
+
+	nesvnic->nic_vbase = pci_alloc_consistent(nesdev->pcidev, nesvnic->nic_mem_size,
+			&nesvnic->nic_pbase);
+	if (!nesvnic->nic_vbase) {
+		nes_debug(NES_DBG_INIT, "Unable to allocate memory for NIC host descriptor rings\n");
+		return -ENOMEM;
+	}
+	memset(nesvnic->nic_vbase, 0, nesvnic->nic_mem_size);
+	nes_debug(NES_DBG_INIT, "Allocated NIC QP structures at %p (phys = %016lX), size = %u.\n",
+			nesvnic->nic_vbase, (unsigned long)nesvnic->nic_pbase, nesvnic->nic_mem_size);
+	/* Round both views of the allocation up to a 256-byte boundary (the extra 256 bytes above cover this) */
+	vmem = (void *)(((unsigned long)nesvnic->nic_vbase + (256 - 1)) &
+			~(unsigned long)(256 - 1));
+	pmem = (dma_addr_t)(((unsigned long long)nesvnic->nic_pbase + (256 - 1)) &
+			~(unsigned long long)(256 - 1));
+
+	/* Setup the first Fragment buffers */
+	nesvnic->nic.first_frag_vbase = vmem;
+
+	for (counter = 0; counter < NES_NIC_WQ_SIZE; counter++) {
+		nesvnic->nic.frag_paddr[counter] = pmem;
+		pmem += sizeof(struct nes_first_frag);
+	}
+
+	/* setup the SQ */
+	vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_first_frag));
+
+	nesvnic->nic.sq_vbase = (void *)vmem;
+	nesvnic->nic.sq_pbase = pmem;
+	nesvnic->nic.sq_head = 0;
+	nesvnic->nic.sq_tail = 0;
+	nesvnic->nic.sq_size = NES_NIC_WQ_SIZE;
+	for (counter = 0; counter < NES_NIC_WQ_SIZE; counter++) {
+		nic_sqe = &nesvnic->nic.sq_vbase[counter];
+		nic_sqe->wqe_words[NES_NIC_SQ_WQE_MISC_IDX] =
+				cpu_to_le32(NES_NIC_SQ_WQE_DISABLE_CHKSUM |
+				NES_NIC_SQ_WQE_COMPLETION);
+		nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX] =
+				cpu_to_le32((u32)NES_FIRST_FRAG_SIZE << 16);
+		nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX] =
+				cpu_to_le32((u32)nesvnic->nic.frag_paddr[counter]);
+		nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX] =
+				cpu_to_le32((u32)((u64)nesvnic->nic.frag_paddr[counter] >> 32));
+	}
+
+	nesvnic->get_cqp_request = nes_get_cqp_request;
+	nesvnic->post_cqp_request = nes_post_cqp_request;
+	nesvnic->mcrq_mcast_filter = NULL;
+
+	spin_lock_init(&nesvnic->nic.sq_lock);
+	spin_lock_init(&nesvnic->nic.rq_lock);
+
+	/* setup the RQ */
+	vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe));
+	pmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe));
+
+
+	nesvnic->nic.rq_vbase = vmem;
+	nesvnic->nic.rq_pbase = pmem;
+	nesvnic->nic.rq_head = 0;
+	nesvnic->nic.rq_tail = 0;
+	nesvnic->nic.rq_size = NES_NIC_WQ_SIZE;
+
+	/* setup the CQ */
+	vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe));
+	pmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe));
+
+	if (nesdev->nesadapter->netdev_count > 2)
+		nesvnic->mcrq_qp_id = nesvnic->nic_index + 32;
+	else
+		nesvnic->mcrq_qp_id = nesvnic->nic.qp_id + 4;
+
+	nesvnic->nic_cq.cq_vbase = vmem;
+	nesvnic->nic_cq.cq_pbase = pmem;
+	nesvnic->nic_cq.cq_head = 0;
+	nesvnic->nic_cq.cq_size = NES_NIC_WQ_SIZE * 2;
+
+	nesvnic->nic_cq.ce_handler = nes_nic_napi_ce_handler;
+
+	/* Send CreateCQ request to CQP */
+	spin_lock_irqsave(&nesdev->cqp.lock, flags);
+	cqp_head = nesdev->cqp.sq_head;
+
+	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+
+	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(
+			NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
+			((u32)nesvnic->nic_cq.cq_size << 16));
+	cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(
+			nesvnic->nic_cq.cq_number | ((u32)nesdev->nic_ceq_index << 16));
+	u64temp = (u64)nesvnic->nic_cq.cq_pbase;
+	set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
+	cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =  0;
+	u64temp = (unsigned long)&nesvnic->nic_cq;	/* CQ context = address of nic_cq, stored shifted right by one */
+	cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] =  cpu_to_le32((u32)(u64temp >> 1));
+	cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
+			cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
+	cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
+	if (++cqp_head >= nesdev->cqp.sq_size)
+		cqp_head = 0;
+	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+
+	/* Send CreateQP request to CQP; the QP context sits right after the last CQE */
+	nic_context = (void *)(&nesvnic->nic_cq.cq_vbase[nesvnic->nic_cq.cq_size]);
+	nic_context->context_words[NES_NIC_CTX_MISC_IDX] =
+			cpu_to_le32((u32)NES_NIC_CTX_SIZE |
+			((u32)PCI_FUNC(nesdev->pcidev->devfn) << 12));
+	nes_debug(NES_DBG_INIT, "RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n",
+			nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE),
+			nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE));
+	if (nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0) {
+		nic_context->context_words[NES_NIC_CTX_MISC_IDX] |= cpu_to_le32(NES_NIC_BACK_STORE);
+	}
+
+	u64temp = (u64)nesvnic->nic.sq_pbase;
+	nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
+	nic_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
+	u64temp = (u64)nesvnic->nic.rq_pbase;
+	nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
+	nic_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
+
+	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP |
+			NES_CQP_QP_TYPE_NIC);
+	cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesvnic->nic.qp_id);
+	u64temp = (u64)nesvnic->nic_cq.cq_pbase +
+			(nesvnic->nic_cq.cq_size * sizeof(struct nes_hw_nic_cqe));
+	set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
+
+	if (++cqp_head >= nesdev->cqp.sq_size)
+		cqp_head = 0;
+	nesdev->cqp.sq_head = cqp_head;
+
+	barrier();
+
+	/* Ring doorbell (2 WQEs) */
+	nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
+
+	spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+	nes_debug(NES_DBG_INIT, "Waiting for create NIC QP%u to complete.\n",
+			nesvnic->nic.qp_id);
+
+	ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
+			NES_EVENT_TIMEOUT);
+	nes_debug(NES_DBG_INIT, "Create NIC QP%u completed, wait_event_timeout ret = %u.\n",
+			nesvnic->nic.qp_id, ret);
+	if (!ret) {
+		nes_debug(NES_DBG_INIT, "NIC QP%u create timeout expired\n", nesvnic->nic.qp_id);
+		pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase,
+				nesvnic->nic_pbase);
+		return -EIO;
+	}
+
+	/* Populate the RQ */
+	for (counter = 0; counter < (NES_NIC_WQ_SIZE - 1); counter++) {
+		skb = dev_alloc_skb(nesvnic->max_frame_size);
+		if (!skb) {
+			nes_debug(NES_DBG_INIT, "%s: out of memory for receive skb\n", netdev->name);
+			nesvnic->nic.rq_head = counter;	/* so nes_destroy_nic_qp() unmaps and frees the skbs posted so far */
+			nes_destroy_nic_qp(nesvnic);
+			return -ENOMEM;
+		}
+
+		skb->dev = netdev;
+
+		pmem = pci_map_single(nesdev->pcidev, skb->data,
+				nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+
+		nic_rqe = &nesvnic->nic.rq_vbase[counter];
+		nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size);
+		nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
+		nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem);
+		nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32));
+		nesvnic->nic.rx_skb[counter] = skb;
+	}
+
+	wqe_count = NES_NIC_WQ_SIZE - 1;
+	nesvnic->nic.rq_head = wqe_count;
+	barrier();
+	do {	/* report the posted WQEs to the hardware in chunks of at most 255 */
+		counter = min(wqe_count, ((u32)255));
+		wqe_count -= counter;
+		nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter << 24) | nesvnic->nic.qp_id);
+	} while (wqe_count);
+	init_timer(&nesvnic->rq_wqes_timer);
+	nesvnic->rq_wqes_timer.function = nes_rq_wqes_timeout;
+	nesvnic->rq_wqes_timer.data = (unsigned long)nesvnic;
+	nes_debug(NES_DBG_INIT, "NAPI support Enabled\n");
+
+	if (nesdev->nesadapter->et_use_adaptive_rx_coalesce)
+	{
+		nes_nic_init_timer(nesdev);
+		if (netdev->mtu > 1500)
+			jumbomode = 1;
+		nes_nic_init_timer_defaults(nesdev, jumbomode);
+	}
+
+	return 0;
+}
+
+
+/**
+ * nes_destroy_nic_qp - free posted receive skbs, destroy the NIC QP and CQ through the CQP, and release the ring memory
+ */
+void nes_destroy_nic_qp(struct nes_vnic *nesvnic)
+{
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	struct nes_hw_nic_rq_wqe *nic_rqe;
+	u64 wqe_frag;
+	u32 cqp_head;
+	unsigned long flags;
+	int ret;
+
+	/* Free remaining NIC receive buffers (every entry between rq_tail and rq_head) */
+	while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) {
+		nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail];
+		wqe_frag = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
+		wqe_frag |= ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32;
+		pci_unmap_single(nesdev->pcidev, (dma_addr_t)wqe_frag,
+				nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+		dev_kfree_skb(nesvnic->nic.rx_skb[nesvnic->nic.rq_tail++]);
+		nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1);
+	}
+
+	spin_lock_irqsave(&nesdev->cqp.lock, flags);
+
+	/* Destroy NIC QP */
+	cqp_head = nesdev->cqp.sq_head;
+	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+		(NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC));
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
+		nesvnic->nic.qp_id);
+
+	if (++cqp_head >= nesdev->cqp.sq_size)
+		cqp_head = 0;
+
+	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+
+	/* Destroy NIC CQ */
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+		(NES_CQP_DESTROY_CQ | ((u32)nesvnic->nic_cq.cq_size << 16)));
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
+		(nesvnic->nic_cq.cq_number | ((u32)nesdev->nic_ceq_index << 16)));
+
+	if (++cqp_head >= nesdev->cqp.sq_size)
+		cqp_head = 0;
+
+	nesdev->cqp.sq_head = cqp_head;
+	barrier();
+
+	/* Ring doorbell (2 WQEs) */
+	nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
+
+	spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+	nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u,"
+			" cqp.sq_tail=%u, cqp.sq_size=%u\n",
+			cqp_head, nesdev->cqp.sq_head,
+			nesdev->cqp.sq_tail, nesdev->cqp.sq_size);
+
+	ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
+			NES_EVENT_TIMEOUT);
+
+	nes_debug(NES_DBG_SHUTDOWN, "Destroy NIC QP returned, wait_event_timeout ret = %u, cqp_head=%u,"
+			" cqp.sq_head=%u, cqp.sq_tail=%u\n",
+			ret, cqp_head, nesdev->cqp.sq_head, nesdev->cqp.sq_tail);
+	if (!ret) {
+		nes_debug(NES_DBG_SHUTDOWN, "NIC QP%u destroy timeout expired\n",
+				nesvnic->nic.qp_id);
+	}
+	/* The ring memory is released even when the destroy requests timed out. */
+	pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase,
+			nesvnic->nic_pbase);
+}
+
+/**
+ * nes_napi_isr - service the interrupt here when only bits within 0x0000ff00 are pending; returns 1 if serviced, 0 otherwise
+ */
+int nes_napi_isr(struct nes_device *nesdev)
+{
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	u32 int_stat;
+
+	if (nesdev->napi_isr_ran) {
+		/* interrupt status has already been read in ISR */
+		int_stat = nesdev->int_stat;
+	} else {
+		int_stat = nes_read32(nesdev->regs + NES_INT_STAT);
+		nesdev->int_stat = int_stat;	/* cached so that nes_dpc() can reuse it when we return 0 */
+		nesdev->napi_isr_ran = 1;
+	}
+
+	int_stat &= nesdev->int_req;
+	/* iff NIC, process here, else wait for DPC */
+	if ((int_stat) && ((int_stat & 0x0000ff00) == int_stat)) {
+		nesdev->napi_isr_ran = 0;
+		nes_write32(nesdev->regs+NES_INT_STAT,
+				(int_stat &
+				~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3)));
+
+		/* Process the CEQs */
+		nes_process_ceq(nesdev, &nesdev->nesadapter->ceq[nesdev->nic_ceq_index]);
+
+		if (unlikely((((nesadapter->et_rx_coalesce_usecs_irq) &&
+					   (!nesadapter->et_use_adaptive_rx_coalesce)) ||
+					  ((nesadapter->et_use_adaptive_rx_coalesce) &&
+					   (nesdev->deepcq_count > nesadapter->et_pkt_rate_low)))) ) {
+			if ((nesdev->int_req & NES_INT_TIMER) == 0) {
+				/* Enable Periodic timer interrupts */
+				nesdev->int_req |= NES_INT_TIMER;
+				/* ack any pending periodic timer interrupts so we don't get an immediate interrupt */
+				/* TODO: need to also ack other unused periodic timer values, get from nesadapter */
+				nes_write32(nesdev->regs+NES_TIMER_STAT,
+						nesdev->timer_int_req  | ~(nesdev->nesadapter->timer_int_req));
+				nes_write32(nesdev->regs+NES_INTF_INT_MASK,
+						~(nesdev->intf_int_req | NES_INTF_PERIODIC_TIMER));
+			}
+
+			if (unlikely(nesadapter->et_use_adaptive_rx_coalesce))
+			{
+				nes_nic_init_timer(nesdev);
+			}
+			/* Enable interrupts, except CEQs */
+			nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
+		} else {
+			/* Enable interrupts, make sure timer is off */
+			nesdev->int_req &= ~NES_INT_TIMER;
+			nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
+			nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
+			nesadapter->tune_timer.timer_in_use_old = 0;
+		}
+		nesdev->deepcq_count = 0;
+		return 1;
+	} else {
+		return 0;
+	}
+}
+
+
+/**
+ * nes_dpc - deferred interrupt processing for CEQ, AEQ, MAC, periodic timer and interface interrupts; @param is the struct nes_device pointer
+ */
+void nes_dpc(unsigned long param)
+{
+	struct nes_device *nesdev = (struct nes_device *)param;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	u32 counter;
+	u32 loop_counter = 0;
+	u32 int_status_bit;
+	u32 int_stat;
+	u32 timer_stat;
+	u32 temp_int_stat;
+	u32 intf_int_stat;
+	u32 debug_error;
+	u32 processed_intf_int = 0;
+	u16 processed_timer_int = 0;
+	u16 completion_ints = 0;
+	u16 timer_ints = 0;
+
+	/* nes_debug(NES_DBG_ISR, "\n"); */
+	/* Service interrupt status until nothing is left pending or MAX_DPC_ITERATIONS is exceeded */
+	do {
+		timer_stat = 0;
+		if (nesdev->napi_isr_ran) {
+			nesdev->napi_isr_ran = 0;
+			int_stat = nesdev->int_stat;	/* status already read by nes_napi_isr() */
+		} else
+			int_stat = nes_read32(nesdev->regs+NES_INT_STAT);
+		if (processed_intf_int != 0)
+			int_stat &= nesdev->int_req & ~NES_INT_INTF;
+		else
+			int_stat &= nesdev->int_req;
+		if (processed_timer_int == 0) {	/* the timer interrupt is only considered on the first pass */
+			processed_timer_int = 1;
+			if (int_stat & NES_INT_TIMER) {
+				timer_stat = nes_read32(nesdev->regs + NES_TIMER_STAT);
+				if ((timer_stat & nesdev->timer_int_req) == 0) {
+					int_stat &= ~NES_INT_TIMER;
+				}
+			}
+		} else {
+			int_stat &= ~NES_INT_TIMER;
+		}
+
+		if (int_stat) {
+			if (int_stat & ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|
+					NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3)) {
+				/* Ack the interrupts */
+				nes_write32(nesdev->regs+NES_INT_STAT,
+						(int_stat & ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|
+						NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3)));
+			}
+
+			temp_int_stat = int_stat;
+			for (counter = 0, int_status_bit = 1; counter < 16; counter++) {
+				if (int_stat & int_status_bit) {
+					nes_process_ceq(nesdev, &nesadapter->ceq[counter]);
+					temp_int_stat &= ~int_status_bit;
+					completion_ints = 1;
+				}
+				if (!(temp_int_stat & 0x0000ffff))	/* no CEQ bits left to service */
+					break;
+				int_status_bit <<= 1;
+			}
+
+			/* Process the AEQ for this pci function */
+			int_status_bit = 1 << (16 + PCI_FUNC(nesdev->pcidev->devfn));
+			if (int_stat & int_status_bit) {
+				nes_process_aeq(nesdev, &nesadapter->aeq[PCI_FUNC(nesdev->pcidev->devfn)]);
+			}
+
+			/* Process the MAC interrupt for this pci function */
+			int_status_bit = 1 << (24 + nesdev->mac_index);
+			if (int_stat & int_status_bit) {
+				nes_process_mac_intr(nesdev, nesdev->mac_index);
+			}
+
+			if (int_stat & NES_INT_TIMER) {
+				if (timer_stat & nesdev->timer_int_req) {
+					nes_write32(nesdev->regs + NES_TIMER_STAT,
+							(timer_stat & nesdev->timer_int_req) |
+							~(nesdev->nesadapter->timer_int_req));
+					timer_ints = 1;
+				}
+			}
+
+			if (int_stat & NES_INT_INTF) {
+				processed_intf_int = 1;
+				intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT);
+				intf_int_stat &= nesdev->intf_int_req;
+				if (NES_INTF_INT_CRITERR & intf_int_stat) {
+					debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS);
+					printk(KERN_ERR PFX "Critical Error reported by device!!! 0x%02X\n",
+							(u16)debug_error);
+					nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS,
+							0x01010000 | (debug_error & 0x0000ffff));
+					/* BUG(); */
+					if (crit_err_count++ > 10)
+						nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17);
+				}
+				if (NES_INTF_INT_PCIERR & intf_int_stat) {
+					printk(KERN_ERR PFX "PCI Error reported by device!!!\n");
+					BUG();
+				}
+				if (NES_INTF_INT_AEQ_OFLOW & intf_int_stat) {
+					printk(KERN_ERR PFX "AEQ Overflow reported by device!!!\n");
+					BUG();
+				}
+				nes_write32(nesdev->regs+NES_INTF_INT_STAT, intf_int_stat);
+			}
+
+			if (int_stat & NES_INT_TSW) {
+			}	/* nothing is done for NES_INT_TSW */
+		}
+		/* Don't let the interface interrupt bit keep us in the loop. NOTE(review): ~ binds only to NES_INT_INTF, so the OR-ed bits below are no-ops and only INTF is cleared */
+		int_stat &= ~NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|
+				NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3;
+	} while ((int_stat != 0) && (loop_counter++ < MAX_DPC_ITERATIONS));
+	/* Re-program the interrupt masks, turning the periodic timer interrupt on or off as needed */
+	if (timer_ints == 1) {
+		if ((nesadapter->et_rx_coalesce_usecs_irq) || (nesadapter->et_use_adaptive_rx_coalesce)) {
+			if (completion_ints == 0) {
+				nesdev->timer_only_int_count++;
+				if (nesdev->timer_only_int_count>=nesadapter->timer_int_limit) {
+					nesdev->timer_only_int_count = 0;
+					nesdev->int_req &= ~NES_INT_TIMER;
+					nes_write32(nesdev->regs + NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
+					nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
+					nesdev->nesadapter->tune_timer.timer_in_use_old = 0;
+				} else {
+					nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req));
+				}
+			} else {
+				if (unlikely(nesadapter->et_use_adaptive_rx_coalesce))
+				{
+					nes_nic_init_timer(nesdev);
+				}
+				nesdev->timer_only_int_count = 0;
+				nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req));
+			}
+		} else {
+			nesdev->timer_only_int_count = 0;
+			nesdev->int_req &= ~NES_INT_TIMER;
+			nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
+			nes_write32(nesdev->regs+NES_TIMER_STAT,
+					nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req));
+			nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
+		}
+	} else {
+		if ( (completion_ints == 1) &&
+			 (((nesadapter->et_rx_coalesce_usecs_irq) &&
+			   (!nesadapter->et_use_adaptive_rx_coalesce)) ||
+			  ((nesdev->deepcq_count > nesadapter->et_pkt_rate_low) &&
+			   (nesadapter->et_use_adaptive_rx_coalesce) )) ) {
+			/* nes_debug(NES_DBG_ISR, "Enabling periodic timer interrupt.\n" ); */
+			nesdev->timer_only_int_count = 0;
+			nesdev->int_req |= NES_INT_TIMER;
+			nes_write32(nesdev->regs+NES_TIMER_STAT,
+					nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req));
+			nes_write32(nesdev->regs+NES_INTF_INT_MASK,
+					~(nesdev->intf_int_req | NES_INTF_PERIODIC_TIMER));
+			nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
+		} else {
+			nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
+		}
+	}
+	nesdev->deepcq_count = 0;
+}
+
+
+/**
+ * nes_process_ceq - drain the valid entries of a CEQ, calling the ce_handler of the CQ each entry refers to
+ */
+void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq)
+{
+	u64 u64temp;
+	struct nes_hw_cq *cq;
+	u32 head;
+	u32 ceq_size;
+
+	/* nes_debug(NES_DBG_CQ, "\n"); */
+	head = ceq->ceq_head;
+	ceq_size = ceq->ceq_size;
+
+	do {
+		if (le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]) &
+				NES_CEQE_VALID) {
+			u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX])))<<32) |
+						((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_LOW_IDX])));
+			u64temp <<= 1;	/* undo the >> 1 applied when the CQ context was programmed */
+			cq = (struct nes_hw_cq *)(unsigned long)u64temp;	/* convert by value: correct on any pointer width and byte order */
+			/* nes_debug(NES_DBG_CQ, "pCQ = %p\n", cq); */
+			barrier();
+			ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX] = 0;	/* clears the valid bit: entry consumed */
+
+			/* call the event handler */
+			cq->ce_handler(nesdev, cq);
+
+			if (++head >= ceq_size)
+				head = 0;
+		} else {
+			break;
+		}
+
+	} while (1);
+
+	ceq->ceq_head = head;
+}
+
+
+/**
+ * nes_process_aeq - drain the valid entries of an AEQ, passing QP/CQ events with id >= NES_FIRST_QPN to nes_process_iwarp_aeqe()
+ */
+void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq)
+{
+//	u64 u64temp;
+	u32 head;
+	u32 aeq_size;
+	u32 aeqe_misc;
+	u32 aeqe_cq_id;
+	struct nes_hw_aeqe volatile *aeqe;
+
+	head = aeq->aeq_head;
+	aeq_size = aeq->aeq_size;
+
+	do {
+		aeqe = &aeq->aeq_vbase[head];
+		if ((le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]) & NES_AEQE_VALID) == 0)
+			break;	/* first entry without the valid bit ends the scan */
+		aeqe_misc  = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
+		aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]);
+		if (aeqe_misc & (NES_AEQE_QP|NES_AEQE_CQ)) {
+			if (aeqe_cq_id >= NES_FIRST_QPN) {
+				/* dealing with an accelerated QP related AE */
+//				u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])))<<32) |
+//					((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX])));
+				nes_process_iwarp_aeqe(nesdev, (struct nes_hw_aeqe *)aeqe);
+			} else {
+				/* TODO: dealing with a CQP related AE */
+				nes_debug(NES_DBG_AEQ, "Processing CQP related AE, misc = 0x%04X\n",
+						(u16)(aeqe_misc >> 16));
+			}
+		}
+
+		aeqe->aeqe_words[NES_AEQE_MISC_IDX] = 0;	/* clears the valid bit: entry consumed */
+
+		if (++head >= aeq_size)
+			head = 0;
+	}
+	while (1);
+	aeq->aeq_head = head;
+}
+
+static void nes_reset_link(struct nes_device *nesdev, u32 mac_index)
+{
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	u32 reset_value;
+	u32 i=0;
+	u32 u32temp;
+	/* Issue a software reset for the link on mac_index; does nothing on NE020_REV hardware. */
+	if (nesadapter->hw_rev == NE020_REV) {
+		return;
+	}
+	mh_detected++;
+
+	reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
+
+	if ((mac_index == 0) || ((mac_index == 1) && (nesadapter->OneG_Mode)))
+		reset_value |= 0x0000001d;
+	else
+		reset_value |= 0x0000002d;
+
+	if (4 <= (nesadapter->link_interrupt_count[mac_index] / ((u16)NES_MAX_LINK_INTERRUPTS))) {
+		if ((!nesadapter->OneG_Mode) && (nesadapter->port_count == 2)) {
+			nesadapter->link_interrupt_count[0] = 0;
+			nesadapter->link_interrupt_count[1] = 0;
+			u32temp = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
+			if (0x00000040 & u32temp)	/* write 0xF088 when bit 0x40 is currently set, 0xF0C8 otherwise */
+				nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088);
+			else
+				nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8);
+
+			reset_value |= 0x0000003d;
+		}
+		nesadapter->link_interrupt_count[mac_index] = 0;
+	}
+
+	nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value);
+	/* Busy-poll (bounded by the i++ < 5000 test) until bit 0x40 of the reset register reads back as set */
+	while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET)
+			& 0x00000040) != 0x00000040) && (i++ < 5000));
+
+	if (0x0000003d == (reset_value & 0x0000003d)) {
+		u32 pcs_control_status0, pcs_control_status1;
+
+		for (i = 0; i < 10; i++) {
+			pcs_control_status0 = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0);
+			pcs_control_status1 = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
+			if (((0x0F000000 == (pcs_control_status0 & 0x0F000000))
+			     && (pcs_control_status0 & 0x00100000))
+			    || ((0x0F000000 == (pcs_control_status1 & 0x0F000000))
+				&& (pcs_control_status1 & 0x00100000)))
+				continue;
+			else
+				break;
+		}
+		if (10 == i) {	/* the condition above held on all 10 reads: toggle the SerDes setting and reset again */
+			u32temp = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
+			if (0x00000040 & u32temp)
+				nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088);
+			else
+				nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8);
+
+			nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value);
+			/* NOTE(review): i is 10 on entry, so this poll stops a few iterations earlier than the first one */
+			while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET)
+				 & 0x00000040) != 0x00000040) && (i++ < 5000));
+		}
+	}
+}
+
+/**
+ * nes_process_mac_intr - acknowledge a MAC interrupt and propagate link up/down changes to the netdevs on that port
+ */
+void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
+{
+	unsigned long flags;
+	u32 pcs_control_status;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_vnic *nesvnic;
+	u32 mac_status;
+	u32 mac_index = nesdev->mac_index;
+	u32 u32temp;
+	u16 phy_data;
+	u16 temp_phy_data;
+
+	spin_lock_irqsave(&nesadapter->phy_lock, flags);
+	if (nesadapter->mac_sw_state[mac_number] != NES_MAC_SW_IDLE) {	/* another context is already handling this MAC */
+		spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+		return;
+	}
+	nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_INTERRUPT;
+	spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+
+	/* ack the MAC interrupt */
+	mac_status = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (mac_index * 0x200));
+	/* Clear the interrupt */
+	nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (mac_index * 0x200), mac_status);
+
+	nes_debug(NES_DBG_PHY, "MAC%u interrupt status = 0x%X.\n", mac_number, mac_status);
+
+	if (mac_status & (NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT)) {
+		nesdev->link_status_interrupts++;
+		if (0 == (++nesadapter->link_interrupt_count[mac_index] % ((u16)NES_MAX_LINK_INTERRUPTS))) {
+			spin_lock_irqsave(&nesadapter->phy_lock, flags);
+			nes_reset_link(nesdev, mac_index);
+			spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+		}
+		/* read the PHY interrupt status register */
+		if (nesadapter->OneG_Mode) {
+			do {
+				nes_read_1G_phy_reg(nesdev, 0x1a,
+						nesadapter->phy_index[mac_index], &phy_data);
+				nes_debug(NES_DBG_PHY, "Phy%d data from register 0x1a = 0x%X.\n",
+						nesadapter->phy_index[mac_index], phy_data);
+			} while (phy_data&0x8000);
+			/* NOTE(review): the PHY polling loops directly above and below have no iteration bound */
+			temp_phy_data = 0;
+			do {
+				nes_read_1G_phy_reg(nesdev, 0x11,
+						nesadapter->phy_index[mac_index], &phy_data);
+				nes_debug(NES_DBG_PHY, "Phy%d data from register 0x11 = 0x%X.\n",
+						nesadapter->phy_index[mac_index], phy_data);
+				if (temp_phy_data == phy_data)	/* two consecutive reads agree */
+					break;
+				temp_phy_data = phy_data;
+			} while (1);
+
+			nes_read_1G_phy_reg(nesdev, 0x1e,
+					nesadapter->phy_index[mac_index], &phy_data);
+			nes_debug(NES_DBG_PHY, "Phy%d data from register 0x1e = 0x%X.\n",
+					nesadapter->phy_index[mac_index], phy_data);
+
+			nes_read_1G_phy_reg(nesdev, 1,
+					nesadapter->phy_index[mac_index], &phy_data);
+			nes_debug(NES_DBG_PHY, "1G phy%u data from register 1 = 0x%X\n",
+					nesadapter->phy_index[mac_index], phy_data);
+
+			if (temp_phy_data & 0x1000) {
+				nes_debug(NES_DBG_PHY, "The Link is up according to the PHY\n");
+				phy_data = 4;	/* bit 0x0004 is what the link-up test below checks */
+			} else {
+				nes_debug(NES_DBG_PHY, "The Link is down according to the PHY\n");
+			}
+		}
+		nes_debug(NES_DBG_PHY, "Eth SERDES Common Status: 0=0x%08X, 1=0x%08X\n",
+				nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0),
+				nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0+0x200));
+		pcs_control_status = nes_read_indexed(nesdev,
+				NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index&1)*0x200));
+		pcs_control_status = nes_read_indexed(nesdev,	/* NOTE(review): same register read twice; presumably deliberate - confirm */
+				NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index&1)*0x200));
+		nes_debug(NES_DBG_PHY, "PCS PHY Control/Status%u: 0x%08X\n",
+				mac_index, pcs_control_status);
+		if (nesadapter->OneG_Mode) {
+			u32temp = 0x01010000;
+			if (nesadapter->port_count > 2) {
+				u32temp |= 0x02020000;
+			}
+			if ((pcs_control_status & u32temp)!= u32temp) {
+				phy_data = 0;
+				nes_debug(NES_DBG_PHY, "PCS says the link is down\n");
+			}
+		} else if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) {
+			nes_read_10G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index]);
+			temp_phy_data = (u16)nes_read_indexed(nesdev,
+								NES_IDX_MAC_MDIO_CONTROL);
+			u32temp = 20;	/* upper bound on re-reads while waiting for a stable value */
+			do {
+				nes_read_10G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index]);
+				phy_data = (u16)nes_read_indexed(nesdev,
+								NES_IDX_MAC_MDIO_CONTROL);
+				if ((phy_data == temp_phy_data) || (!(--u32temp)))
+					break;
+				temp_phy_data = phy_data;
+			} while (1);
+			nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
+				__FUNCTION__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
+
+		} else {
+			phy_data = (0x0f0f0000 == (pcs_control_status & 0x0f1f0000)) ? 4 : 0;
+		}
+
+		if (phy_data & 0x0004) {
+			nesadapter->mac_link_down[mac_index] = 0;
+			list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
+				nes_debug(NES_DBG_PHY, "The Link is UP!!.  linkup was %d\n",
+						nesvnic->linkup);
+				if (nesvnic->linkup == 0) {
+					printk(PFX "The Link is now up for port %u, netdev %p.\n",
+							mac_index, nesvnic->netdev);
+					if (netif_queue_stopped(nesvnic->netdev))
+						netif_start_queue(nesvnic->netdev);
+					nesvnic->linkup = 1;
+					netif_carrier_on(nesvnic->netdev);
+				}
+			}
+		} else {
+			nesadapter->mac_link_down[mac_index] = 1;
+			list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
+				nes_debug(NES_DBG_PHY, "The Link is Down!!. linkup was %d\n",
+						nesvnic->linkup);
+				if (nesvnic->linkup == 1) {
+					printk(PFX "The Link is now down for port %u, netdev %p.\n",
+							mac_index, nesvnic->netdev);
+					if (!(netif_queue_stopped(nesvnic->netdev)))
+						netif_stop_queue(nesvnic->netdev);
+					nesvnic->linkup = 0;
+					netif_carrier_off(nesvnic->netdev);
+				}
+			}
+		}
+	}
+
+	nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_IDLE;
+}
+
+
+
+void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
+{
+	struct nes_vnic *owner = container_of(cq, struct nes_vnic, nic_cq);
+	/* cq is the nic_cq member of its vnic: schedule that vnic's napi on its netdev slot */
+	netif_rx_schedule(nesdev->netdev[owner->netdev_index], &owner->napi);
+}
+
+
+/* The MAX_RQES_TO_PROCESS defines how many max read requests to complete before
+* getting out of nic_ce_handler
+*/
+#define	MAX_RQES_TO_PROCESS	384
+
+/**
+ * nes_nic_ce_handler - drain valid CQEs of a NIC CQ: reap SQ completions, pass up RX packets
+ */
+void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
+{
+	u64 u64temp;
+	dma_addr_t bus_address;
+	struct nes_hw_nic *nesnic;
+	struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq);
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_hw_nic_rq_wqe *nic_rqe;
+	struct nes_hw_nic_sq_wqe *nic_sqe;
+	struct sk_buff *skb;
+	struct sk_buff *rx_skb;
+	__le16 *wqe_fragment_length;
+	u32 head;
+	u32 cq_size;
+	u32 rx_pkt_size;
+	u32 cqe_count=0;
+	u32 cqe_errv;
+	u32 cqe_misc;
+	u16 wqe_fragment_index = 1;	/* first fragment (0) is used by copy buffer */
+	u16 vlan_tag;
+	u16 pkt_type;
+	u16 rqes_processed = 0;
+	u8 sq_cqes = 0;
+
+	head = cq->cq_head;
+	cq_size = cq->cq_size;
+	cq->cqes_pending = 1;
+	do {
+		if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) &
+				NES_NIC_CQE_VALID) {
+			nesnic = &nesvnic->nic;
+			cqe_misc = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]);
+			if (cqe_misc & NES_NIC_CQE_SQ) {
+				sq_cqes++;
+				wqe_fragment_index = 1;
+				nic_sqe = &nesnic->sq_vbase[nesnic->sq_tail];
+				skb = nesnic->tx_skb[nesnic->sq_tail];
+				wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
+				/* bump past the vlan tag */
+				wqe_fragment_length++;
+				if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) {
+					u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+wqe_fragment_index*2]);
+					u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+wqe_fragment_index*2]))<<32;
+					bus_address = (dma_addr_t)u64temp;
+					if (test_and_clear_bit(nesnic->sq_tail, nesnic->first_frag_overflow)) {
+						pci_unmap_single(nesdev->pcidev,
+								bus_address,
+								le16_to_cpu(wqe_fragment_length[wqe_fragment_index++]),
+								PCI_DMA_TODEVICE);
+					}
+					for (; wqe_fragment_index < 5; wqe_fragment_index++) {
+						if (wqe_fragment_length[wqe_fragment_index]) {
+							u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+wqe_fragment_index*2]);
+							u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+wqe_fragment_index*2]))<<32;
+							bus_address = (dma_addr_t)u64temp;
+							pci_unmap_page(nesdev->pcidev,
+									bus_address,
+									le16_to_cpu(wqe_fragment_length[wqe_fragment_index]),
+									PCI_DMA_TODEVICE);
+						} else
+							break;
+					}
+					if (skb)
+						dev_kfree_skb_any(skb);
+				}
+				nesnic->sq_tail++;
+				nesnic->sq_tail &= nesnic->sq_size-1;
+				if (sq_cqes > 128) {
+					barrier();
+					/* restart the queue if it had been stopped */
+					if (netif_queue_stopped(nesvnic->netdev))
+						netif_wake_queue(nesvnic->netdev);
+					sq_cqes = 0;
+				}
+			} else {
+				rqes_processed++;
+
+				cq->rx_cqes_completed++;
+				cq->rx_pkts_indicated++;
+				rx_pkt_size = cqe_misc & 0x0000ffff;
+				nic_rqe = &nesnic->rq_vbase[nesnic->rq_tail];
+				/* Get the skb */
+				rx_skb = nesnic->rx_skb[nesnic->rq_tail];
+				/* undo the DMA mapping of the buffer described by this RQE */
+				bus_address = (dma_addr_t)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
+				bus_address += ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32;
+				pci_unmap_single(nesdev->pcidev, bus_address,
+						nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+				/* rx_skb->tail = rx_skb->data + rx_pkt_size; */
+				/* rx_skb->len = rx_pkt_size; */
+				rx_skb->len = 0;  /* TODO: see if this is necessary */
+				skb_put(rx_skb, rx_pkt_size);
+				rx_skb->protocol = eth_type_trans(rx_skb, nesvnic->netdev);
+				nesnic->rq_tail++;
+				nesnic->rq_tail &= nesnic->rq_size - 1;
+
+				atomic_inc(&nesvnic->rx_skbs_needed);
+				if (atomic_read(&nesvnic->rx_skbs_needed) > (nesvnic->nic.rq_size>>1)) {
+					nes_write32(nesdev->regs+NES_CQE_ALLOC,
+							cq->cq_number | (cqe_count << 16));
+//					nesadapter->tune_timer.cq_count += cqe_count;
+					nesdev->currcq_count += cqe_count;
+					cqe_count = 0;
+					nes_replenish_nic_rq(nesvnic);
+				}
+				pkt_type = (u16)(le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX]));
+				cqe_errv = (cqe_misc & NES_NIC_CQE_ERRV_MASK) >> NES_NIC_CQE_ERRV_SHIFT;
+				rx_skb->ip_summed = CHECKSUM_NONE;
+
+				if ((NES_PKT_TYPE_TCPV4_BITS == (pkt_type & NES_PKT_TYPE_TCPV4_MASK)) ||
+						(NES_PKT_TYPE_UDPV4_BITS == (pkt_type & NES_PKT_TYPE_UDPV4_MASK))) {
+					if ((cqe_errv &
+							(NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR |
+							NES_NIC_ERRV_BITS_IPH_ERR | NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) {
+						if (nesvnic->rx_checksum_disabled == 0) {
+							rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
+						}
+					} else
+						nes_debug(NES_DBG_CQ, "%s: unsuccessfully checksummed TCP or UDP packet."
+								" errv = 0x%X, pkt_type = 0x%X.\n",
+								nesvnic->netdev->name, cqe_errv, pkt_type);
+
+				} else if ((pkt_type & NES_PKT_TYPE_IPV4_MASK) == NES_PKT_TYPE_IPV4_BITS) {
+					if ((cqe_errv &
+							(NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_IPH_ERR |
+							NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) {
+						if (nesvnic->rx_checksum_disabled == 0) {
+							rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
+							/* nes_debug(NES_DBG_CQ, "%s: Reporting successfully checksummed IPv4 packet.\n",
+								  nesvnic->netdev->name); */
+						}
+					} else
+						nes_debug(NES_DBG_CQ, "%s: unsuccessfully checksummed IPv4 packet."
+								" errv = 0x%X, pkt_type = 0x%X.\n",
+								nesvnic->netdev->name, cqe_errv, pkt_type);
+				}
+				/* nes_debug(NES_DBG_CQ, "pkt_type=%x, APBVT_MASK=%x\n",
+							pkt_type, (pkt_type & NES_PKT_TYPE_APBVT_MASK)); */
+
+				if ((pkt_type & NES_PKT_TYPE_APBVT_MASK) == NES_PKT_TYPE_APBVT_BITS) {
+					nes_cm_recv(rx_skb, nesvnic->netdev);
+				} else {
+					if ((cqe_misc & NES_NIC_CQE_TAG_VALID) && (nesvnic->vlan_grp != NULL)) {
+						vlan_tag = (u16)(le32_to_cpu(
+								cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX])
+								>> 16);
+						nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n",
+								nesvnic->netdev->name, vlan_tag);
+						nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag);
+					} else {
+						nes_netif_rx(rx_skb);
+					}
+				}
+
+				nesvnic->netdev->last_rx = jiffies;
+				/* nesvnic->netstats.rx_packets++; */
+				/* nesvnic->netstats.rx_bytes += rx_pkt_size; */
+			}
+
+			cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX] = 0;
+			/* Accounting... */
+			cqe_count++;
+			if (++head >= cq_size)
+				head = 0;
+			if (cqe_count == 255) {
+				/* Replenish Nic CQ */
+				nes_write32(nesdev->regs+NES_CQE_ALLOC,
+						cq->cq_number | (cqe_count << 16));
+//				nesdev->nesadapter->tune_timer.cq_count += cqe_count;
+				nesdev->currcq_count += cqe_count;
+				cqe_count = 0;
+			}
+
+			if (cq->rx_cqes_completed >= nesvnic->budget)
+				break;
+		} else {
+			cq->cqes_pending = 0;
+			break;
+		}
+
+	} while (1);
+
+	if (sq_cqes) {
+		barrier();
+		/* restart the queue if it had been stopped */
+		if (netif_queue_stopped(nesvnic->netdev))
+			netif_wake_queue(nesvnic->netdev);
+	}
+
+	cq->cq_head = head;
+	/* nes_debug(NES_DBG_CQ, "CQ%u Processed = %u cqes, new head = %u.\n",
+			cq->cq_number, cqe_count, cq->cq_head); */
+	cq->cqe_allocs_pending = cqe_count;
+	if (unlikely(nesadapter->et_use_adaptive_rx_coalesce))
+	{
+//		nesdev->nesadapter->tune_timer.cq_count += cqe_count;
+		nesdev->currcq_count += cqe_count;
+		nes_nic_tune_timer(nesdev);
+	}
+	if (atomic_read(&nesvnic->rx_skbs_needed))
+		nes_replenish_nic_rq(nesvnic);
+}
+
+
+/**
+ * nes_cqp_ce_handler - process CQP completions, then post queued requests to the CQP SQ
+ */
+void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
+{
+	u64 u64temp;
+	unsigned long flags;
+	struct nes_hw_cqp *cqp = NULL;
+	struct nes_cqp_request *cqp_request;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	u32 head;
+	u32 cq_size;
+	u32 cqe_count=0;
+	u32 error_code;
+	/* u32 counter; */
+
+	head = cq->cq_head;
+	cq_size = cq->cq_size;
+
+	do {
+		/* process the CQE; pointers are rebuilt from two 32-bit words by value, not by aliasing */
+		/* nes_debug(NES_DBG_CQP, "head=%u cqe_words=%08X\n", head,
+			  le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])); */
+
+		if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) {
+			u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head].
+					cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
+					((u64)(le32_to_cpu(cq->cq_vbase[head].
+					cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX])));
+			cqp = (struct nes_hw_cqp *)(unsigned long)u64temp;
+
+			error_code = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX]);
+			if (error_code) {
+				nes_debug(NES_DBG_CQP, "Bad Completion code for opcode 0x%02X from CQP,"
+						" Major/Minor codes = 0x%04X:%04X.\n",
+						le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])&0x3f,
+						(u16)(error_code >> 16),
+						(u16)error_code);
+				nes_debug(NES_DBG_CQP, "cqp: qp_id=%u, sq_head=%u, sq_tail=%u\n",
+						cqp->qp_id, cqp->sq_head, cqp->sq_tail);
+			}
+
+			u64temp = (((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail].
+					wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX])))<<32) |
+					((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail].
+					wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX])));
+			cqp_request = (struct nes_cqp_request *)(unsigned long)u64temp;
+			if (cqp_request) {
+				if (cqp_request->waiting) {
+					/* nes_debug(NES_DBG_CQP, "%s: Waking up requestor\n"); */
+					cqp_request->major_code = (u16)(error_code >> 16);
+					cqp_request->minor_code = (u16)error_code;
+					barrier();
+					cqp_request->request_done = 1;
+					wake_up(&cqp_request->waitq);
+					if (atomic_dec_and_test(&cqp_request->refcount)) {
+						nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n",
+								cqp_request,
+								le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f);
+						if (cqp_request->dynamic) {
+							kfree(cqp_request);
+						} else {
+							spin_lock_irqsave(&nesdev->cqp.lock, flags);
+							list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+							spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+						}
+					}
+				} else if (cqp_request->callback) {
+					/* Invoke the callback routine */
+					cqp_request->cqp_callback(nesdev, cqp_request);
+					if (cqp_request->dynamic) {
+						kfree(cqp_request);
+					} else {
+						spin_lock_irqsave(&nesdev->cqp.lock, flags);
+						list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+						spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+					}
+				} else {
+					nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n",
+							cqp_request,
+							le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f);
+					if (cqp_request->dynamic) {
+						kfree(cqp_request);
+					} else {
+						spin_lock_irqsave(&nesdev->cqp.lock, flags);
+						list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+						spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+					}
+				}
+			} else {
+				wake_up(&nesdev->cqp.waitq);
+			}
+
+			cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
+			nes_write32(nesdev->regs+NES_CQE_ALLOC, cq->cq_number | (1 << 16));
+			if (++cqp->sq_tail >= cqp->sq_size)
+				cqp->sq_tail = 0;
+
+			/* Accounting... */
+			cqe_count++;
+			if (++head >= cq_size)
+				head = 0;
+		} else {
+			break;
+		}
+	} while (1);
+	cq->cq_head = head;
+
+	spin_lock_irqsave(&nesdev->cqp.lock, flags);
+	while ((!list_empty(&nesdev->cqp_pending_reqs)) &&
+			((((nesdev->cqp.sq_tail+nesdev->cqp.sq_size)-nesdev->cqp.sq_head) &
+			(nesdev->cqp.sq_size - 1)) != 1)) {
+		cqp_request = list_entry(nesdev->cqp_pending_reqs.next,
+				struct nes_cqp_request, list);
+		list_del_init(&cqp_request->list);
+		head = nesdev->cqp.sq_head++;
+		nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
+		cqp_wqe = &nesdev->cqp.sq_vbase[head];
+		memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
+		barrier();
+		cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] =
+			cpu_to_le32((u32)((unsigned long)cqp_request));
+		cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] =
+			cpu_to_le32((u32)(upper_32_bits((unsigned long)cqp_request)));
+		nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) put on CQPs SQ wqe%u.\n",
+				cqp_request, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, head);
+		/* Ring doorbell (1 WQEs) */
+		barrier();
+		nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id);
+	}
+	spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+
+	/* Arm the CCQ */
+	nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
+			cq->cq_number);
+	nes_read32(nesdev->regs+NES_CQE_ALLOC);
+}
+
+
+/**
+ * nes_process_iwarp_aeqe - record QP state and raise events for one iWARP async event entry
+ */
+void nes_process_iwarp_aeqe(struct nes_device *nesdev, struct nes_hw_aeqe *aeqe)
+{
+	u64 context;
+	u64 aeqe_context = 0;
+	unsigned long flags;
+	struct nes_qp *nesqp;
+	int resource_allocated;
+	/* struct iw_cm_id *cm_id; */
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct ib_event ibevent;
+	/* struct iw_cm_event cm_event; */
+	u32 aeq_info;
+	u32 next_iwarp_state = 0;
+	u16 async_event_id;
+	u8 tcp_state;
+	u8 iwarp_state;
+
+	nes_debug(NES_DBG_AEQ, "\n");
+	aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
+	if ((NES_AEQE_INBOUND_RDMA&aeq_info) || (!(NES_AEQE_QP&aeq_info))) {
+		context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
+		context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
+	} else {
+		aeqe_context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
+		aeqe_context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
+		context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
+						aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
+		BUG_ON(!context);
+	}
+
+	async_event_id = (u16)aeq_info;
+	tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
+	iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
+	nes_debug(NES_DBG_AEQ, "aeid = 0x%04X, qp-cq id = %d, aeqe = %p,"
+			" Tcp state = %s, iWARP state = %s\n",
+			async_event_id,
+			le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe,
+			nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]);
+
+
+	switch (async_event_id) {
+		case NES_AEQE_AEID_LLP_FIN_RECEIVED:
+			nesqp = (struct nes_qp *)(unsigned long)context;
+			if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
+				nesqp->cm_id->add_ref(nesqp->cm_id);
+				nes_add_ref(&nesqp->ibqp);
+				schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp,
+						NES_TIMER_TYPE_CLOSE, 1, 0);
+				nes_debug(NES_DBG_AEQ, "QP%u Not decrementing QP refcount (%d),"
+						" need ae to finish up, original_last_aeq = 0x%04X."
+						" last_aeq = 0x%04X, scheduling timer. TCP state = %d\n",
+						nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
+						async_event_id, nesqp->last_aeq, tcp_state);
+			}
+			if ((tcp_state != NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
+					(nesqp->ibqp_state != IB_QPS_RTS)) {
+				/* FIN Received but tcp state or IB state moved on, should expect a close complete */
+				return;
+			}
+			/* fall through: FIN in CLOSE_WAIT on an RTS QP is handled like the close events */
+		case NES_AEQE_AEID_LLP_CLOSE_COMPLETE:
+		case NES_AEQE_AEID_LLP_CONNECTION_RESET:
+		case NES_AEQE_AEID_TERMINATE_SENT:
+		case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE:
+		case NES_AEQE_AEID_RESET_SENT:
+			nesqp = (struct nes_qp *)(unsigned long)context;
+			if (async_event_id == NES_AEQE_AEID_RESET_SENT) {
+				tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+			}
+			nes_add_ref(&nesqp->ibqp);
+			spin_lock_irqsave(&nesqp->lock, flags);
+			nesqp->hw_iwarp_state = iwarp_state;
+			nesqp->hw_tcp_state = tcp_state;
+			nesqp->last_aeq = async_event_id;
+
+			if ((tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
+					(tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT)) {
+				nesqp->hte_added = 0;
+				spin_unlock_irqrestore(&nesqp->lock, flags);
+				nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u to remove hte\n",
+						nesqp->hwqp.qp_id);
+				nes_hw_modify_qp(nesdev, nesqp,
+						NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE, 0);
+				spin_lock_irqsave(&nesqp->lock, flags);
+			}
+
+			if ((nesqp->ibqp_state == IB_QPS_RTS) &&
+					((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
+					(async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+				switch (nesqp->hw_iwarp_state) {
+					case NES_AEQE_IWARP_STATE_RTS:
+						next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
+						nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
+						break;
+					case NES_AEQE_IWARP_STATE_TERMINATE:
+						next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
+						nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE;
+						if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
+							next_iwarp_state |= 0x02000000;
+							nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+						}
+						break;
+					default:
+						next_iwarp_state = 0;
+				}
+				spin_unlock_irqrestore(&nesqp->lock, flags);
+				if (next_iwarp_state) {
+					nes_add_ref(&nesqp->ibqp);
+					nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
+							" also added another reference\n",
+							nesqp->hwqp.qp_id, next_iwarp_state);
+					nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
+				}
+				nes_cm_disconn(nesqp);
+			} else {
+				if (async_event_id ==  NES_AEQE_AEID_LLP_FIN_RECEIVED) {
+					/* FIN Received but ib state not RTS,
+							close complete will be on its way */
+					spin_unlock_irqrestore(&nesqp->lock, flags);
+					nes_rem_ref(&nesqp->ibqp);
+					return;
+				}
+				spin_unlock_irqrestore(&nesqp->lock, flags);
+				if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
+					next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000;
+					nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+					nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
+							" also added another reference\n",
+							nesqp->hwqp.qp_id, next_iwarp_state);
+					nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
+				}
+				nes_cm_disconn(nesqp);
+			}
+			break;
+		case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
+			nesqp = (struct nes_qp *)(unsigned long)context;
+			spin_lock_irqsave(&nesqp->lock, flags);
+			nesqp->hw_iwarp_state = iwarp_state;
+			nesqp->hw_tcp_state = tcp_state;
+			nesqp->last_aeq = async_event_id;
+			spin_unlock_irqrestore(&nesqp->lock, flags);
+			nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TERMINATE_RECEIVED"
+					" event on QP%u \n  Q2 Data:\n",
+					nesqp->hwqp.qp_id);
+			if (nesqp->ibqp.event_handler) {
+				ibevent.device = nesqp->ibqp.device;
+				ibevent.element.qp = &nesqp->ibqp;
+				ibevent.event = IB_EVENT_QP_FATAL;
+				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+			}
+			if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
+					((nesqp->ibqp_state == IB_QPS_RTS)&&
+					(async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+				nes_add_ref(&nesqp->ibqp);
+				nes_cm_disconn(nesqp);
+			} else {
+				nesqp->in_disconnect = 0;
+				wake_up(&nesqp->kick_waitq);
+			}
+			break;
+		case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES:
+			nesqp = (struct nes_qp *)(unsigned long)context;
+			nes_add_ref(&nesqp->ibqp);
+			spin_lock_irqsave(&nesqp->lock, flags);
+			nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR;
+			nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+			nesqp->last_aeq = async_event_id;
+			if (nesqp->cm_id) {
+				nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES"
+						" event on QP%u, remote IP = 0x%08X \n",
+						nesqp->hwqp.qp_id,
+						ntohl(nesqp->cm_id->remote_addr.sin_addr.s_addr));
+			} else {
+				nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES"
+						" event on QP%u \n",
+						nesqp->hwqp.qp_id);
+			}
+			spin_unlock_irqrestore(&nesqp->lock, flags);
+			next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_RESET;
+			nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
+			if (nesqp->ibqp.event_handler) {
+				ibevent.device = nesqp->ibqp.device;
+				ibevent.element.qp = &nesqp->ibqp;
+				ibevent.event = IB_EVENT_QP_FATAL;
+				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+			}
+			break;
+		case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
+			if (NES_AEQE_INBOUND_RDMA&aeq_info) {
+				nesqp = nesadapter->qp_table[le32_to_cpu(
+						aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
+			} else {
+				/* TODO: get the actual WQE and mask off wqe index */
+				context &= ~((u64)511);
+				nesqp = (struct nes_qp *)(unsigned long)context;
+			}
+			spin_lock_irqsave(&nesqp->lock, flags);
+			nesqp->hw_iwarp_state = iwarp_state;
+			nesqp->hw_tcp_state = tcp_state;
+			nesqp->last_aeq = async_event_id;
+			spin_unlock_irqrestore(&nesqp->lock, flags);
+			nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_BAD_STAG_INDEX event on QP%u\n",
+					nesqp->hwqp.qp_id);
+			if (nesqp->ibqp.event_handler) {
+				ibevent.device = nesqp->ibqp.device;
+				ibevent.element.qp = &nesqp->ibqp;
+				ibevent.event = IB_EVENT_QP_ACCESS_ERR;
+				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+			}
+			break;
+		case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
+			nesqp = (struct nes_qp *)(unsigned long)context;
+			spin_lock_irqsave(&nesqp->lock, flags);
+			nesqp->hw_iwarp_state = iwarp_state;
+			nesqp->hw_tcp_state = tcp_state;
+			nesqp->last_aeq = async_event_id;
+			spin_unlock_irqrestore(&nesqp->lock, flags);
+			nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_UNALLOCATED_STAG event on QP%u\n",
+					nesqp->hwqp.qp_id);
+			if (nesqp->ibqp.event_handler) {
+				ibevent.device = nesqp->ibqp.device;
+				ibevent.element.qp = &nesqp->ibqp;
+				ibevent.event = IB_EVENT_QP_ACCESS_ERR;
+				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+			}
+			break;
+		case NES_AEQE_AEID_PRIV_OPERATION_DENIED:
+			nesqp = nesadapter->qp_table[le32_to_cpu(aeqe->aeqe_words
+					[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
+			spin_lock_irqsave(&nesqp->lock, flags);
+			nesqp->hw_iwarp_state = iwarp_state;
+			nesqp->hw_tcp_state = tcp_state;
+			nesqp->last_aeq = async_event_id;
+			spin_unlock_irqrestore(&nesqp->lock, flags);
+			nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_PRIV_OPERATION_DENIED event on QP%u,"
+					" nesqp = %p, AE reported %p\n",
+					nesqp->hwqp.qp_id, nesqp, (struct nes_qp *)(unsigned long)context);
+			if (nesqp->ibqp.event_handler) {
+				ibevent.device = nesqp->ibqp.device;
+				ibevent.element.qp = &nesqp->ibqp;
+				ibevent.event = IB_EVENT_QP_ACCESS_ERR;
+				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+			}
+			break;
+		case NES_AEQE_AEID_CQ_OPERATION_ERROR:
+			context <<= 1;
+			nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u, %p\n",
+					le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), (void *)(unsigned long)context);
+			resource_allocated = nes_is_resource_allocated(nesadapter, nesadapter->allocated_cqs,
+					le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]));
+			if (resource_allocated) {
+				printk(KERN_ERR PFX "%s: Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u\n",
+						__FUNCTION__, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]));
+			}
+			break;
+		case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+			nesqp = nesadapter->qp_table[le32_to_cpu(
+					aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
+			spin_lock_irqsave(&nesqp->lock, flags);
+			nesqp->hw_iwarp_state = iwarp_state;
+			nesqp->hw_tcp_state = tcp_state;
+			nesqp->last_aeq = async_event_id;
+			spin_unlock_irqrestore(&nesqp->lock, flags);
+			nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG"
+					"_FOR_AVAILABLE_BUFFER event on QP%u\n",
+					nesqp->hwqp.qp_id);
+			if (nesqp->ibqp.event_handler) {
+				ibevent.device = nesqp->ibqp.device;
+				ibevent.element.qp = &nesqp->ibqp;
+				ibevent.event = IB_EVENT_QP_ACCESS_ERR;
+				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+			}
+			/* tell cm to disconnect, cm will queue work to thread */
+			nes_add_ref(&nesqp->ibqp);
+			nes_cm_disconn(nesqp);
+			break;
+		case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
+			nesqp = (struct nes_qp *)(unsigned long)context;
+			spin_lock_irqsave(&nesqp->lock, flags);
+			nesqp->hw_iwarp_state = iwarp_state;
+			nesqp->hw_tcp_state = tcp_state;
+			nesqp->last_aeq = async_event_id;
+			spin_unlock_irqrestore(&nesqp->lock, flags);
+			nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_INVALID_MSN"
+					"_NO_BUFFER_AVAILABLE event on QP%u\n",
+					nesqp->hwqp.qp_id);
+			if (nesqp->ibqp.event_handler) {
+				ibevent.device = nesqp->ibqp.device;
+				ibevent.element.qp = &nesqp->ibqp;
+				ibevent.event = IB_EVENT_QP_FATAL;
+				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+			}
+			/* tell cm to disconnect, cm will queue work to thread */
+			nes_add_ref(&nesqp->ibqp);
+			nes_cm_disconn(nesqp);
+			break;
+		case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
+			nesqp = (struct nes_qp *)(unsigned long)context;
+			spin_lock_irqsave(&nesqp->lock, flags);
+			nesqp->hw_iwarp_state = iwarp_state;
+			nesqp->hw_tcp_state = tcp_state;
+			nesqp->last_aeq = async_event_id;
+			spin_unlock_irqrestore(&nesqp->lock, flags);
+			nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR"
+					" event on QP%u \n  Q2 Data:\n",
+					nesqp->hwqp.qp_id);
+			if (nesqp->ibqp.event_handler) {
+				ibevent.device = nesqp->ibqp.device;
+				ibevent.element.qp = &nesqp->ibqp;
+				ibevent.event = IB_EVENT_QP_FATAL;
+				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+			}
+			/* tell cm to disconnect, cm will queue work to thread */
+			nes_add_ref(&nesqp->ibqp);
+			nes_cm_disconn(nesqp);
+			break;
+			/* TODO: additional AEs need to be here */
+		default:
+			nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n",
+					async_event_id);
+			break;
+	}
+
+}
+
+
+/**
+ * nes_iwarp_ce_handler - ack an iWARP CQ event, then call the CQ's completion handler
+ */
+void nes_iwarp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *hw_cq)
+{
+	struct nes_cq *owner = container_of(hw_cq, struct nes_cq, hw_cq);
+	struct ib_cq *ibcq = &owner->ibcq;
+
+	/* The CQ number is written to NES_CQ_ACK before any handler runs. */
+	nes_write32(nesdev->regs+NES_CQ_ACK, owner->hw_cq.cq_number);
+
+	/* The completion handler is optional; without one there is nothing left to do. */
+	if (!ibcq->comp_handler)
+		return;
+	ibcq->comp_handler(ibcq, ibcq->cq_context);
+}
+
+
+/**
+ * nes_manage_apbvt() - post a CQP request that adds or deletes an APBVT entry for a port
+ */
+int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port,
+		u32 nic_index, u32 add_port)
+{
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_cqp_request *req = nes_get_cqp_request(nesdev);
+	struct nes_hw_cqp_wqe *wqe;
+	unsigned long irqflags;
+	const int adding = (add_port == NES_MANAGE_APBVT_ADD);
+	int ret = 0;
+	u16 major;
+
+	if (!req) {
+		nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n");
+		return -ENOMEM;
+	}
+	req->waiting = 1;
+	wqe = &req->cqp_wqe;
+
+	nes_debug(NES_DBG_QP, "%s APBV for local port=%u(0x%04x), nic_index=%u\n",
+			adding ? "ADD" : "DEL",
+			accel_local_port, accel_local_port, nic_index);
+
+	nes_fill_init_cqp_wqe(wqe, nesdev);
+	set_wqe_32bit_value(wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, (NES_CQP_MANAGE_APBVT |
+			(adding ? NES_CQP_APBVT_ADD : 0)));
+	set_wqe_32bit_value(wqe->wqe_words, NES_CQP_WQE_ID_IDX,
+			((nic_index << NES_CQP_APBVT_NIC_SHIFT) | accel_local_port));
+
+	nes_debug(NES_DBG_QP, "Waiting for CQP completion for APBVT.\n");
+
+	/* Two references are taken here; this function drops exactly one of them below */
+	atomic_set(&req->refcount, 2);
+	nes_post_cqp_request(nesdev, req, NES_CQP_REQUEST_RING_DOORBELL);
+
+	/* Only an ADD waits; for any other add_port value ret stays 0 and -ETIME is returned */
+	if (adding)
+		ret = wait_event_timeout(req->waitq, (req->request_done != 0),
+				NES_EVENT_TIMEOUT);
+	nes_debug(NES_DBG_QP, "Completed, ret=%u,  CQP Major:Minor codes = 0x%04X:0x%04X\n",
+			ret, req->major_code, req->minor_code);
+	major = req->major_code;
+	if (atomic_dec_and_test(&req->refcount)) {
+		if (req->dynamic) {
+			kfree(req);
+		} else {
+			spin_lock_irqsave(&nesdev->cqp.lock, irqflags);
+			list_add_tail(&req->list, &nesdev->cqp_avail_reqs);
+			spin_unlock_irqrestore(&nesdev->cqp.lock, irqflags);
+		}
+	}
+	if (!ret)
+		return -ETIME;
+	if (major)
+		return -EIO;
+	return 0;
+}
+
+
+/**
+ * nes_manage_arp_cache - update the driver ARP table, then post the change to the CQP
+ */
+void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr,
+		u32 ip_addr, u32 action)
+{
+	struct nes_vnic *nesvnic = netdev_priv(netdev);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_cqp_request *req;
+	struct nes_hw_cqp_wqe *wqe;
+	u32 opcode;
+	u32 mac_low = 0;
+	u32 mac_high = 0;
+	int idx;
+
+	/* Nothing is posted to the CQP when nes_arp_table() returns -1 */
+	idx = nes_arp_table(nesdev, ip_addr, mac_addr, action);
+	if (idx == -1)
+		return;
+
+	req = nes_get_cqp_request(nesdev);
+	if (!req) {
+		nes_debug(NES_DBG_NETDEV, "Failed to get a cqp_request.\n");
+		return;
+	}
+	req->waiting = 0;
+	wqe = &req->cqp_wqe;
+	nes_fill_init_cqp_wqe(wqe, nesdev);
+
+	opcode = NES_CQP_MANAGE_ARP_CACHE | NES_CQP_ARP_PERM;
+	opcode |= (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_CQP_ARP_AEQ_INDEX_SHIFT;
+
+	/* Only NES_ARP_ADD marks the entry valid and supplies a MAC; else both words stay 0 */
+	if (action == NES_ARP_ADD) {
+		opcode |= NES_CQP_ARP_VALID;
+		mac_low = (((u32)mac_addr[2]) << 24) | (((u32)mac_addr[3]) << 16) |
+				(((u32)mac_addr[4]) << 8) | (u32)mac_addr[5];
+		/* NOTE(review): byte 0 is shifted by 16, not 8 - confirm against the WQE layout */
+		mac_high = (((u32)mac_addr[0]) << 16) | (u32)mac_addr[1];
+	}
+
+	wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(opcode);
+	wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(idx);
+	wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = cpu_to_le32(mac_low);
+	wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = cpu_to_le32(mac_high);
+
+	nes_debug(NES_DBG_NETDEV, "Not waiting for CQP, cqp.sq_head=%u, cqp.sq_tail=%u\n",
+			nesdev->cqp.sq_head, nesdev->cqp.sq_tail);
+
+	atomic_set(&req->refcount, 1);
+	nes_post_cqp_request(nesdev, req, NES_CQP_REQUEST_RING_DOORBELL);
+}
+
+
+/**
+ * flush_wqes - post a FLUSH_WQES request for a QP, optionally waiting for its completion
+ */
+void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
+		u32 which_wq, u32 wait_completion)
+{
+	struct nes_cqp_request *req = nes_get_cqp_request(nesdev);
+	struct nes_hw_cqp_wqe *wqe;
+	unsigned long irqflags;
+	int ret;
+
+	if (!req) {
+		nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n");
+		return;
+	}
+	/* Only a waiting caller sets the refcount (to 2) and drops one reference below */
+	req->waiting = wait_completion ? 1 : 0;
+	if (wait_completion)
+		atomic_set(&req->refcount, 2);
+
+	wqe = &req->cqp_wqe;
+	nes_fill_init_cqp_wqe(wqe, nesdev);
+	wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] =
+			cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq);
+	wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id);
+
+	nes_post_cqp_request(nesdev, req, NES_CQP_REQUEST_RING_DOORBELL);
+
+	/* Without a waiter the request is not touched again here once it is posted */
+	if (!wait_completion)
+		return;
+
+	ret = wait_event_timeout(req->waitq, (req->request_done != 0),
+			NES_EVENT_TIMEOUT);
+	nes_debug(NES_DBG_QP, "Flush SQ QP WQEs completed, ret=%u,"
+			" CQP Major:Minor codes = 0x%04X:0x%04X\n",
+			ret, req->major_code, req->minor_code);
+
+	/* Whoever drops the last reference frees a dynamic request or recycles a pooled one */
+	if (!atomic_dec_and_test(&req->refcount))
+		return;
+	if (req->dynamic) {
+		kfree(req);
+	} else {
+		spin_lock_irqsave(&nesdev->cqp.lock, irqflags);
+		list_add_tail(&req->list, &nesdev->cqp_avail_reqs);
+		spin_unlock_irqrestore(&nesdev->cqp.lock, irqflags);
+	}
+}
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
new file mode 100644
index 0000000..1e10df5
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -0,0 +1,1206 @@
+/*
+* Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenIB.org BSD license below:
+*
+*     Redistribution and use in source and binary forms, with or
+*     without modification, are permitted provided that the following
+*     conditions are met:
+*
+*      - Redistributions of source code must retain the above
+*        copyright notice, this list of conditions and the following
+*        disclaimer.
+*
+*      - Redistributions in binary form must reproduce the above
+*        copyright notice, this list of conditions and the following
+*        disclaimer in the documentation and/or other materials
+*        provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*/
+
+#ifndef __NES_HW_H
+#define __NES_HW_H
+
+#define NES_PHY_TYPE_1G   2
+#define NES_PHY_TYPE_IRIS 3
+#define NES_PHY_TYPE_PUMA_10G  6
+
+#define NES_MULTICAST_PF_MAX 8
+
+enum pci_regs {	/* register offsets; first five line up with struct nes_pci_regs */
+	NES_INT_STAT = 0x0000,
+	NES_INT_MASK = 0x0004,
+	NES_INT_PENDING = 0x0008,
+	NES_INTF_INT_STAT = 0x000C,
+	NES_INTF_INT_MASK = 0x0010,
+	NES_TIMER_STAT = 0x0014,
+	NES_PERIODIC_CONTROL = 0x0018,
+	NES_ONE_SHOT_CONTROL = 0x001C,
+	NES_EEPROM_COMMAND = 0x0020,
+	NES_EEPROM_DATA = 0x0024,
+	NES_FLASH_COMMAND = 0x0028,
+	NES_FLASH_DATA  = 0x002C,
+	NES_SOFTWARE_RESET = 0x0030,
+	NES_CQ_ACK = 0x0034,
+	NES_WQE_ALLOC = 0x0040,	/* 0x0038 and 0x003C are not named here */
+	NES_CQE_ALLOC = 0x0044,
+};
+
+enum indexed_regs {	/* presumably indexed-register offsets; not strictly ascending */
+	NES_IDX_CREATE_CQP_LOW = 0x0000,
+	NES_IDX_CREATE_CQP_HIGH = 0x0004,
+	NES_IDX_QP_CONTROL = 0x0040,
+	NES_IDX_FLM_CONTROL = 0x0080,
+	NES_IDX_INT_CPU_STATUS = 0x00a0,
+	NES_IDX_GPIO_CONTROL = 0x00f0,
+	NES_IDX_GPIO_DATA = 0x00f4,
+	NES_IDX_TCP_CONFIG0 = 0x01e4,
+	NES_IDX_TCP_TIMER_CONFIG = 0x01ec,
+	NES_IDX_TCP_NOW = 0x01f0,
+	NES_IDX_QP_MAX_CFG_SIZES = 0x0200,
+	NES_IDX_QP_CTX_SIZE = 0x0218,
+	NES_IDX_TCP_TIMER_SIZE0 = 0x0238,
+	NES_IDX_TCP_TIMER_SIZE1 = 0x0240,
+	NES_IDX_ARP_CACHE_SIZE = 0x0258,
+	NES_IDX_CQ_CTX_SIZE = 0x0260,
+	NES_IDX_MRT_SIZE = 0x0278,
+	NES_IDX_PBL_REGION_SIZE = 0x0280,
+	NES_IDX_IRRQ_COUNT = 0x02b0,
+	NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x02f0,
+	NES_IDX_RX_WINDOW_BUFFER_SIZE = 0x0300,
+	NES_IDX_DST_IP_ADDR = 0x0400,
+	NES_IDX_PCIX_DIAG = 0x08e8,
+	NES_IDX_MPP_DEBUG = 0x0a00,
+	NES_IDX_PORT_RX_DISCARDS = 0x0a30,
+	NES_IDX_PORT_TX_DISCARDS = 0x0a34,
+	NES_IDX_MPP_LB_DEBUG = 0x0b00,
+	NES_IDX_DENALI_CTL_22 = 0x1058,
+	NES_IDX_MAC_TX_CONTROL = 0x2000,
+	NES_IDX_MAC_TX_CONFIG = 0x2004,
+	NES_IDX_MAC_TX_PAUSE_QUANTA = 0x2008,
+	NES_IDX_MAC_RX_CONTROL = 0x200c,
+	NES_IDX_MAC_RX_CONFIG = 0x2010,
+	NES_IDX_MAC_EXACT_MATCH_BOTTOM = 0x201c,
+	NES_IDX_MAC_MDIO_CONTROL = 0x2084,
+	NES_IDX_MAC_TX_OCTETS_LOW = 0x2100,
+	NES_IDX_MAC_TX_OCTETS_HIGH = 0x2104,
+	NES_IDX_MAC_TX_FRAMES_LOW = 0x2108,
+	NES_IDX_MAC_TX_FRAMES_HIGH = 0x210c,
+	NES_IDX_MAC_TX_PAUSE_FRAMES = 0x2118,
+	NES_IDX_MAC_TX_ERRORS = 0x2138,
+	NES_IDX_MAC_RX_OCTETS_LOW = 0x213c,
+	NES_IDX_MAC_RX_OCTETS_HIGH = 0x2140,
+	NES_IDX_MAC_RX_FRAMES_LOW = 0x2144,
+	NES_IDX_MAC_RX_FRAMES_HIGH = 0x2148,
+	NES_IDX_MAC_RX_BC_FRAMES_LOW = 0x214c,
+	NES_IDX_MAC_RX_MC_FRAMES_HIGH = 0x2150,
+	NES_IDX_MAC_RX_PAUSE_FRAMES = 0x2154,
+	NES_IDX_MAC_RX_SHORT_FRAMES = 0x2174,
+	NES_IDX_MAC_RX_OVERSIZED_FRAMES = 0x2178,
+	NES_IDX_MAC_RX_JABBER_FRAMES = 0x217c,
+	NES_IDX_MAC_RX_CRC_ERR_FRAMES = 0x2180,
+	NES_IDX_MAC_RX_LENGTH_ERR_FRAMES = 0x2184,
+	NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES = 0x2188,
+	NES_IDX_MAC_INT_STATUS = 0x21f0,
+	NES_IDX_MAC_INT_MASK = 0x21f4,
+	NES_IDX_PHY_PCS_CONTROL_STATUS0 = 0x2800,	/* each ...1 entry below is its ...0 entry + 0x200 */
+	NES_IDX_PHY_PCS_CONTROL_STATUS1 = 0x2a00,
+	NES_IDX_ETH_SERDES_COMMON_CONTROL0 = 0x2808,
+	NES_IDX_ETH_SERDES_COMMON_CONTROL1 = 0x2a08,
+	NES_IDX_ETH_SERDES_COMMON_STATUS0 = 0x280c,
+	NES_IDX_ETH_SERDES_COMMON_STATUS1 = 0x2a0c,
+	NES_IDX_ETH_SERDES_TX_EMP0 = 0x2810,
+	NES_IDX_ETH_SERDES_TX_EMP1 = 0x2a10,
+	NES_IDX_ETH_SERDES_TX_DRIVE0 = 0x2814,
+	NES_IDX_ETH_SERDES_TX_DRIVE1 = 0x2a14,
+	NES_IDX_ETH_SERDES_RX_MODE0 = 0x2818,
+	NES_IDX_ETH_SERDES_RX_MODE1 = 0x2a18,
+	NES_IDX_ETH_SERDES_RX_SIGDET0 = 0x281c,
+	NES_IDX_ETH_SERDES_RX_SIGDET1 = 0x2a1c,
+	NES_IDX_ETH_SERDES_BYPASS0 = 0x2820,
+	NES_IDX_ETH_SERDES_BYPASS1 = 0x2a20,
+	NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0 = 0x2824,
+	NES_IDX_ETH_SERDES_LOOPBACK_CONTROL1 = 0x2a24,
+	NES_IDX_ETH_SERDES_RX_EQ_CONTROL0 = 0x2828,
+	NES_IDX_ETH_SERDES_RX_EQ_CONTROL1 = 0x2a28,
+	NES_IDX_ETH_SERDES_RX_EQ_STATUS0 = 0x282c,
+	NES_IDX_ETH_SERDES_RX_EQ_STATUS1 = 0x2a2c,
+	NES_IDX_ETH_SERDES_CDR_RESET0 = 0x2830,
+	NES_IDX_ETH_SERDES_CDR_RESET1 = 0x2a30,
+	NES_IDX_ETH_SERDES_CDR_CONTROL0 = 0x2834,
+	NES_IDX_ETH_SERDES_CDR_CONTROL1 = 0x2a34,
+	NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0 = 0x2838,
+	NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1 = 0x2a38,
+	NES_IDX_ENDNODE0_NSTAT_RX_DISCARD = 0x3080,	/* listed before the lower 0x30xx values */
+	NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO = 0x3000,
+	NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI = 0x3004,
+	NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO = 0x3008,
+	NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI = 0x300c,
+	NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO = 0x7000,
+	NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI = 0x7004,
+	NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO = 0x7008,
+	NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI = 0x700c,
+	NES_IDX_CM_CONFIG = 0x5100,	/* numeric order resumes at 0x5100 after the 0x70xx group */
+	NES_IDX_NIC_LOGPORT_TO_PHYPORT = 0x6000,
+	NES_IDX_NIC_PHYPORT_TO_USW = 0x6008,
+	NES_IDX_NIC_ACTIVE = 0x6010,
+	NES_IDX_NIC_UNICAST_ALL = 0x6018,
+	NES_IDX_NIC_MULTICAST_ALL = 0x6020,
+	NES_IDX_NIC_MULTICAST_ENABLE = 0x6028,
+	NES_IDX_NIC_BROADCAST_ON = 0x6030,
+	NES_IDX_USED_CHUNKS_TX = 0x60b0,
+	NES_IDX_TX_POOL_SIZE = 0x60b8,
+	NES_IDX_QUAD_HASH_TABLE_SIZE = 0x6148,
+	NES_IDX_PERFECT_FILTER_LOW = 0x6200,
+	NES_IDX_PERFECT_FILTER_HIGH = 0x6204,
+	NES_IDX_IPV4_TCP_REXMITS = 0x7080,
+	NES_IDX_DEBUG_ERROR_CONTROL_STATUS = 0x913c,
+	NES_IDX_DEBUG_ERROR_MASKS0 = 0x9140,
+	NES_IDX_DEBUG_ERROR_MASKS1 = 0x9144,
+	NES_IDX_DEBUG_ERROR_MASKS2 = 0x9148,
+	NES_IDX_DEBUG_ERROR_MASKS3 = 0x914c,
+	NES_IDX_DEBUG_ERROR_MASKS4 = 0x9150,
+	NES_IDX_DEBUG_ERROR_MASKS5 = 0x9154,
+};
+
+#define NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE   1	/* presumably bit 0 of NES_IDX_MAC_TX_CONFIG -- confirm */
+#define NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE (1 << 17)	/* presumably a NES_IDX_MPP_DEBUG bit -- confirm */
+
+enum nes_cqp_opcodes {	/* written to CQP WQE word NES_CQP_WQE_OPCODE_IDX, see flush_wqes() */
+	NES_CQP_CREATE_QP = 0x00,
+	NES_CQP_MODIFY_QP = 0x01,
+	NES_CQP_DESTROY_QP = 0x02,
+	NES_CQP_CREATE_CQ = 0x03,
+	NES_CQP_MODIFY_CQ = 0x04,
+	NES_CQP_DESTROY_CQ = 0x05,
+	NES_CQP_ALLOCATE_STAG = 0x09,
+	NES_CQP_REGISTER_STAG = 0x0a,
+	NES_CQP_QUERY_STAG = 0x0b,
+	NES_CQP_REGISTER_SHARED_STAG = 0x0c,
+	NES_CQP_DEALLOCATE_STAG = 0x0d,
+	NES_CQP_MANAGE_ARP_CACHE = 0x0f,
+	NES_CQP_SUSPEND_QPS = 0x11,
+	NES_CQP_UPLOAD_CONTEXT = 0x13,
+	NES_CQP_CREATE_CEQ = 0x16,
+	NES_CQP_DESTROY_CEQ = 0x18,
+	NES_CQP_CREATE_AEQ = 0x19,
+	NES_CQP_DESTROY_AEQ = 0x1b,
+	NES_CQP_LMI_ACCESS = 0x20,
+	NES_CQP_FLUSH_WQES = 0x22,	/* flush_wqes() ORs its which_wq argument into this */
+	NES_CQP_MANAGE_APBVT = 0x23
+};
+
+enum nes_cqp_wqe_word_idx {	/* indices into nes_hw_cqp_wqe.wqe_words[] */
+	NES_CQP_WQE_OPCODE_IDX = 0,
+	NES_CQP_WQE_ID_IDX = 1,	/* flush_wqes() stores the QP id here */
+	NES_CQP_WQE_COMP_CTX_LOW_IDX = 2,
+	NES_CQP_WQE_COMP_CTX_HIGH_IDX = 3,
+	NES_CQP_WQE_COMP_SCRATCH_LOW_IDX = 4,
+	NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX = 5,
+};
+
+enum nes_cqp_cq_wqeword_idx {	/* words 6-10; presumably CQP WQE words for CQ ops */
+	NES_CQP_CQ_WQE_PBL_LOW_IDX = 6,
+	NES_CQP_CQ_WQE_PBL_HIGH_IDX = 7,
+	NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX = 8,
+	NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX = 9,
+	NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX = 10,
+};
+
+enum nes_cqp_stag_wqeword_idx {	/* presumably CQP WQE words for STag ops */
+	NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX = 1,	/* same index as NES_CQP_WQE_ID_IDX */
+	NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX = 6,
+	NES_CQP_STAG_WQE_LEN_LOW_IDX = 7,
+	NES_CQP_STAG_WQE_STAG_IDX = 8,
+	NES_CQP_STAG_WQE_VA_LOW_IDX = 10,	/* word 9 is not named */
+	NES_CQP_STAG_WQE_VA_HIGH_IDX = 11,
+	NES_CQP_STAG_WQE_PA_LOW_IDX = 12,
+	NES_CQP_STAG_WQE_PA_HIGH_IDX = 13,
+	NES_CQP_STAG_WQE_PBL_LEN_IDX = 14
+};
+
+#define NES_CQP_OP_IWARP_STATE_SHIFT 28	/* bit position of the iWARP state field below */
+
+enum nes_cqp_qp_bits {	/* flags and fields above the low 8 bits; presumably OR'ed with a QP opcode */
+	NES_CQP_QP_ARP_VALID = (1<<8),
+	NES_CQP_QP_WINBUF_VALID = (1<<9),
+	NES_CQP_QP_CONTEXT_VALID = (1<<10),
+	NES_CQP_QP_ORD_VALID = (1<<11),
+	NES_CQP_QP_WINBUF_DATAIND_EN = (1<<12),
+	NES_CQP_QP_VIRT_WQS = (1<<13),
+	NES_CQP_QP_DEL_HTE = (1<<14),
+	NES_CQP_QP_CQS_VALID = (1<<15),
+	NES_CQP_QP_TYPE_TSA = 0,	/* QP type is a multi-bit field starting at bit 16 */
+	NES_CQP_QP_TYPE_IWARP = (1<<16),
+	NES_CQP_QP_TYPE_CQP = (4<<16),
+	NES_CQP_QP_TYPE_NIC = (5<<16),
+	NES_CQP_QP_MSS_CHG = (1<<20),
+	NES_CQP_QP_STATIC_RESOURCES = (1<<21),
+	NES_CQP_QP_IGNORE_MW_BOUND = (1<<22),
+	NES_CQP_QP_VWQ_USE_LMI = (1<<23),
+	NES_CQP_QP_IWARP_STATE_IDLE = (1<<NES_CQP_OP_IWARP_STATE_SHIFT),	/* state codes match enum nes_aeqe_iwarp_state */
+	NES_CQP_QP_IWARP_STATE_RTS = (2<<NES_CQP_OP_IWARP_STATE_SHIFT),
+	NES_CQP_QP_IWARP_STATE_CLOSING = (3<<NES_CQP_OP_IWARP_STATE_SHIFT),
+	NES_CQP_QP_IWARP_STATE_TERMINATE = (5<<NES_CQP_OP_IWARP_STATE_SHIFT),
+	NES_CQP_QP_IWARP_STATE_ERROR = (6<<NES_CQP_OP_IWARP_STATE_SHIFT),
+	NES_CQP_QP_IWARP_STATE_MASK = (7<<NES_CQP_OP_IWARP_STATE_SHIFT),	/* bits 28-30 */
+	NES_CQP_QP_RESET = (1<<31),	/* NOTE(review): 1<<31 does not fit a signed 32-bit int; relies on compiler behaviour */
+};
+
+enum nes_cqp_qp_wqe_word_idx {	/* presumably CQP WQE words for QP ops */
+	NES_CQP_QP_WQE_CONTEXT_LOW_IDX = 6,
+	NES_CQP_QP_WQE_CONTEXT_HIGH_IDX = 7,
+	NES_CQP_QP_WQE_NEW_MSS_IDX = 15,	/* last word of the 16-word nes_hw_cqp_wqe */
+};
+
+enum nes_nic_ctx_bits {	/* RQ size field at bit 8, SQ size field at bit 10 */
+	NES_NIC_CTX_RQ_SIZE_32 = (3<<8),	/* NOTE(review): same value as RQ_SIZE_512; SQ_SIZE_32 uses 1 -- confirm encoding */
+	NES_NIC_CTX_RQ_SIZE_512 = (3<<8),	/* used by NES_NIC_CTX_SIZE */
+	NES_NIC_CTX_SQ_SIZE_32 = (1<<10),
+	NES_NIC_CTX_SQ_SIZE_512 = (3<<10),	/* used by NES_NIC_CTX_SIZE */
+};
+
+enum nes_nic_qp_ctx_word_idx {	/* presumably indices into nes_hw_nic_qp_context.context_words[6] */
+	NES_NIC_CTX_MISC_IDX = 0,
+	NES_NIC_CTX_SQ_LOW_IDX = 2,	/* word 1 is not named */
+	NES_NIC_CTX_SQ_HIGH_IDX = 3,
+	NES_NIC_CTX_RQ_LOW_IDX = 4,
+	NES_NIC_CTX_RQ_HIGH_IDX = 5,
+};
+
+enum nes_cqp_cq_bits {	/* single-bit flags in bits 9-15; presumably OR'ed with a CQ opcode */
+	NES_CQP_CQ_CEQE_MASK = (1<<9),
+	NES_CQP_CQ_CEQ_VALID = (1<<10),
+	NES_CQP_CQ_RESIZE = (1<<11),
+	NES_CQP_CQ_CHK_OVERFLOW = (1<<12),
+	NES_CQP_CQ_4KB_CHUNK = (1<<14),	/* same bits as the CEQ/AEQ 4KB_CHUNK and VIRT flags */
+	NES_CQP_CQ_VIRT = (1<<15),
+};
+
+enum nes_cqp_stag_bits {	/* single-bit flags; presumably OR'ed with an STag opcode */
+	NES_CQP_STAG_VA_TO = (1<<9),
+	NES_CQP_STAG_DEALLOC_PBLS = (1<<10),
+	NES_CQP_STAG_PBL_BLK_SIZE = (1<<11),
+	NES_CQP_STAG_MR = (1<<13),
+	NES_CQP_STAG_RIGHTS_LOCAL_READ = (1<<16),	/* access-rights flags occupy bits 16-20 */
+	NES_CQP_STAG_RIGHTS_LOCAL_WRITE = (1<<17),
+	NES_CQP_STAG_RIGHTS_REMOTE_READ = (1<<18),
+	NES_CQP_STAG_RIGHTS_REMOTE_WRITE = (1<<19),
+	NES_CQP_STAG_RIGHTS_WINDOW_BIND = (1<<20),
+	NES_CQP_STAG_REM_ACC_EN = (1<<21),
+	NES_CQP_STAG_LEAVE_PENDING = (1<<31),
+};
+
+enum nes_cqp_ceq_wqeword_idx {	/* presumably CQP WQE words for CEQ ops */
+	NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX = 1,	/* same index as NES_CQP_WQE_ID_IDX */
+	NES_CQP_CEQ_WQE_PBL_LOW_IDX = 6,
+	NES_CQP_CEQ_WQE_PBL_HIGH_IDX = 7,
+};
+
+enum nes_cqp_ceq_bits {	/* same bit positions as the CQ and AEQ variants */
+	NES_CQP_CEQ_4KB_CHUNK = (1<<14),
+	NES_CQP_CEQ_VIRT = (1<<15),
+};
+
+enum nes_cqp_aeq_wqeword_idx {	/* same layout as enum nes_cqp_ceq_wqeword_idx */
+	NES_CQP_AEQ_WQE_ELEMENT_COUNT_IDX = 1,
+	NES_CQP_AEQ_WQE_PBL_LOW_IDX = 6,
+	NES_CQP_AEQ_WQE_PBL_HIGH_IDX = 7,
+};
+
+enum nes_cqp_aeq_bits {	/* same bit positions as the CQ and CEQ variants */
+	NES_CQP_AEQ_4KB_CHUNK = (1<<14),
+	NES_CQP_AEQ_VIRT = (1<<15),
+};
+
+enum nes_cqp_lmi_wqeword_idx {	/* presumably CQP WQE words for NES_CQP_LMI_ACCESS */
+	NES_CQP_LMI_WQE_LMI_OFFSET_IDX = 1,	/* same index as NES_CQP_WQE_ID_IDX */
+	NES_CQP_LMI_WQE_FRAG_LOW_IDX = 8,
+	NES_CQP_LMI_WQE_FRAG_HIGH_IDX = 9,
+	NES_CQP_LMI_WQE_FRAG_LEN_IDX = 10,
+};
+
+enum nes_cqp_arp_wqeword_idx {	/* indices into cqp_wqe->wqe_words[] for ARP requests */
+	NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX = 6,
+	NES_CQP_ARP_WQE_MAC_HIGH_IDX = 7,
+	NES_CQP_ARP_WQE_REACHABILITY_MAX_IDX = 1,	/* listed last although it is the lowest index */
+};
+
+enum nes_cqp_upload_wqeword_idx {	/* presumably CQP WQE words for NES_CQP_UPLOAD_CONTEXT */
+	NES_CQP_UPLOAD_WQE_CTXT_LOW_IDX = 6,
+	NES_CQP_UPLOAD_WQE_CTXT_HIGH_IDX = 7,
+	NES_CQP_UPLOAD_WQE_HTE_IDX = 8,
+};
+
+enum nes_cqp_arp_bits {	/* presumably OR'ed with NES_CQP_MANAGE_ARP_CACHE -- confirm */
+	NES_CQP_ARP_VALID = (1<<8),
+	NES_CQP_ARP_PERM = (1<<9),
+};
+
+enum nes_cqp_flush_bits {	/* presumably the which_wq values passed to flush_wqes() */
+	NES_CQP_FLUSH_SQ = (1<<30),
+	NES_CQP_FLUSH_RQ = (1<<31),	/* NOTE(review): 1<<31 does not fit a signed 32-bit int */
+};
+
+enum nes_cqe_opcode_bits {	/* presumably flags in CQE word NES_CQE_OPCODE_IDX */
+	NES_CQE_STAG_VALID = (1<<6),
+	NES_CQE_ERROR = (1<<7),
+	NES_CQE_SQ = (1<<8),
+	NES_CQE_SE = (1<<9),
+	NES_CQE_PSH = (1<<29),
+	NES_CQE_FIN = (1<<30),
+	NES_CQE_VALID = (1<<31),	/* NOTE(review): 1<<31 does not fit a signed 32-bit int */
+};
+
+
+enum nes_cqe_word_idx {	/* presumably indices into nes_hw_cqe.cqe_words[8] */
+	NES_CQE_PAYLOAD_LENGTH_IDX = 0,
+	NES_CQE_COMP_COMP_CTX_LOW_IDX = 2,	/* word 1 is not named */
+	NES_CQE_COMP_COMP_CTX_HIGH_IDX = 3,
+	NES_CQE_INV_STAG_IDX = 4,
+	NES_CQE_QP_ID_IDX = 5,
+	NES_CQE_ERROR_CODE_IDX = 6,
+	NES_CQE_OPCODE_IDX = 7,
+};
+
+enum nes_ceqe_word_idx {	/* presumably indices into nes_hw_ceqe.ceqe_words[2] */
+	NES_CEQE_CQ_CTX_LOW_IDX = 0,
+	NES_CEQE_CQ_CTX_HIGH_IDX = 1,
+};
+
+enum nes_ceqe_status_bit {	/* NOTE(review): which CEQE word carries this bit is not shown here */
+	NES_CEQE_VALID = (1<<31),
+};
+
+enum nes_int_bits {	/* one bit per source: CEQ0-15, AEQ0-7, MAC0-3, then TSW/TIMER/INTF */
+	NES_INT_CEQ0 = (1<<0),	/* 16 CEQ bits, matching nes_adapter.ceq[16] */
+	NES_INT_CEQ1 = (1<<1),
+	NES_INT_CEQ2 = (1<<2),
+	NES_INT_CEQ3 = (1<<3),
+	NES_INT_CEQ4 = (1<<4),
+	NES_INT_CEQ5 = (1<<5),
+	NES_INT_CEQ6 = (1<<6),
+	NES_INT_CEQ7 = (1<<7),
+	NES_INT_CEQ8 = (1<<8),
+	NES_INT_CEQ9 = (1<<9),
+	NES_INT_CEQ10 = (1<<10),
+	NES_INT_CEQ11 = (1<<11),
+	NES_INT_CEQ12 = (1<<12),
+	NES_INT_CEQ13 = (1<<13),
+	NES_INT_CEQ14 = (1<<14),
+	NES_INT_CEQ15 = (1<<15),
+	NES_INT_AEQ0 = (1<<16),	/* 8 AEQ bits, matching nes_adapter.aeq[8] */
+	NES_INT_AEQ1 = (1<<17),
+	NES_INT_AEQ2 = (1<<18),
+	NES_INT_AEQ3 = (1<<19),
+	NES_INT_AEQ4 = (1<<20),
+	NES_INT_AEQ5 = (1<<21),
+	NES_INT_AEQ6 = (1<<22),
+	NES_INT_AEQ7 = (1<<23),
+	NES_INT_MAC0 = (1<<24),
+	NES_INT_MAC1 = (1<<25),
+	NES_INT_MAC2 = (1<<26),
+	NES_INT_MAC3 = (1<<27),
+	NES_INT_TSW = (1<<28),
+	NES_INT_TIMER = (1<<29),
+	NES_INT_INTF = (1<<30),
+};
+
+enum nes_intf_int_bits {	/* presumably bits of NES_INTF_INT_STAT / NES_INTF_INT_MASK */
+	NES_INTF_INT_PCIERR = (1<<0),
+	NES_INTF_PERIODIC_TIMER = (1<<2),
+	NES_INTF_ONE_SHOT_TIMER = (1<<3),
+	NES_INTF_INT_CRITERR = (1<<14),
+	NES_INTF_INT_AEQ0_OFLOW = (1<<16),
+	NES_INTF_INT_AEQ1_OFLOW = (1<<17),
+	NES_INTF_INT_AEQ2_OFLOW = (1<<18),
+	NES_INTF_INT_AEQ3_OFLOW = (1<<19),
+	NES_INTF_INT_AEQ4_OFLOW = (1<<20),
+	NES_INTF_INT_AEQ5_OFLOW = (1<<21),
+	NES_INTF_INT_AEQ6_OFLOW = (1<<22),
+	NES_INTF_INT_AEQ7_OFLOW = (1<<23),
+	NES_INTF_INT_AEQ_OFLOW = (0xff<<16),	/* union of the eight AEQn_OFLOW bits above */
+};
+
+enum nes_mac_int_bits {	/* presumably bits of NES_IDX_MAC_INT_STATUS / NES_IDX_MAC_INT_MASK */
+	NES_MAC_INT_LINK_STAT_CHG = (1<<1),
+	NES_MAC_INT_XGMII_EXT = (1<<2),
+	NES_MAC_INT_TX_UNDERFLOW = (1<<6),
+	NES_MAC_INT_TX_ERROR = (1<<7),
+};
+
+enum nes_cqe_allocate_bits {	/* presumably flags written to the NES_CQE_ALLOC register */
+	NES_CQE_ALLOC_INC_SELECT = (1<<28),
+	NES_CQE_ALLOC_NOTIFY_NEXT = (1<<29),
+	NES_CQE_ALLOC_NOTIFY_SE = (1<<30),
+	NES_CQE_ALLOC_RESET = (1<<31),
+};
+
+enum nes_nic_rq_wqe_word_idx {	/* presumably indices into nes_hw_nic_rq_wqe.wqe_words[16] */
+	NES_NIC_RQ_WQE_LENGTH_1_0_IDX = 0,	/* two length words, then four fragment low/high pairs */
+	NES_NIC_RQ_WQE_LENGTH_3_2_IDX = 1,
+	NES_NIC_RQ_WQE_FRAG0_LOW_IDX = 2,
+	NES_NIC_RQ_WQE_FRAG0_HIGH_IDX = 3,
+	NES_NIC_RQ_WQE_FRAG1_LOW_IDX = 4,
+	NES_NIC_RQ_WQE_FRAG1_HIGH_IDX = 5,
+	NES_NIC_RQ_WQE_FRAG2_LOW_IDX = 6,
+	NES_NIC_RQ_WQE_FRAG2_HIGH_IDX = 7,
+	NES_NIC_RQ_WQE_FRAG3_LOW_IDX = 8,
+	NES_NIC_RQ_WQE_FRAG3_HIGH_IDX = 9,
+};
+
+enum nes_nic_sq_wqe_word_idx {	/* names all 16 words; presumably of nes_hw_nic_sq_wqe.wqe_words[] */
+	NES_NIC_SQ_WQE_MISC_IDX = 0,
+	NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX = 1,
+	NES_NIC_SQ_WQE_LSO_INFO_IDX = 2,
+	NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX = 3,
+	NES_NIC_SQ_WQE_LENGTH_2_1_IDX = 4,
+	NES_NIC_SQ_WQE_LENGTH_4_3_IDX = 5,
+	NES_NIC_SQ_WQE_FRAG0_LOW_IDX = 6,	/* five fragment low/high pairs follow */
+	NES_NIC_SQ_WQE_FRAG0_HIGH_IDX = 7,
+	NES_NIC_SQ_WQE_FRAG1_LOW_IDX = 8,
+	NES_NIC_SQ_WQE_FRAG1_HIGH_IDX = 9,
+	NES_NIC_SQ_WQE_FRAG2_LOW_IDX = 10,
+	NES_NIC_SQ_WQE_FRAG2_HIGH_IDX = 11,
+	NES_NIC_SQ_WQE_FRAG3_LOW_IDX = 12,
+	NES_NIC_SQ_WQE_FRAG3_HIGH_IDX = 13,
+	NES_NIC_SQ_WQE_FRAG4_LOW_IDX = 14,
+	NES_NIC_SQ_WQE_FRAG4_HIGH_IDX = 15,
+};
+
+enum nes_iwarp_sq_wqe_word_idx {	/* max index 31; presumably into nes_hw_qp_wqe.wqe_words[32] */
+	NES_IWARP_SQ_WQE_MISC_IDX = 0,
+	NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX = 1,
+	NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX = 2,
+	NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX = 3,
+	NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX = 4,
+	NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX = 5,
+	NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX = 7,	/* word 6 is named only by the bind/locinv enums */
+	NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX = 8,
+	NES_IWARP_SQ_WQE_RDMA_TO_HIGH_IDX = 9,
+	NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX = 10,
+	NES_IWARP_SQ_WQE_RDMA_STAG_IDX = 11,
+	NES_IWARP_SQ_WQE_IMM_DATA_START_IDX = 12,
+	NES_IWARP_SQ_WQE_FRAG0_LOW_IDX = 16,	/* four 4-word groups: frag low, frag high, length, stag */
+	NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX = 17,
+	NES_IWARP_SQ_WQE_LENGTH0_IDX = 18,
+	NES_IWARP_SQ_WQE_STAG0_IDX = 19,
+	NES_IWARP_SQ_WQE_FRAG1_LOW_IDX = 20,
+	NES_IWARP_SQ_WQE_FRAG1_HIGH_IDX = 21,
+	NES_IWARP_SQ_WQE_LENGTH1_IDX = 22,
+	NES_IWARP_SQ_WQE_STAG1_IDX = 23,
+	NES_IWARP_SQ_WQE_FRAG2_LOW_IDX = 24,
+	NES_IWARP_SQ_WQE_FRAG2_HIGH_IDX = 25,
+	NES_IWARP_SQ_WQE_LENGTH2_IDX = 26,
+	NES_IWARP_SQ_WQE_STAG2_IDX = 27,
+	NES_IWARP_SQ_WQE_FRAG3_LOW_IDX = 28,
+	NES_IWARP_SQ_WQE_FRAG3_HIGH_IDX = 29,
+	NES_IWARP_SQ_WQE_LENGTH3_IDX = 30,
+	NES_IWARP_SQ_WQE_STAG3_IDX = 31,
+};
+
+enum nes_iwarp_sq_bind_wqe_word_idx {	/* words 6-11; presumably the SQ WQE layout for bind ops */
+	NES_IWARP_SQ_BIND_WQE_MR_IDX = 6,
+	NES_IWARP_SQ_BIND_WQE_MW_IDX = 7,
+	NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX = 8,
+	NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX = 9,
+	NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX = 10,
+	NES_IWARP_SQ_BIND_WQE_VA_FBO_HIGH_IDX = 11,
+};
+
+enum nes_iwarp_sq_fmr_wqe_word_idx {	/* words 7-14; presumably the SQ WQE layout for fast-register ops */
+	NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX = 7,
+	NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX = 8,
+	NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX = 9,
+	NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX = 10,
+	NES_IWARP_SQ_FMR_WQE_VA_FBO_HIGH_IDX = 11,
+	NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX = 12,
+	NES_IWARP_SQ_FMR_WQE_PBL_ADDR_HIGH_IDX = 13,
+	NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX = 14,
+};
+
+enum nes_iwarp_sq_locinv_wqe_word_idx {	/* presumably the SQ WQE layout for local-invalidate ops */
+	NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX = 6,
+};
+
+
+enum nes_iwarp_rq_wqe_word_idx {	/* max index 23; presumably into nes_hw_qp_wqe.wqe_words[32] */
+	NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX = 1,	/* word 0 is not named for the RQ */
+	NES_IWARP_RQ_WQE_COMP_CTX_LOW_IDX = 2,
+	NES_IWARP_RQ_WQE_COMP_CTX_HIGH_IDX = 3,
+	NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX = 4,
+	NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX = 5,
+	NES_IWARP_RQ_WQE_FRAG0_LOW_IDX = 8,	/* four 4-word groups: frag low, frag high, length, stag */
+	NES_IWARP_RQ_WQE_FRAG0_HIGH_IDX = 9,
+	NES_IWARP_RQ_WQE_LENGTH0_IDX = 10,
+	NES_IWARP_RQ_WQE_STAG0_IDX = 11,
+	NES_IWARP_RQ_WQE_FRAG1_LOW_IDX = 12,
+	NES_IWARP_RQ_WQE_FRAG1_HIGH_IDX = 13,
+	NES_IWARP_RQ_WQE_LENGTH1_IDX = 14,
+	NES_IWARP_RQ_WQE_STAG1_IDX = 15,
+	NES_IWARP_RQ_WQE_FRAG2_LOW_IDX = 16,
+	NES_IWARP_RQ_WQE_FRAG2_HIGH_IDX = 17,
+	NES_IWARP_RQ_WQE_LENGTH2_IDX = 18,
+	NES_IWARP_RQ_WQE_STAG2_IDX = 19,
+	NES_IWARP_RQ_WQE_FRAG3_LOW_IDX = 20,
+	NES_IWARP_RQ_WQE_FRAG3_HIGH_IDX = 21,
+	NES_IWARP_RQ_WQE_LENGTH3_IDX = 22,
+	NES_IWARP_RQ_WQE_STAG3_IDX = 23,
+};
+
+enum nes_nic_sq_wqe_bits {	/* presumably flags in word NES_NIC_SQ_WQE_MISC_IDX -- confirm */
+	NES_NIC_SQ_WQE_PHDR_CS_READY =  (1<<21),
+	NES_NIC_SQ_WQE_LSO_ENABLE = (1<<22),
+	NES_NIC_SQ_WQE_TAGVALUE_ENABLE = (1<<23),
+	NES_NIC_SQ_WQE_DISABLE_CHKSUM = (1<<30),
+	NES_NIC_SQ_WQE_COMPLETION = (1<<31),
+};
+
+enum nes_nic_cqe_word_idx {	/* presumably indices into nes_hw_nic_cqe.cqe_words[4] */
+	NES_NIC_CQE_ACCQP_ID_IDX = 0,
+	NES_NIC_CQE_TAG_PKT_TYPE_IDX = 2,	/* word 1 is not named */
+	NES_NIC_CQE_MISC_IDX = 3,
+};
+
+#define NES_PKT_TYPE_APBVT_BITS 0xC112	/* BITS/MASK pairs: presumably (type & MASK) == BITS */
+#define NES_PKT_TYPE_APBVT_MASK 0xff3e
+
+#define NES_PKT_TYPE_PVALID_BITS 0x10000000
+#define NES_PKT_TYPE_PVALID_MASK 0x30000000
+
+#define NES_PKT_TYPE_TCPV4_BITS 0x0110	/* TCPv4, UDPv4 and IPv4 share the mask 0x3f30 */
+#define NES_PKT_TYPE_TCPV4_MASK 0x3f30
+
+#define NES_PKT_TYPE_UDPV4_BITS 0x0210
+#define NES_PKT_TYPE_UDPV4_MASK 0x3f30
+
+#define NES_PKT_TYPE_IPV4_BITS  0x0010
+#define NES_PKT_TYPE_IPV4_MASK  0x3f30
+
+#define NES_PKT_TYPE_OTHER_BITS 0x0000
+#define NES_PKT_TYPE_OTHER_MASK 0x0030
+
+#define NES_NIC_CQE_ERRV_SHIFT 16	/* shift used by NES_NIC_CQE_ERRV_MASK */
+enum nes_nic_ev_bits {	/* presumably bits of the ERRV field after shifting right by 16 */
+	NES_NIC_ERRV_BITS_MODE = (1<<0),
+	NES_NIC_ERRV_BITS_IPV4_CSUM_ERR = (1<<1),
+	NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR = (1<<2),
+	NES_NIC_ERRV_BITS_WQE_OVERRUN = (1<<3),
+	NES_NIC_ERRV_BITS_IPH_ERR = (1<<4),
+};
+
+enum nes_nic_cqe_bits {	/* presumably fields of word NES_NIC_CQE_MISC_IDX -- confirm */
+	NES_NIC_CQE_ERRV_MASK = (0xff<<NES_NIC_CQE_ERRV_SHIFT),	/* bits 16-23 */
+	NES_NIC_CQE_SQ = (1<<24),
+	NES_NIC_CQE_ACCQP_PORT = (1<<28),
+	NES_NIC_CQE_ACCQP_VALID = (1<<29),
+	NES_NIC_CQE_TAG_VALID = (1<<30),
+	NES_NIC_CQE_VALID = (1<<31),
+};
+
+enum nes_aeqe_word_idx {	/* names all four words; presumably of nes_hw_aeqe.aeqe_words[4] */
+	NES_AEQE_COMP_CTXT_LOW_IDX = 0,
+	NES_AEQE_COMP_CTXT_HIGH_IDX = 1,
+	NES_AEQE_COMP_QP_CQ_ID_IDX = 2,
+	NES_AEQE_MISC_IDX = 3,
+};
+
+enum nes_aeqe_bits {	/* presumably fields of word NES_AEQE_MISC_IDX -- confirm */
+	NES_AEQE_QP = (1<<16),
+	NES_AEQE_CQ = (1<<17),
+	NES_AEQE_SQ = (1<<18),
+	NES_AEQE_INBOUND_RDMA = (1<<19),
+	NES_AEQE_IWARP_STATE_MASK = (7<<20),	/* bits 20-22; shift is NES_AEQE_IWARP_STATE_SHIFT */
+	NES_AEQE_TCP_STATE_MASK = (0xf<<24),	/* bits 24-27; shift is NES_AEQE_TCP_STATE_SHIFT */
+	NES_AEQE_VALID = (1<<31),
+};
+
+#define NES_AEQE_IWARP_STATE_SHIFT	20	/* matches NES_AEQE_IWARP_STATE_MASK (7<<20) */
+#define NES_AEQE_TCP_STATE_SHIFT	24	/* matches NES_AEQE_TCP_STATE_MASK (0xf<<24) */
+
+enum nes_aeqe_iwarp_state {	/* fits the 3-bit NES_AEQE_IWARP_STATE_MASK field */
+	NES_AEQE_IWARP_STATE_NON_EXISTANT = 0,
+	NES_AEQE_IWARP_STATE_IDLE = 1,	/* 1,2,3,5,6 match the NES_CQP_QP_IWARP_STATE_* codes */
+	NES_AEQE_IWARP_STATE_RTS = 2,
+	NES_AEQE_IWARP_STATE_CLOSING = 3,
+	NES_AEQE_IWARP_STATE_TERMINATE = 5,	/* 4 is not defined */
+	NES_AEQE_IWARP_STATE_ERROR = 6
+};
+
+enum nes_aeqe_tcp_state {	/* values 0-11 fit the 4-bit NES_AEQE_TCP_STATE_MASK field */
+	NES_AEQE_TCP_STATE_NON_EXISTANT = 0,
+	NES_AEQE_TCP_STATE_CLOSED = 1,
+	NES_AEQE_TCP_STATE_LISTEN = 2,
+	NES_AEQE_TCP_STATE_SYN_SENT = 3,
+	NES_AEQE_TCP_STATE_SYN_RCVD = 4,
+	NES_AEQE_TCP_STATE_ESTABLISHED = 5,
+	NES_AEQE_TCP_STATE_CLOSE_WAIT = 6,
+	NES_AEQE_TCP_STATE_FIN_WAIT_1 = 7,
+	NES_AEQE_TCP_STATE_CLOSING = 8,
+	NES_AEQE_TCP_STATE_LAST_ACK = 9,
+	NES_AEQE_TCP_STATE_FIN_WAIT_2 = 10,
+	NES_AEQE_TCP_STATE_TIME_WAIT = 11
+};
+
+enum nes_aeqe_aeid {	/* 16-bit IDs grouped by high byte, e.g. 0x01xx are all AMP_*, 0x05xx all LLP_* */
+	NES_AEQE_AEID_AMP_UNALLOCATED_STAG                            = 0x0102,
+	NES_AEQE_AEID_AMP_INVALID_STAG                                = 0x0103,
+	NES_AEQE_AEID_AMP_BAD_QP                                      = 0x0104,
+	NES_AEQE_AEID_AMP_BAD_PD                                      = 0x0105,
+	NES_AEQE_AEID_AMP_BAD_STAG_KEY                                = 0x0106,
+	NES_AEQE_AEID_AMP_BAD_STAG_INDEX                              = 0x0107,
+	NES_AEQE_AEID_AMP_BOUNDS_VIOLATION                            = 0x0108,
+	NES_AEQE_AEID_AMP_RIGHTS_VIOLATION                            = 0x0109,
+	NES_AEQE_AEID_AMP_TO_WRAP                                     = 0x010a,
+	NES_AEQE_AEID_AMP_FASTREG_SHARED                              = 0x010b,
+	NES_AEQE_AEID_AMP_FASTREG_VALID_STAG                          = 0x010c,
+	NES_AEQE_AEID_AMP_FASTREG_MW_STAG                             = 0x010d,
+	NES_AEQE_AEID_AMP_FASTREG_INVALID_RIGHTS                      = 0x010e,
+	NES_AEQE_AEID_AMP_FASTREG_PBL_TABLE_OVERFLOW                  = 0x010f,
+	NES_AEQE_AEID_AMP_FASTREG_INVALID_LENGTH                      = 0x0110,
+	NES_AEQE_AEID_AMP_INVALIDATE_SHARED                           = 0x0111,
+	NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS          = 0x0112,
+	NES_AEQE_AEID_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS            = 0x0113,
+	NES_AEQE_AEID_AMP_MWBIND_VALID_STAG                           = 0x0114,
+	NES_AEQE_AEID_AMP_MWBIND_OF_MR_STAG                           = 0x0115,
+	NES_AEQE_AEID_AMP_MWBIND_TO_ZERO_BASED_STAG                   = 0x0116,
+	NES_AEQE_AEID_AMP_MWBIND_TO_MW_STAG                           = 0x0117,
+	NES_AEQE_AEID_AMP_MWBIND_INVALID_RIGHTS                       = 0x0118,
+	NES_AEQE_AEID_AMP_MWBIND_INVALID_BOUNDS                       = 0x0119,
+	NES_AEQE_AEID_AMP_MWBIND_TO_INVALID_PARENT                    = 0x011a,
+	NES_AEQE_AEID_AMP_MWBIND_BIND_DISABLED                        = 0x011b,
+	NES_AEQE_AEID_BAD_CLOSE                                       = 0x0201,
+	NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE                         = 0x0202,
+	NES_AEQE_AEID_CQ_OPERATION_ERROR                              = 0x0203,
+	NES_AEQE_AEID_PRIV_OPERATION_DENIED                           = 0x0204,
+	NES_AEQE_AEID_RDMA_READ_WHILE_ORD_ZERO                        = 0x0205,
+	NES_AEQE_AEID_STAG_ZERO_INVALID                               = 0x0206,
+	NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN                      = 0x0301,
+	NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID              = 0x0302,
+	NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER = 0x0303,
+	NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION                     = 0x0304,
+	NES_AEQE_AEID_DDP_UBE_INVALID_MO                              = 0x0305,
+	NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE         = 0x0306,
+	NES_AEQE_AEID_DDP_UBE_INVALID_QN                              = 0x0307,
+	NES_AEQE_AEID_DDP_NO_L_BIT                                    = 0x0308,
+	NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION                 = 0x0311,
+	NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE                     = 0x0312,
+	NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST                   = 0x0313,
+	NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP             = 0x0314,
+	NES_AEQE_AEID_INVALID_ARP_ENTRY                               = 0x0401,
+	NES_AEQE_AEID_INVALID_TCP_OPTION_RCVD                         = 0x0402,
+	NES_AEQE_AEID_STALE_ARP_ENTRY                                 = 0x0403,
+	NES_AEQE_AEID_LLP_CLOSE_COMPLETE                              = 0x0501,
+	NES_AEQE_AEID_LLP_CONNECTION_RESET                            = 0x0502,
+	NES_AEQE_AEID_LLP_FIN_RECEIVED                                = 0x0503,
+	NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH =  0x0504,
+	NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR                      = 0x0505,
+	NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE                           = 0x0506,
+	NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL                           = 0x0507,
+	NES_AEQE_AEID_LLP_SYN_RECEIVED                                = 0x0508,
+	NES_AEQE_AEID_LLP_TERMINATE_RECEIVED                          = 0x0509,
+	NES_AEQE_AEID_LLP_TOO_MANY_RETRIES                            = 0x050a,
+	NES_AEQE_AEID_LLP_TOO_MANY_KEEPALIVE_RETRIES                  = 0x050b,
+	NES_AEQE_AEID_RESET_SENT                                      = 0x0601,
+	NES_AEQE_AEID_TERMINATE_SENT                                  = 0x0602,
+	NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC                      = 0x0700
+};
+
+enum nes_iwarp_sq_opcodes {	/* NOTE(review): despite the tag, these are single-bit flags, not opcodes */
+	NES_IWARP_SQ_WQE_WRPDU = (1<<15),
+	NES_IWARP_SQ_WQE_PSH = (1<<21),
+	NES_IWARP_SQ_WQE_STREAMING = (1<<23),
+	NES_IWARP_SQ_WQE_IMM_DATA = (1<<28),
+	NES_IWARP_SQ_WQE_READ_FENCE = (1<<29),
+	NES_IWARP_SQ_WQE_LOCAL_FENCE = (1<<30),
+	NES_IWARP_SQ_WQE_SIGNALED_COMPL = (1<<31),
+};
+
+enum nes_iwarp_sq_wqe_bits {	/* NOTE(review): despite the tag, these are opcode values, not bit flags */
+	NES_IWARP_SQ_OP_RDMAW = 0,
+	NES_IWARP_SQ_OP_RDMAR = 1,
+	NES_IWARP_SQ_OP_SEND = 3,	/* 2 is not defined */
+	NES_IWARP_SQ_OP_SENDINV = 4,
+	NES_IWARP_SQ_OP_SENDSE = 5,
+	NES_IWARP_SQ_OP_SENDSEINV = 6,
+	NES_IWARP_SQ_OP_BIND = 8,	/* 7 is not defined */
+	NES_IWARP_SQ_OP_FAST_REG = 9,
+	NES_IWARP_SQ_OP_LOCINV = 10,
+	NES_IWARP_SQ_OP_RDMAR_LOCINV = 11,
+	NES_IWARP_SQ_OP_NOP = 12,
+};
+
+#define NES_EEPROM_READ_REQUEST (1<<16)
+#define NES_MAC_ADDR_VALID      (1<<20)
+
+/*
+ * NES index registers init values (one index/data pair per entry).
+ */
+struct nes_init_values {
+	u32 index;	/* presumably an enum indexed_regs offset -- confirm */
+	u32 data;	/* presumably the value for that register */
+	u8  wrt;	/* NOTE(review): meaning not visible here */
+};
+
+/*
+ * NES registers in BAR0; field order follows the first entries of enum pci_regs.
+ */
+struct nes_pci_regs {
+	u32 int_status;		/* NES_INT_STAT, 0x0000 */
+	u32 int_mask;		/* NES_INT_MASK, 0x0004 */
+	u32 int_pending;	/* NES_INT_PENDING, 0x0008 */
+	u32 intf_int_status;	/* NES_INTF_INT_STAT, 0x000C */
+	u32 intf_int_mask;	/* NES_INTF_INT_MASK, 0x0010 */
+	u32 other_regs[59];	 /* pad out to 256 bytes for now: (5 + 59) u32 words */
+};
+
+#define NES_CQP_SQ_SIZE    128
+#define NES_CCQ_SIZE       128
+#define NES_NIC_WQ_SIZE    512	/* sizes the tx_skb[], rx_skb[] and frag_paddr[] arrays of nes_hw_nic */
+#define NES_NIC_CTX_SIZE   ((NES_NIC_CTX_RQ_SIZE_512) | (NES_NIC_CTX_SQ_SIZE_512))	/* both set to their 512 encodings */
+#define NES_NIC_BACK_STORE 0x00038000
+
+struct nes_device;
+
+struct nes_hw_nic_qp_context {	/* little-endian words */
+	__le32 context_words[6];	/* presumably indexed by enum nes_nic_qp_ctx_word_idx (0-5) */
+};
+
+struct nes_hw_nic_sq_wqe {	/* little-endian words */
+	__le32 wqe_words[16];	/* presumably indexed by enum nes_nic_sq_wqe_word_idx (0-15) */
+};
+
+struct nes_hw_nic_rq_wqe {	/* little-endian words */
+	__le32 wqe_words[16];	/* presumably indexed by enum nes_nic_rq_wqe_word_idx (names 0-9) */
+};
+
+struct nes_hw_nic_cqe {	/* little-endian words */
+	__le32 cqe_words[4];	/* presumably indexed by enum nes_nic_cqe_word_idx */
+};
+
+struct nes_hw_cqp_qp_context {	/* little-endian words */
+	__le32 context_words[4];	/* NOTE(review): no index enum for these words is visible here */
+};
+
+struct nes_hw_cqp_wqe {	/* filled via cpu_to_le32() in flush_wqes() */
+	__le32 wqe_words[16];	/* indexed by NES_CQP_WQE_*_IDX */
+};
+
+struct nes_hw_qp_wqe {	/* used for both sq_vbase and rq_vbase in struct nes_hw_qp */
+	__le32 wqe_words[32];	/* presumably indexed by the NES_IWARP_SQ/RQ_WQE_*_IDX enums */
+};
+
+struct nes_hw_cqe {	/* little-endian words */
+	__le32 cqe_words[8];	/* presumably indexed by enum nes_cqe_word_idx (0-7) */
+};
+
+struct nes_hw_ceqe {	/* little-endian words */
+	__le32 ceqe_words[2];	/* presumably indexed by enum nes_ceqe_word_idx (0-1) */
+};
+
+struct nes_hw_aeqe {	/* little-endian words */
+	__le32 aeqe_words[4];	/* presumably indexed by enum nes_aeqe_word_idx (0-3) */
+};
+
+struct nes_cqp_request {	/* one CQP request; obtained via nes_get_cqp_request() */
+	union {
+		u64 cqp_callback_context;
+		void *cqp_callback_pointer;
+	};
+	wait_queue_head_t     waitq;	/* flush_wqes() waits here until request_done != 0 */
+	struct nes_hw_cqp_wqe cqp_wqe;	/* the WQE the caller fills before posting */
+	struct list_head      list;	/* links the request into nesdev->cqp_avail_reqs */
+	atomic_t              refcount;	/* set to 2 by a waiting caller; request released at 0 */
+	void (*cqp_callback)(struct nes_device *nesdev, struct nes_cqp_request *cqp_request);
+	u16                   major_code;	/* logged as "CQP Major:Minor codes" */
+	u16                   minor_code;
+	u8                    waiting;	/* 1 when the poster will wait for completion */
+	u8                    request_done;	/* wake-up condition for waitq */
+	u8                    dynamic;	/* non-zero: kfree() on release instead of re-listing */
+	u8                    callback;
+};
+
+struct nes_hw_cqp {	/* presumably the type of nesdev->cqp -- confirm */
+	struct nes_hw_cqp_wqe *sq_vbase;
+	dma_addr_t            sq_pbase;
+	spinlock_t            lock;	/* held by flush_wqes() while re-listing a request */
+	wait_queue_head_t     waitq;
+	u16                   qp_id;
+	u16                   sq_head;
+	u16                   sq_tail;
+	u16                   sq_size;
+};
+
+#define NES_FIRST_FRAG_SIZE 128	/* bytes per first-fragment buffer */
+struct nes_first_frag {
+	u8 buffer[NES_FIRST_FRAG_SIZE];
+};
+
+struct nes_hw_nic {	/* per-NIC-QP send/receive ring state */
+	struct nes_first_frag    *first_frag_vbase;	/* virtual address of first frags */
+	struct nes_hw_nic_sq_wqe *sq_vbase;			/* virtual address of sq */
+	struct nes_hw_nic_rq_wqe *rq_vbase;			/* virtual address of rq */
+	struct sk_buff           *tx_skb[NES_NIC_WQ_SIZE];	/* one slot per WQ entry */
+	struct sk_buff           *rx_skb[NES_NIC_WQ_SIZE];	/* one slot per WQ entry */
+	dma_addr_t frag_paddr[NES_NIC_WQ_SIZE];
+	unsigned long first_frag_overflow[BITS_TO_LONGS(NES_NIC_WQ_SIZE)];	/* bitmap, one bit per WQ entry */
+	dma_addr_t sq_pbase;			/* PCI memory for host rings */
+	dma_addr_t rq_pbase;			/* PCI memory for host rings */
+
+	u16 qp_id;
+	u16 sq_head;
+	u16 sq_tail;
+	u16 sq_size;
+	u16 rq_head;
+	u16 rq_tail;
+	u16 rq_size;
+	u8 replenishing_rq;
+	u8 reserved;
+
+	spinlock_t sq_lock;
+	spinlock_t rq_lock;
+};
+
+struct nes_hw_nic_cq {	/* NIC completion queue state */
+	struct nes_hw_nic_cqe volatile *cq_vbase;	/* PCI memory for host rings */
+	void (*ce_handler)(struct nes_device *nesdev, struct nes_hw_nic_cq *cq);	/* handler callback, receives this CQ */
+	dma_addr_t cq_pbase;	/* PCI memory for host rings */
+	int rx_cqes_completed;
+	int cqe_allocs_pending;
+	int rx_pkts_indicated;
+	u16 cq_head;
+	u16 cq_size;
+	u16 cq_number;
+	u8  cqes_pending;
+};
+
+struct nes_hw_qp {	/* QP ring state; flush_wqes() reads nesqp->hwqp.qp_id */
+	struct nes_hw_qp_wqe *sq_vbase;		/* PCI memory for host rings */
+	struct nes_hw_qp_wqe *rq_vbase;		/* PCI memory for host rings */
+	void                 *q2_vbase;			/* PCI memory for host rings */
+	dma_addr_t sq_pbase;	/* PCI memory for host rings */
+	dma_addr_t rq_pbase;	/* PCI memory for host rings */
+	dma_addr_t q2_pbase;	/* PCI memory for host rings */
+	u32 qp_id;	/* 32-bit here, unlike the u16 qp_id in nes_hw_nic / nes_hw_cqp */
+	u16 sq_head;
+	u16 sq_tail;
+	u16 sq_size;
+	u16 rq_head;
+	u16 rq_tail;
+	u16 rq_size;
+	u8  rq_encoded_size;
+	u8  sq_encoded_size;
+};
+
+struct nes_hw_cq {	/* completion queue state */
+	struct nes_hw_cqe volatile *cq_vbase;	/* PCI memory for host rings */
+	void (*ce_handler)(struct nes_device *nesdev, struct nes_hw_cq *cq);	/* handler callback, receives this CQ */
+	dma_addr_t cq_pbase;	/* PCI memory for host rings */
+	u16 cq_head;
+	u16 cq_size;
+	u16 cq_number;
+};
+
+struct nes_hw_ceq {	/* element type of nes_adapter.ceq[16] */
+	struct nes_hw_ceqe volatile *ceq_vbase;	/* PCI memory for host rings */
+	dma_addr_t ceq_pbase;	/* PCI memory for host rings */
+	u16 ceq_head;
+	u16 ceq_size;
+};
+
+struct nes_hw_aeq {	/* element type of nes_adapter.aeq[8] */
+	struct nes_hw_aeqe volatile *aeq_vbase;	/* PCI memory for host rings */
+	dma_addr_t aeq_pbase;	/* PCI memory for host rings */
+	u16 aeq_head;
+	u16 aeq_size;
+};
+
+struct nic_qp_map {	/* NOTE(review): presumably maps a NIC QP id to its NIC index and port -- confirm */
+	u8 qpid;
+	u8 nic_index;
+	u8 logical_port;
+	u8 is_hnic;
+};
+
+#define	NES_CQP_ARP_AEQ_INDEX_MASK  0x000f0000	/* bits 16-19 */
+#define	NES_CQP_ARP_AEQ_INDEX_SHIFT 16	/* shift matching the mask above */
+
+#define NES_CQP_APBVT_ADD			0x00008000	/* bit 15 */
+#define NES_CQP_APBVT_NIC_SHIFT		16
+
+#define NES_ARP_ADD     1
+#define NES_ARP_DELETE  2
+#define NES_ARP_RESOLVE 3
+
+#define NES_MAC_SW_IDLE      0	/* presumably values of nes_adapter.mac_sw_state[] -- confirm */
+#define NES_MAC_SW_INTERRUPT 1
+#define NES_MAC_SW_MH        2
+
+struct nes_arp_entry {	/* element type of nes_adapter.arp_table[] */
+	u32 ip_addr;
+	u8  mac_addr[ETH_ALEN];
+};
+
+#define NES_NIC_FAST_TIMER          96
+#define NES_NIC_FAST_TIMER_LOW      40
+#define NES_NIC_FAST_TIMER_HIGH     1000
+#define DEFAULT_NES_QL_HIGH         256
+#define DEFAULT_NES_QL_LOW          16
+#define DEFAULT_NES_QL_TARGET       64
+#define DEFAULT_JUMBO_NES_QL_LOW    12
+#define DEFAULT_JUMBO_NES_QL_TARGET 40
+#define DEFAULT_JUMBO_NES_QL_HIGH   128
+#define NES_NIC_CQ_DOWNWARD_TREND   8
+
+struct nes_hw_tune_timer {	/* type of nes_adapter.tune_timer */
+    /* u16 cq_count; -- field disabled in the original */
+    u16 threshold_low;	/* presumably seeded from the *_NES_QL_LOW/TARGET/HIGH defaults -- confirm */
+    u16 threshold_target;
+    u16 threshold_high;
+    u16 timer_in_use;
+    u16 timer_in_use_old;
+    u16 timer_in_use_min;
+    u16 timer_in_use_max;
+    u8  timer_direction_upward;
+    u8  timer_direction_downward;
+    u16 cq_count_old;
+    u8  cq_direction_downward;
+};
+
+#define NES_TIMER_INT_LIMIT         2
+#define NES_TIMER_INT_LIMIT_DYNAMIC 10
+#define NES_TIMER_ENABLE_LIMIT      4
+#define NES_MAX_LINK_INTERRUPTS		128
+#define NES_MAX_LINK_CHECK		200
+
+struct nes_adapter {	/* adapter-wide state: resource tables, event queues, limits, EEPROM data, per-port arrays */
+	u64              fw_ver;
+	unsigned long    *allocated_qps;	/* presumably allocation bitmaps (allocated_*) -- TODO confirm */
+	unsigned long    *allocated_cqs;
+	unsigned long    *allocated_mrs;
+	unsigned long    *allocated_pds;
+	unsigned long    *allocated_arps;
+	struct nes_qp    **qp_table;
+	struct workqueue_struct *work_q;
+
+	struct list_head list;
+	struct list_head active_listeners;
+	/* list of the netdevs associated with each logical port; nes_netdev_open() indexes it by nesdev->mac_index */
+	struct list_head nesvnic_list[4];
+
+	struct timer_list  mh_timer;
+	struct timer_list  lc_timer;
+	struct work_struct work;
+	spinlock_t         resource_lock;
+	spinlock_t         phy_lock;
+	spinlock_t         pbl_lock;
+	spinlock_t         periodic_timer_lock;
+
+	struct nes_arp_entry arp_table[NES_MAX_ARP_TABLE_SIZE];
+
+	/* Adapter CEQs and AEQs */
+	struct nes_hw_ceq ceq[16];
+	struct nes_hw_aeq aeq[8];
+
+	struct nes_hw_tune_timer tune_timer;
+
+	unsigned long doorbell_start;
+
+	u32 hw_rev;
+	u32 vendor_id;
+	u32 vendor_part_id;
+	u32 device_cap_flags;
+	u32 tick_delta;
+	u32 timer_int_req;
+	u32 arp_table_size;
+	u32 next_arp_index;
+
+	u32 max_mr;
+	u32 max_256pbl;
+	u32 max_4kpbl;
+	u32 free_256pbl;
+	u32 free_4kpbl;
+	u32 max_mr_size;
+	u32 max_qp;
+	u32 next_qp;
+	u32 max_irrq;
+	u32 max_qp_wr;
+	u32 max_sge;
+	u32 max_cq;
+	u32 next_cq;
+	u32 max_cqe;
+	u32 max_pd;
+	u32 base_pd;
+	u32 next_pd;
+	u32 hte_index_mask;
+
+	/* EEPROM information */
+	u32 rx_pool_size;
+	u32 tx_pool_size;
+	u32 rx_threshold;
+	u32 tcp_timer_core_clk_divisor;
+	u32 iwarp_config;
+	u32 cm_config;
+	u32 sws_timer_config;
+	u32 tcp_config1;
+	u32 wqm_wat;
+	u32 core_clock;
+	u32 firmware_version;
+
+	u32 nic_rx_eth_route_err;
+
+	u32 et_rx_coalesce_usecs;	/* et_* names suggest ethtool coalescing settings -- TODO confirm */
+	u32	et_rx_max_coalesced_frames;
+	u32 et_rx_coalesce_usecs_irq;
+	u32 et_rx_max_coalesced_frames_irq;
+	u32 et_pkt_rate_low;
+	u32 et_rx_coalesce_usecs_low;
+	u32 et_rx_max_coalesced_frames_low;
+	u32 et_pkt_rate_high;
+	u32 et_rx_coalesce_usecs_high;
+	u32 et_rx_max_coalesced_frames_high;
+	u32 et_rate_sample_interval;
+	u32 timer_int_limit;
+
+	/* Adapter base MAC address, split into a 32-bit low part and a 16-bit high part */
+	u32 mac_addr_low;
+	u16 mac_addr_high;
+
+	u16 firmware_eeprom_offset;
+	u16 software_eeprom_offset;
+
+	u16 max_irrq_wr;
+
+	/* pd config for each port */
+	u16 pd_config_size[4];
+	u16 pd_config_base[4];
+
+	u16 link_interrupt_count[4];
+
+	/* the phy index for each port */
+	u8  phy_index[4];
+	u8  mac_sw_state[4];	/* presumably one of NES_MAC_SW_* -- TODO confirm */
+	u8  mac_link_down[4];
+	u8  phy_type[4];
+
+	/* PCI information */
+	unsigned int  devfn;
+	unsigned char bus_number;
+	unsigned char OneG_Mode;
+
+	unsigned char ref_count;
+	u8            netdev_count;
+	u8            netdev_max;	/* from host nic address count in EEPROM */
+	u8            port_count;
+	u8            virtwq;
+	u8            et_use_adaptive_rx_coalesce;
+	u8            adapter_fcn_count;
+};
+
+struct nes_pbl {	/* one PBL allocation: CPU address (pbl_vbase) paired with its DMA address (pbl_pbase) */
+	u64              *pbl_vbase;
+	dma_addr_t       pbl_pbase;
+	struct page      *page;
+	unsigned long    user_base;
+	u32              pbl_size;	/* units (bytes vs. entries) not visible here -- TODO confirm */
+	struct list_head list;
+	/* TODO: add a list for two-level tables */
+};
+
+struct nes_listener {	/* presumably linked on nes_adapter.active_listeners via 'list' -- TODO confirm */
+	struct work_struct      work;
+	struct workqueue_struct *wq;
+	struct nes_vnic         *nesvnic;
+	struct iw_cm_id         *cm_id;
+	struct list_head        list;
+	unsigned long           socket;
+	u8                      accept_failed;
+};
+
+struct nes_ib_device;
+
+struct nes_vnic {	/* per-netdev private data; the nes_nic.c handlers obtain it with netdev_priv(netdev) */
+	struct nes_ib_device *nesibdev;
+	u64 sq_full;	/* bumped by nes_netdev_start_xmit() when the SQ has no room */
+	u64 sq_locked;	/* bumped by nes_netdev_start_xmit() when the sq_lock trylock fails */
+	u64 tso_requests;
+	u64 segmented_tso_requests;
+	u64 linearized_skbs;
+	u64 tx_sw_dropped;
+	u64 endnode_nstat_rx_discard;
+	u64 endnode_nstat_rx_octets;
+	u64 endnode_nstat_rx_frames;
+	u64 endnode_nstat_tx_octets;
+	u64 endnode_nstat_tx_frames;
+	u64 endnode_ipv4_tcp_retransmits;
+	/* void *mem; */
+	struct nes_device *nesdev;
+	struct net_device *netdev;
+	struct vlan_group *vlan_grp;
+	atomic_t          rx_skbs_needed;
+	atomic_t          rx_skb_timer_running;
+	int               budget;	/* set from the NAPI budget on every nes_netdev_poll() call */
+	u32               msg_enable;
+	/* u32 tx_avail; */
+	__be32            local_ipaddr;
+	struct napi_struct   napi;	/* nes_netdev_poll() recovers the vnic from this via container_of() */
+	spinlock_t           tx_lock;	/* could use netdev tx lock? */
+	struct timer_list    rq_wqes_timer;
+	u32                  nic_mem_size;
+	void                 *nic_vbase;
+	dma_addr_t           nic_pbase;
+	struct nes_hw_nic    nic;
+	struct nes_hw_nic_cq nic_cq;
+	u32    mcrq_qp_id;
+	struct nes_ucontext *mcrq_ucontext;
+	struct nes_cqp_request* (*get_cqp_request)(struct nes_device *nesdev);
+	void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *, int);
+	int (*mcrq_mcast_filter)( struct nes_vnic* nesvnic, __u8* dmi_addr );
+	struct net_device_stats netstats;	/* returned by nes_netdev_get_stats() */
+	/* used to put the netdev on the adapter's logical port list */
+	struct list_head list;
+	u16 max_frame_size;
+	u8  netdev_open;	/* 1 between a successful nes_netdev_open() and nes_netdev_stop() */
+	u8  linkup;
+	u8  logical_port;
+	u8  netdev_index;  /* might not be needed, indexes nesdev->netdev */
+	u8  perfect_filter_index;
+	u8  nic_index;
+	u8  qp_nic_index[4];	/* a value of 0xf ends the list (see the loops in nes_netdev_open()) */
+	u8  next_qp_nic_index;
+	u8  of_device_registered;
+	u8  rdma_enabled;
+	u8  rx_checksum_disabled;
+};
+
+struct nes_ib_device {	/* RDMA device wrapper: an ib_device plus a back pointer to its nes_vnic */
+	struct ib_device ibdev;
+	struct nes_vnic *nesvnic;	/* assigned by nes_netdev_open() after nes_init_ofa_device() */
+
+	/* Virtual RNIC Limits */
+	u32 max_mr;
+	u32 max_qp;
+	u32 max_cq;
+	u32 max_pd;
+	u32 num_mr;	/* num_* are presumably the current usage counts -- TODO confirm */
+	u32 num_qp;
+	u32 num_cq;
+	u32 num_pd;
+};
+
+#define nes_vlan_rx vlan_hwaccel_receive_skb
+#define nes_netif_rx netif_receive_skb
+
+#endif		/* __NES_HW_H */
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
new file mode 100644
index 0000000..b6cc265
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -0,0 +1,1703 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/ethtool.h>
+#include <net/tcp.h>
+
+#include <net/inet_common.h>
+#include <linux/inet.h>
+
+#include "nes.h"
+
+static struct nic_qp_map nic_qp_mapping_0[] = {	/* rows are {qpid, nic_index, logical_port, is_hnic} */
+	{16,0,0,1},{24,4,0,0},{28,8,0,0},{32,12,0,0},
+	{20,2,2,1},{26,6,2,0},{30,10,2,0},{34,14,2,0},
+	{18,1,1,1},{25,5,1,0},{29,9,1,0},{33,13,1,0},
+	{22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0}
+};
+
+static struct nic_qp_map nic_qp_mapping_1[] = {
+	{18,1,1,1},{25,5,1,0},{29,9,1,0},{33,13,1,0},
+	{22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0}
+};
+
+static struct nic_qp_map nic_qp_mapping_2[] = {
+	{20,2,2,1},{26,6,2,0},{30,10,2,0},{34,14,2,0}
+};
+
+static struct nic_qp_map nic_qp_mapping_3[] = {
+	{22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0}
+};
+
+static struct nic_qp_map nic_qp_mapping_4[] = {
+	{28,8,0,0},{32,12,0,0}
+};
+
+static struct nic_qp_map nic_qp_mapping_5[] = {
+	{29,9,1,0},{33,13,1,0}
+};
+
+static struct nic_qp_map nic_qp_mapping_6[] = {
+	{30,10,2,0},{34,14,2,0}
+};
+
+static struct nic_qp_map nic_qp_mapping_7[] = {
+	{31,11,3,0},{35,15,3,0}
+};
+
+static struct nic_qp_map *nic_qp_mapping_per_function[] = {	/* presumably indexed by PCI function number -- TODO confirm */
+	nic_qp_mapping_0, nic_qp_mapping_1, nic_qp_mapping_2, nic_qp_mapping_3,
+	nic_qp_mapping_4, nic_qp_mapping_5, nic_qp_mapping_6, nic_qp_mapping_7
+};
+
+static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
+		| NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
+static int debug = -1;
+
+
+static int nes_netdev_open(struct net_device *);
+static int nes_netdev_stop(struct net_device *);
+static int nes_netdev_start_xmit(struct sk_buff *, struct net_device *);
+static struct net_device_stats *nes_netdev_get_stats(struct net_device *);
+static void nes_netdev_tx_timeout(struct net_device *);
+static int nes_netdev_set_mac_address(struct net_device *, void *);
+static int nes_netdev_change_mtu(struct net_device *, int);
+
+/**
+ * nes_netdev_poll - NAPI poll callback: run the NIC CQ handler, then re-arm the CQ only if no CQEs remain pending
+ */
+static int nes_netdev_poll(struct napi_struct *napi, int budget)
+{
+	struct nes_vnic *nesvnic = container_of(napi, struct nes_vnic, napi);
+	struct net_device *netdev = nesvnic->netdev;
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_hw_nic_cq *nescq = &nesvnic->nic_cq;
+
+	nesvnic->budget = budget;	/* presumably consumed by nes_nic_ce_handler() -- TODO confirm */
+	nescq->cqes_pending = 0;	/* per-poll counters are reset before the handler runs */
+	nescq->rx_cqes_completed = 0;
+	nescq->cqe_allocs_pending = 0;
+	nescq->rx_pkts_indicated = 0;
+
+	nes_nic_ce_handler(nesdev, nescq);
+
+	if (nescq->cqes_pending == 0) {
+		netif_rx_complete(netdev, napi);
+		/* clear out completed cqes and arm */
+		nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
+				nescq->cq_number | (nescq->cqe_allocs_pending << 16));
+		nes_read32(nesdev->regs+NES_CQE_ALLOC);	/* read back; presumably flushes the posted write -- confirm */
+	} else {
+		/* clear out completed cqes but don't arm */
+		nes_write32(nesdev->regs+NES_CQE_ALLOC,
+				nescq->cq_number | (nescq->cqe_allocs_pending << 16));
+		nes_debug(NES_DBG_NETDEV, "%s: exiting with work pending\n",
+				nesvnic->netdev->name);
+	}
+	return nescq->rx_pkts_indicated;	/* work-done count reported back to NAPI */
+}
+
+
+/**
+ * nes_netdev_open - Activate the network interface (ifconfig ethX up):
+ * set up the NIC QP, program the MAC filters and arm the NIC CQ.
+ */
+static int nes_netdev_open(struct net_device *netdev)
+{
+	u32 macaddr_low;
+	u16 macaddr_high;
+	struct nes_vnic *nesvnic = netdev_priv(netdev);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	int ret;
+	int i;
+	struct nes_vnic *first_nesvnic;
+	u32 nic_active_bit;
+	u32 nic_active;
+
+	assert(nesdev != NULL);
+
+	first_nesvnic = list_entry(nesdev->nesadapter->nesvnic_list[nesdev->mac_index].next,
+			struct nes_vnic, list);
+
+	if (netif_msg_ifup(nesvnic))
+		printk(KERN_INFO PFX "%s: enabling interface\n", netdev->name);
+
+	ret = nes_init_nic_qp(nesdev, netdev);
+	if (ret) {
+		return ret;
+	}
+
+	netif_carrier_off(netdev);
+	netif_stop_queue(netdev);
+
+	if ((!nesvnic->of_device_registered) && (nesvnic->rdma_enabled)) {
+		nesvnic->nesibdev = nes_init_ofa_device(netdev);
+		if (nesvnic->nesibdev == NULL) {
+			printk(KERN_ERR PFX "%s: nesvnic->nesibdev alloc failed\n", netdev->name);
+		} else {
+			nesvnic->nesibdev->nesvnic = nesvnic;
+			ret = nes_register_ofa_device(nesvnic->nesibdev);
+			if (ret) {
+				printk(KERN_ERR PFX "%s: Unable to register RDMA device, ret = %d\n",
+						netdev->name, ret);
+			}
+		}
+	}
+	/* Set packet filters: set this NIC's bit in the active, multicast-enable and broadcast-on masks */
+	nic_active_bit = 1 << nesvnic->nic_index;
+	nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_ACTIVE);
+	nic_active |= nic_active_bit;
+	nes_write_indexed(nesdev, NES_IDX_NIC_ACTIVE, nic_active);
+	nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE);
+	nic_active |= nic_active_bit;
+	nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE, nic_active);
+	nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON);
+	nic_active |= nic_active_bit;
+	nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active);
+
+	macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
+	macaddr_high += (u16)netdev->dev_addr[1];
+	macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
+	macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
+	macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
+	macaddr_low += (u32)netdev->dev_addr[5];
+
+	/* Program the perfect-filter MAC regs for each qp_nic_index entry; 0xf ends the list */
+	for (i = 0; i < NES_MAX_PORT_COUNT; i++) {
+		if (nesvnic->qp_nic_index[i] == 0xf) {
+			break;
+		}
+		nes_debug(NES_DBG_NETDEV, "i=%d, perfect filter table index= %d, PERF FILTER LOW"
+				" (Addr:%08X) = %08X, HIGH = %08X.\n",
+				i, nesvnic->qp_nic_index[i],
+				NES_IDX_PERFECT_FILTER_LOW+((nesvnic->perfect_filter_index + i) * 8),
+				macaddr_low,
+				(u32)macaddr_high | NES_MAC_ADDR_VALID |
+				((((u32)nesvnic->nic_index) << 16)));
+		nes_write_indexed(nesdev,
+				NES_IDX_PERFECT_FILTER_LOW + (nesvnic->qp_nic_index[i] * 8),
+				macaddr_low);
+		nes_write_indexed(nesdev,
+				NES_IDX_PERFECT_FILTER_HIGH + (nesvnic->qp_nic_index[i] * 8),
+				(u32)macaddr_high | NES_MAC_ADDR_VALID |
+				((((u32)nesvnic->nic_index) << 16)));
+	}
+
+
+	nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
+			nesvnic->nic_cq.cq_number);
+	nes_read32(nesdev->regs+NES_CQE_ALLOC);
+
+	if (first_nesvnic->linkup) {
+		/* Enable network packets */
+		nesvnic->linkup = 1;
+		netif_start_queue(netdev);
+		netif_carrier_on(netdev);
+	}
+	napi_enable(&nesvnic->napi);
+	nesvnic->netdev_open = 1;
+
+	return 0;
+}
+
+
+/**
+ * nes_netdev_stop - Deactivate the interface: stop NAPI and tx, clear this NIC's filter bits, destroy the NIC QP
+ */
+static int nes_netdev_stop(struct net_device *netdev)
+{
+	struct nes_vnic *nesvnic = netdev_priv(netdev);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	u32 nic_active_mask;
+	u32 nic_active;
+
+	nes_debug(NES_DBG_SHUTDOWN, "nesvnic=%p, nesdev=%p, netdev=%p %s\n",
+			nesvnic, nesdev, netdev, netdev->name);
+	if (nesvnic->netdev_open == 0)
+		return 0;
+
+	if (netif_msg_ifdown(nesvnic))
+		printk(KERN_INFO PFX "%s: disabling interface\n", netdev->name);
+
+	/* Disable network packets */
+	napi_disable(&nesvnic->napi);
+	netif_stop_queue(netdev);
+	if ((nesdev->netdev[0] == netdev) && (nesvnic->logical_port == nesdev->mac_index)) {
+		nes_write_indexed(nesdev,
+				NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff);
+	}
+
+	nic_active_mask = ~((u32)(1 << nesvnic->nic_index));
+	nes_write_indexed(nesdev, NES_IDX_PERFECT_FILTER_HIGH+
+			(nesvnic->perfect_filter_index*8), 0);
+	nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_ACTIVE);
+	nic_active &= nic_active_mask;
+	nes_write_indexed(nesdev, NES_IDX_NIC_ACTIVE, nic_active);
+	nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
+	nic_active &= nic_active_mask;
+	nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
+	nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE);
+	nic_active &= nic_active_mask;
+	nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE, nic_active);
+	nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
+	nic_active &= nic_active_mask;
+	nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
+	nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON);
+	nic_active &= nic_active_mask;
+	nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active);
+
+
+	if (nesvnic->of_device_registered) {
+		nes_destroy_ofa_device(nesvnic->nesibdev);
+		nesvnic->nesibdev = NULL;
+		nesvnic->of_device_registered = 0;
+	}
+	nes_destroy_nic_qp(nesvnic);
+
+	nesvnic->netdev_open = 0;
+
+	return 0;
+}
+
+
+/**
+ * nes_nic_send - build one SQ WQE for skb at sq_head and advance sq_head; the visible caller holds sq_lock
+ */
+static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct nes_vnic *nesvnic = netdev_priv(netdev);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_hw_nic *nesnic = &nesvnic->nic;
+	struct nes_hw_nic_sq_wqe *nic_sqe;
+	struct tcphdr *tcph;
+	__le16 *wqe_fragment_length;
+	u32 wqe_misc;
+	u16 wqe_fragment_index = 1;	/* first fragment (0) is used by copy buffer */
+	u16 skb_fragment_index;
+	dma_addr_t bus_address;
+
+	nic_sqe = &nesnic->sq_vbase[nesnic->sq_head];
+	wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
+
+	/* setup the VLAN tag if present */
+	if (vlan_tx_tag_present(skb)) {
+		nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... VLAN = %08X\n",
+				netdev->name, vlan_tx_tag_get(skb));
+		wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE;
+		wqe_fragment_length[0] = (__force __le16) vlan_tx_tag_get(skb);
+	} else
+		wqe_misc = 0;
+
+	/* bump past the vlan tag; from here on wqe_fragment_length[0] is the copy-buffer length */
+	wqe_fragment_length++;
+	/*	wqe_fragment_address = (u64 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX]; */
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		tcph = tcp_hdr(skb);
+		if (1) {
+			if (skb_is_gso(skb)) {
+				/* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... seg size = %u\n",
+						netdev->name, skb_is_gso(skb)); */
+				wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE |
+						NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb);
+				set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX,
+						((u32)tcph->doff) |
+						(((u32)(((unsigned char *)tcph) - skb->data)) << 4));
+			} else {
+				wqe_misc |= NES_NIC_SQ_WQE_COMPLETION;
+			}
+		}
+	} else {	/* ip_summed != CHECKSUM_PARTIAL: tell the hardware not to checksum */
+		wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM | NES_NIC_SQ_WQE_COMPLETION;
+	}
+
+	set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX,
+				skb->len);
+	memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer,
+			skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE), skb_headlen(skb)));
+	wqe_fragment_length[0] = cpu_to_le16(min(((unsigned int)NES_FIRST_FRAG_SIZE),
+			skb_headlen(skb)));
+	wqe_fragment_length[1] = 0;
+	if (skb_headlen(skb) > NES_FIRST_FRAG_SIZE) {
+		if ((skb_shinfo(skb)->nr_frags + 1) > 4) {
+			nes_debug(NES_DBG_NIC_TX, "%s: Packet with %u fragments not sent, skb_headlen=%u\n",
+					netdev->name, skb_shinfo(skb)->nr_frags + 2, skb_headlen(skb));
+			kfree_skb(skb);
+			nesvnic->tx_sw_dropped++;
+			return NETDEV_TX_LOCKED;	/* skb already freed; the visible caller only tests != NETDEV_TX_OK */
+		}
+		set_bit(nesnic->sq_head, nesnic->first_frag_overflow);
+		bus_address = pci_map_single(nesdev->pcidev, skb->data + NES_FIRST_FRAG_SIZE,
+				skb_headlen(skb) - NES_FIRST_FRAG_SIZE, PCI_DMA_TODEVICE);
+		wqe_fragment_length[wqe_fragment_index++] =
+				cpu_to_le16(skb_headlen(skb) - NES_FIRST_FRAG_SIZE);
+		wqe_fragment_length[wqe_fragment_index] = 0;
+		set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
+				((u64)(bus_address)));
+		nesnic->tx_skb[nesnic->sq_head] = skb;
+	}
+
+	if (skb_headlen(skb) == skb->len) {
+		if (skb_headlen(skb) <= NES_FIRST_FRAG_SIZE) {
+			nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_2_1_IDX] = 0;
+			nesnic->tx_skb[nesnic->sq_head] = NULL;
+			dev_kfree_skb(skb);	/* whole packet was copied into the first-frag buffer above */
+		}
+	} else {
+		/* Map each page fragment and record its length and bus address in the WQE */
+		nesnic->tx_skb[nesnic->sq_head] = skb;
+		for (skb_fragment_index = 0; skb_fragment_index < skb_shinfo(skb)->nr_frags;
+				skb_fragment_index++) {
+			bus_address = pci_map_page( nesdev->pcidev,
+					skb_shinfo(skb)->frags[skb_fragment_index].page,
+					skb_shinfo(skb)->frags[skb_fragment_index].page_offset,
+					skb_shinfo(skb)->frags[skb_fragment_index].size,
+					PCI_DMA_TODEVICE);
+			wqe_fragment_length[wqe_fragment_index] =
+					cpu_to_le16(skb_shinfo(skb)->frags[skb_fragment_index].size);
+			set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index),
+				bus_address);
+			wqe_fragment_index++;
+			if (wqe_fragment_index < 5)
+				wqe_fragment_length[wqe_fragment_index] = 0;
+		}
+	}
+
+	set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_MISC_IDX, wqe_misc);
+	nesnic->sq_head++;
+	nesnic->sq_head &= nesnic->sq_size - 1;	/* wrap; the mask assumes sq_size is a power of two */
+
+	return NETDEV_TX_OK;
+}
+
+
+/**
+ * nes_netdev_start_xmit - transmit entry point; posts skb to the NIC SQ (several WQEs for a TSO skb with > 4 frags)
+ */
+static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct nes_vnic *nesvnic = netdev_priv(netdev);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_hw_nic *nesnic = &nesvnic->nic;
+	struct nes_hw_nic_sq_wqe *nic_sqe;
+	struct tcphdr *tcph;
+#define NES_MAX_TSO_FRAGS 18
+	/* 64K segment plus overflow on each side */
+	dma_addr_t tso_bus_address[NES_MAX_TSO_FRAGS];
+	dma_addr_t bus_address;
+	u32 tso_frag_index;
+	u32 tso_frag_count;
+	u32 tso_wqe_length;
+	u32 curr_tcp_seq;
+	u32 wqe_count=1;
+	u32 send_rc;
+	struct iphdr *iph;
+	unsigned long flags;
+	__le16 *wqe_fragment_length;
+	u32 nr_frags;
+	u32 original_first_length;
+	/* first fragment (0) is used by copy buffer */
+	u16 wqe_fragment_index=1;
+	u16 hoffset;
+	u16 nhoffset;
+	u16 wqes_needed;
+	u16 wqes_available;
+	u32 old_head;
+	u32 wqe_misc;
+
+	/* returning NETDEV_TX_OK means the skb was consumed, so it must be freed when dropped here */
+	if (!netif_carrier_ok(netdev)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	if (netif_queue_stopped(netdev))
+		return NETDEV_TX_BUSY;
+
+	local_irq_save(flags);
+	if (!spin_trylock(&nesnic->sq_lock)) {
+		local_irq_restore(flags);
+		nesvnic->sq_locked++;
+		return NETDEV_TX_LOCKED;
+	}
+
+	/* Check if SQ is full */
+	if ((((nesnic->sq_tail+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) == 1) {
+		if (!netif_queue_stopped(netdev)) {
+			netif_stop_queue(netdev);
+			barrier();
+			if ((((((volatile u16)nesnic->sq_tail)+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) != 1) {
+				netif_start_queue(netdev);
+				goto sq_no_longer_full;
+			}
+		}
+		nesvnic->sq_full++;
+		spin_unlock_irqrestore(&nesnic->sq_lock, flags);
+		return NETDEV_TX_BUSY;
+	}
+
+sq_no_longer_full:
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	if (skb_headlen(skb) > NES_FIRST_FRAG_SIZE) {
+		nr_frags++;
+	}
+	/* Check if too many fragments */
+	if (unlikely((nr_frags > 4))) {
+		if (skb_is_gso(skb)) {
+			nesvnic->segmented_tso_requests++;
+			nesvnic->tso_requests++;
+			old_head = nesnic->sq_head;
+			/* Basically 4 fragments available per WQE with extended fragments */
+			wqes_needed = nr_frags >> 2;
+			wqes_needed += (nr_frags&3)?1:0;
+			wqes_available = (((nesnic->sq_tail+nesnic->sq_size)-nesnic->sq_head) - 1) &
+					(nesnic->sq_size - 1);
+
+			if (unlikely(wqes_needed > wqes_available)) {
+				if (!netif_queue_stopped(netdev)) {
+					netif_stop_queue(netdev);
+					barrier();
+					wqes_available = (((((volatile u16)nesnic->sq_tail)+nesnic->sq_size)-nesnic->sq_head) - 1) &
+						(nesnic->sq_size - 1);
+					if (wqes_needed <= wqes_available) {
+						netif_start_queue(netdev);
+						goto tso_sq_no_longer_full;
+					}
+				}
+				nesvnic->sq_full++;
+				spin_unlock_irqrestore(&nesnic->sq_lock, flags);
+				nes_debug(NES_DBG_NIC_TX, "%s: HNIC SQ full- TSO request has too many frags!\n",
+						netdev->name);
+				return NETDEV_TX_BUSY;
+			}
+tso_sq_no_longer_full:
+			/* Map all the buffers */
+			for (tso_frag_count=0; tso_frag_count < skb_shinfo(skb)->nr_frags;
+					tso_frag_count++) {
+				tso_bus_address[tso_frag_count] = pci_map_page( nesdev->pcidev,
+						skb_shinfo(skb)->frags[tso_frag_count].page,
+						skb_shinfo(skb)->frags[tso_frag_count].page_offset,
+						skb_shinfo(skb)->frags[tso_frag_count].size,
+						PCI_DMA_TODEVICE);
+			}
+
+			tso_frag_index = 0;
+			curr_tcp_seq = ntohl(tcp_hdr(skb)->seq);
+			hoffset = skb_transport_header(skb) - skb->data;
+			nhoffset = skb_network_header(skb) - skb->data;
+			original_first_length = hoffset + ((((struct tcphdr *)skb_transport_header(skb))->doff)<<2);
+
+			for (wqe_count=0; wqe_count<((u32)wqes_needed); wqe_count++) {
+				tso_wqe_length = 0;
+				nic_sqe = &nesnic->sq_vbase[nesnic->sq_head];
+				wqe_fragment_length =
+						(__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
+				/* setup the VLAN tag if present */
+				if (vlan_tx_tag_present(skb)) {
+					nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... VLAN = %08X\n",
+							netdev->name, vlan_tx_tag_get(skb) );
+					wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE;
+					wqe_fragment_length[0] = (__force __le16) vlan_tx_tag_get(skb);
+				} else
+					wqe_misc = 0;
+
+				/* bump past the vlan tag */
+				wqe_fragment_length++;
+
+				/* Assumes header totally fits in allocated buffer and is in first fragment */
+				if (original_first_length > NES_FIRST_FRAG_SIZE) {
+					nes_debug(NES_DBG_NIC_TX, "ERROR: SKB header too big, headlen=%u, FIRST_FRAG_SIZE=%u\n",
+							original_first_length, NES_FIRST_FRAG_SIZE);
+					nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
+							" (%u frags), tso_size=%u\n",
+							netdev->name,
+							skb->len, skb_headlen(skb),
+							skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
+				}
+				memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer,
+						skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE),
+						original_first_length));
+				iph = (struct iphdr *)
+				(&nesnic->first_frag_vbase[nesnic->sq_head].buffer[nhoffset]);
+				tcph = (struct tcphdr *)
+				(&nesnic->first_frag_vbase[nesnic->sq_head].buffer[hoffset]);
+				if ((wqe_count+1)!=(u32)wqes_needed) {
+					tcph->fin = 0;
+					tcph->psh = 0;
+					tcph->rst = 0;
+					tcph->urg = 0;
+				}
+				if (wqe_count) {
+					tcph->syn = 0;
+				}
+				tcph->seq = htonl(curr_tcp_seq);
+				wqe_fragment_length[0] = cpu_to_le16(min(((unsigned int)NES_FIRST_FRAG_SIZE),
+						original_first_length));
+
+				wqe_fragment_index = 1;
+				if ((wqe_count==0) && (skb_headlen(skb) > original_first_length)) {
+					set_bit(nesnic->sq_head, nesnic->first_frag_overflow);
+					bus_address = pci_map_single(nesdev->pcidev, skb->data + original_first_length,
+							skb_headlen(skb) - original_first_length, PCI_DMA_TODEVICE);
+					wqe_fragment_length[wqe_fragment_index++] =
+						cpu_to_le16(skb_headlen(skb) - original_first_length);
+					wqe_fragment_length[wqe_fragment_index] = 0;
+					set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
+									bus_address);
+				}
+				while (wqe_fragment_index < 5) {
+					wqe_fragment_length[wqe_fragment_index] =
+							cpu_to_le16(skb_shinfo(skb)->frags[tso_frag_index].size);
+					set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index),
+						(u64)tso_bus_address[tso_frag_index]);
+					wqe_fragment_index++;
+					tso_wqe_length += skb_shinfo(skb)->frags[tso_frag_index++].size;
+					if (wqe_fragment_index < 5)
+						wqe_fragment_length[wqe_fragment_index] = 0;
+					if (tso_frag_index == tso_frag_count)
+						break;
+				}
+				if ((wqe_count+1) == (u32)wqes_needed) {
+					nesnic->tx_skb[nesnic->sq_head] = skb;
+				} else {
+					nesnic->tx_skb[nesnic->sq_head] = NULL;
+				}
+				wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb);
+				if ((tso_wqe_length + original_first_length) > skb_is_gso(skb)) {
+					wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE;
+				} else {
+					iph->tot_len = htons(tso_wqe_length + original_first_length - nhoffset);
+				}
+
+				set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_MISC_IDX,
+						 wqe_misc);
+				set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX,
+						((u32)tcph->doff) | (((u32)hoffset) << 4));
+
+				set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX,
+						tso_wqe_length + original_first_length);
+				curr_tcp_seq += tso_wqe_length;
+				nesnic->sq_head++;
+				nesnic->sq_head &= nesnic->sq_size-1;
+			}
+		} else {
+			nesvnic->linearized_skbs++;
+			hoffset = skb_transport_header(skb) - skb->data;
+			nhoffset = skb_network_header(skb) - skb->data;
+			if (skb_linearize(skb)) {	/* cannot flatten: drop rather than overrun the 4 WQE fragment slots */
+				dev_kfree_skb_any(skb);
+				nesvnic->tx_sw_dropped++;
+				spin_unlock_irqrestore(&nesnic->sq_lock, flags);
+				return NETDEV_TX_OK;
+			}
+			skb_set_transport_header(skb, hoffset);
+			skb_set_network_header(skb, nhoffset);
+			send_rc = nes_nic_send(skb, netdev);
+			if (send_rc != NETDEV_TX_OK) {
+				spin_unlock_irqrestore(&nesnic->sq_lock, flags);
+				return NETDEV_TX_OK;
+			}
+		}
+	} else {
+		send_rc = nes_nic_send(skb, netdev);
+		if (send_rc != NETDEV_TX_OK) {	/* nes_nic_send() freed the skb itself, so report it consumed */
+			spin_unlock_irqrestore(&nesnic->sq_lock, flags);
+			return NETDEV_TX_OK;
+		}
+	}
+
+	barrier();
+
+	if (wqe_count)
+		nes_write32(nesdev->regs+NES_WQE_ALLOC,
+				(wqe_count << 24) | (1 << 23) | nesvnic->nic.qp_id);
+
+	netdev->trans_start = jiffies;
+	spin_unlock_irqrestore(&nesnic->sq_lock, flags);
+
+	return NETDEV_TX_OK;
+}
+
+
+/**
+ * nes_netdev_get_stats - read the per-NIC and per-MAC counters, add them to the running totals, return netstats
+ */
+static struct net_device_stats *nes_netdev_get_stats(struct net_device *netdev)
+{
+	struct nes_vnic *nesvnic = netdev_priv(netdev);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	u64 u64temp;
+	u32 u32temp;
+
+	u32temp = nes_read_indexed(nesdev,	/* per-NIC registers are spaced 0x200 apart, selected by nic_index */
+			NES_IDX_ENDNODE0_NSTAT_RX_DISCARD + (nesvnic->nic_index*0x200));
+	nesvnic->netstats.rx_dropped += u32temp;	/* += presumably relies on clear-on-read counters -- TODO confirm */
+	nesvnic->endnode_nstat_rx_discard += u32temp;
+
+	u64temp = (u64)nes_read_indexed(nesdev,
+			NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO + (nesvnic->nic_index*0x200));
+	u64temp += ((u64)nes_read_indexed(nesdev,
+			NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI + (nesvnic->nic_index*0x200))) << 32;
+
+	nesvnic->endnode_nstat_rx_octets += u64temp;
+	nesvnic->netstats.rx_bytes += u64temp;
+
+	u64temp = (u64)nes_read_indexed(nesdev,
+			NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO + (nesvnic->nic_index*0x200));
+	u64temp += ((u64)nes_read_indexed(nesdev,
+			NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI + (nesvnic->nic_index*0x200))) << 32;
+
+	nesvnic->endnode_nstat_rx_frames += u64temp;
+	nesvnic->netstats.rx_packets += u64temp;
+
+	u64temp = (u64)nes_read_indexed(nesdev,
+			NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO + (nesvnic->nic_index*0x200));
+	u64temp += ((u64)nes_read_indexed(nesdev,
+			NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI + (nesvnic->nic_index*0x200))) << 32;
+
+	nesvnic->endnode_nstat_tx_octets += u64temp;
+	nesvnic->netstats.tx_bytes += u64temp;
+
+	u64temp = (u64)nes_read_indexed(nesdev,
+			NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO + (nesvnic->nic_index*0x200));
+	u64temp += ((u64)nes_read_indexed(nesdev,
+			NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI + (nesvnic->nic_index*0x200))) << 32;
+
+	nesvnic->endnode_nstat_tx_frames += u64temp;
+	nesvnic->netstats.tx_packets += u64temp;
+
+	u32temp = nes_read_indexed(nesdev,	/* MAC registers below are selected by mac_index, not nic_index */
+			NES_IDX_MAC_RX_SHORT_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+	nesvnic->netstats.rx_dropped += u32temp;
+	nesvnic->nesdev->mac_rx_errors += u32temp;
+	nesvnic->nesdev->mac_rx_short_frames += u32temp;
+
+	u32temp = nes_read_indexed(nesdev,
+			NES_IDX_MAC_RX_OVERSIZED_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+	nesvnic->netstats.rx_dropped += u32temp;
+	nesvnic->nesdev->mac_rx_errors += u32temp;
+	nesvnic->nesdev->mac_rx_oversized_frames += u32temp;
+
+	u32temp = nes_read_indexed(nesdev,
+			NES_IDX_MAC_RX_JABBER_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+	nesvnic->netstats.rx_dropped += u32temp;
+	nesvnic->nesdev->mac_rx_errors += u32temp;
+	nesvnic->nesdev->mac_rx_jabber_frames += u32temp;
+
+	u32temp = nes_read_indexed(nesdev,
+			NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+	nesvnic->netstats.rx_dropped += u32temp;
+	nesvnic->nesdev->mac_rx_errors += u32temp;
+	nesvnic->nesdev->mac_rx_symbol_err_frames += u32temp;
+
+	u32temp = nes_read_indexed(nesdev,
+			NES_IDX_MAC_RX_LENGTH_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+	nesvnic->netstats.rx_length_errors += u32temp;
+	nesvnic->nesdev->mac_rx_errors += u32temp;
+
+	u32temp = nes_read_indexed(nesdev,
+			NES_IDX_MAC_RX_CRC_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+	nesvnic->nesdev->mac_rx_errors += u32temp;
+	nesvnic->nesdev->mac_rx_crc_errors += u32temp;
+	nesvnic->netstats.rx_crc_errors += u32temp;
+
+	u32temp = nes_read_indexed(nesdev,
+			NES_IDX_MAC_TX_ERRORS + (nesvnic->nesdev->mac_index*0x200));
+	nesvnic->nesdev->mac_tx_errors += u32temp;
+	nesvnic->netstats.tx_errors += u32temp;
+
+	return &nesvnic->netstats;
+}
+
+
+/**
+ * nes_netdev_tx_timeout - tx timeout hook; only logs a debug message, no recovery is attempted here
+ */
+static void nes_netdev_tx_timeout(struct net_device *netdev)
+{
+	struct nes_vnic *nesvnic = netdev_priv(netdev);
+
+	if (netif_msg_timer(nesvnic))
+		nes_debug(NES_DBG_NIC_TX, "%s: tx timeout\n", netdev->name);
+}
+
+
+/**
+ * nes_netdev_set_mac_address - validate and store a new MAC, then reprogram the perfect filters; 0 or -EADDRNOTAVAIL
+ */
+static int nes_netdev_set_mac_address(struct net_device *netdev, void *p)
+{
+	struct nes_vnic *nesvnic = netdev_priv(netdev);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct sockaddr *mac_addr = p;
+	int i;
+	u32 macaddr_low;
+	u16 macaddr_high;
+
+	if (!is_valid_ether_addr(mac_addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	memcpy(netdev->dev_addr, mac_addr->sa_data, netdev->addr_len);
+	printk(KERN_INFO PFX "%s: Address length = %d, Address = %02X%02X%02X%02X%02X%02X..\n",
+		   __func__, netdev->addr_len,	/* sa_data is plain char: cast so bytes >= 0x80 do not sign-extend */
+		   (u8)mac_addr->sa_data[0], (u8)mac_addr->sa_data[1],
+		   (u8)mac_addr->sa_data[2], (u8)mac_addr->sa_data[3],
+		   (u8)mac_addr->sa_data[4], (u8)mac_addr->sa_data[5]);
+	macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
+	macaddr_high += (u16)netdev->dev_addr[1];
+	macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
+	macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
+	macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
+	macaddr_low += (u32)netdev->dev_addr[5];
+
+	for (i = 0; i < NES_MAX_PORT_COUNT; i++) {
+		if (nesvnic->qp_nic_index[i] == 0xf) {	/* 0xf ends the list of filter slots */
+			break;
+		}
+		nes_write_indexed(nesdev,
+				NES_IDX_PERFECT_FILTER_LOW + (nesvnic->qp_nic_index[i] * 8),
+				macaddr_low);
+		nes_write_indexed(nesdev,
+				NES_IDX_PERFECT_FILTER_HIGH + (nesvnic->qp_nic_index[i] * 8),
+				(u32)macaddr_high | NES_MAC_ADDR_VALID |
+				((((u32)nesvnic->nic_index) << 16)));
+	}
+	return 0;
+}
+
+
+/**
+ * nes_netdev_set_multicast_list - program promiscuous/all-multicast mode and the multicast perfect filters
+ */
+void nes_netdev_set_multicast_list(struct net_device *netdev)
+{
+	struct nes_vnic *nesvnic = netdev_priv(netdev);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct dev_mc_list *multicast_addr;
+	u32 nic_active_bit;
+	u32 nic_active;
+	u32 perfect_filter_register_address;
+	u32 macaddr_low;
+	u16 macaddr_high;
+	u8 mc_all_on = 0;
+	u8 mc_index;
+	int mc_nic_index = -1;	/* stays -1 unless mcrq_mcast_filter() supplies a value */
+
+	nic_active_bit = 1 << nesvnic->nic_index;	/* this vnic's bit in the *_ALL registers */
+
+	if (netdev->flags & IFF_PROMISC) {	/* promiscuous: set both the multicast-all and unicast-all bits */
+		nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
+		nic_active |= nic_active_bit;
+		nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
+		nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
+		nic_active |= nic_active_bit;
+		nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
+		mc_all_on = 1;
+	} else if ((netdev->flags & IFF_ALLMULTI) || (netdev->mc_count > NES_MULTICAST_PF_MAX) ||
+			   (nesvnic->nic_index > 3)) {	/* all-multicast: set multicast-all, clear unicast-all */
+		nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
+		nic_active |= nic_active_bit;
+		nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
+		nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
+		nic_active &= ~nic_active_bit;
+		nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
+		mc_all_on = 1;
+	} else {	/* filtered: clear both bits and rely on the perfect filters below */
+		nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
+		nic_active &= ~nic_active_bit;
+		nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
+		nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
+		nic_active &= ~nic_active_bit;
+		nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
+	}
+
+	nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscous = %d, All Multicast = %d.\n",
+			  netdev->mc_count, (netdev->flags & IFF_PROMISC)?1:0,
+			  (netdev->flags & IFF_ALLMULTI)?1:0);
+	if (!mc_all_on) {	/* perfect filters are only programmed in the filtered case */
+		multicast_addr = netdev->mc_list;
+		perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW + 0x80;
+		perfect_filter_register_address += nesvnic->nic_index*0x40;
+		for (mc_index=0; mc_index < NES_MULTICAST_PF_MAX; mc_index++) {	/* one 8-byte filter slot per iteration */
+			while (multicast_addr && nesvnic->mcrq_mcast_filter && ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic, multicast_addr->dmi_addr)) == 0))
+				multicast_addr = multicast_addr->next;	/* skip entries for which the filter callback returned 0 */
+
+			if (mc_nic_index < 0)	/* no usable callback result: fall back to this vnic's own index */
+				mc_nic_index = nesvnic->nic_index;
+			if (multicast_addr) {
+				nes_debug(NES_DBG_NIC_RX, "Assigning MC Address = %02X%02X%02X%02X%02X%02X to register 0x%04X nic_idx=%d\n",
+						  multicast_addr->dmi_addr[0], multicast_addr->dmi_addr[1],
+						  multicast_addr->dmi_addr[2], multicast_addr->dmi_addr[3],
+						  multicast_addr->dmi_addr[4], multicast_addr->dmi_addr[5],
+						  perfect_filter_register_address+(mc_index * 8), mc_nic_index);
+				macaddr_high = ((u16)multicast_addr->dmi_addr[0]) << 8;
+				macaddr_high += (u16)multicast_addr->dmi_addr[1];
+				macaddr_low = ((u32)multicast_addr->dmi_addr[2]) << 24;
+				macaddr_low += ((u32)multicast_addr->dmi_addr[3]) << 16;
+				macaddr_low += ((u32)multicast_addr->dmi_addr[4]) << 8;
+				macaddr_low += (u32)multicast_addr->dmi_addr[5];
+				nes_write_indexed(nesdev,
+						perfect_filter_register_address+(mc_index * 8),
+						macaddr_low);
+				nes_write_indexed(nesdev,
+						perfect_filter_register_address+4+(mc_index * 8),
+						(u32)macaddr_high | NES_MAC_ADDR_VALID |
+						((((u32)(1<<mc_nic_index)) << 16)));
+				multicast_addr = multicast_addr->next;
+			} else {	/* list exhausted: write 0 to the word that otherwise carries NES_MAC_ADDR_VALID */
+				nes_debug(NES_DBG_NIC_RX, "Clearing MC Address at register 0x%04X\n",
+						  perfect_filter_register_address+(mc_index * 8));
+				nes_write_indexed(nesdev,
+						perfect_filter_register_address+4+(mc_index * 8),
+						0);
+			}
+		}
+	}
+}
+
+
+/**
+ * nes_netdev_change_mtu - apply a new MTU to the interface
+ * @netdev: interface being reconfigured
+ * @new_mtu: requested MTU; must lie within [ETH_ZLEN, max_mtu]
+ * Updates the MTU and the derived max frame size, re-initializes the timer
+ * defaults (jumbo mode above 1500), and stops/reopens a running interface.
+ * Returns -EINVAL for an out-of-range MTU, otherwise 0 (reopen result unchecked).
+ */
+static int nes_netdev_change_mtu(struct	net_device *netdev,	int	new_mtu)
+{
+	struct nes_vnic *vnic = netdev_priv(netdev);
+	u8 jumbo;
+
+	if ((new_mtu > max_mtu) || (new_mtu < ETH_ZLEN))
+		return -EINVAL;
+
+	netdev->mtu = new_mtu;
+	vnic->max_frame_size = new_mtu + ETH_HLEN;
+	jumbo = (new_mtu > 1500) ? 1 : 0;
+	nes_nic_init_timer_defaults(vnic->nesdev, jumbo);
+
+	if (!netif_running(netdev))
+		return 0;
+
+	nes_netdev_stop(netdev);
+	nes_netdev_open(netdev);
+	return 0;
+}
+
+
+/**
+ * nes_netdev_exit - tear down a vnic's OFA device (if any) and its net_device
+ * @nesvnic: vnic whose devices are unregistered
+ */
+void nes_netdev_exit(struct nes_vnic *nesvnic)
+{
+	struct net_device *ndev = nesvnic->netdev;
+
+	nes_debug(NES_DBG_SHUTDOWN, "\n");
+
+	/* The OFA (ib) device is destroyed only when RDMA is enabled and it was registered. */
+	if (nesvnic->rdma_enabled && nesvnic->of_device_registered) {
+		nes_destroy_ofa_device(nesvnic->nesibdev);
+		nesvnic->nesibdev = NULL;
+		nesvnic->of_device_registered = 0;
+	}
+	unregister_netdev(ndev);
+	nes_debug(NES_DBG_SHUTDOWN, "\n");
+}
+
+
+#define NES_ETHTOOL_STAT_COUNT 55	/* number of entries in nes_ethtool_stringset[] */
+static const char nes_ethtool_stringset[NES_ETHTOOL_STAT_COUNT][ETH_GSTRING_LEN] = {	/* entry i names target_stat_values[i] in nes_netdev_get_ethtool_stats() */
+	"Link Change Interrupts",
+	"Linearized SKBs",
+	"T/GSO Requests",
+	"Pause Frames Sent",
+	"Pause Frames Received",
+	"Internal Routing Errors",
+	"SQ SW Dropped SKBs",
+	"SQ Locked",
+	"SQ Full",
+	"Segmented TSO Requests",
+	"Rx Symbol Errors",
+	"Rx Jabber Errors",
+	"Rx Oversized Frames",
+	"Rx Short Frames",
+	"Endnode Rx Discards",
+	"Endnode Rx Octets",
+	"Endnode Rx Frames",
+	"Endnode Tx Octets",
+	"Endnode Tx Frames",
+	"mh detected",
+	"mh pauses",
+	"Retransmission Count",
+	"CM Connects",
+	"CM Accepts",
+	"Disconnects",
+	"Connected Events",
+	"Connect Requests",
+	"CM Rejects",
+	"ModifyQP Timeouts",
+	"CreateQPs",
+	"SW DestroyQPs",
+	"DestroyQPs",
+	"CM Closes",
+	"CM Packets Sent",
+	"CM Packets Bounced",
+	"CM Packets Created",
+	"CM Packets Rcvd",
+	"CM Packets Dropped",
+	"CM Packets Retrans",
+	"CM Listens Created",
+	"CM Listens Destroyed",
+	"CM Backlog Drops",
+	"CM Loopbacks",
+	"CM Nodes Created",
+	"CM Nodes Destroyed",
+	"CM Accel Drops",
+	"CM Resets Received",
+	"Timer Inits",
+	"CQ Depth 1",
+	"CQ Depth 4",
+	"CQ Depth 16",
+	"CQ Depth 24",
+	"CQ Depth 32",
+	"CQ Depth 128",
+	"CQ Depth 256",
+};
+
+
+/**
+ * nes_netdev_get_rx_csum - report whether rx checksum offload is enabled
+ * @netdev: interface to query
+ *
+ * Returns 1 unless rx_checksum_disabled is set on the vnic, in which case 0.
+ */
+static u32 nes_netdev_get_rx_csum (struct net_device *netdev)
+{
+	struct nes_vnic *vnic = netdev_priv(netdev);
+
+	return vnic->rx_checksum_disabled ? 0 : 1;
+}
+
+
+/**
+ * nes_netdev_set_rx_csum - enable or disable rx checksum offload
+ * @netdev: interface to configure
+ * @enable: non-zero to enable, zero to disable
+ * Only the vnic's rx_checksum_disabled flag is updated. Always returns 0.
+ */
+static int nes_netdev_set_rx_csum(struct net_device *netdev, u32 enable)
+{
+	struct nes_vnic *vnic = netdev_priv(netdev);
+
+	vnic->rx_checksum_disabled = enable ? 0 : 1;
+	return 0;
+}
+
+
+/**
+ * nes_netdev_get_stats_count - number of ethtool statistics (NES_ETHTOOL_STAT_COUNT); @netdev is unused
+ */
+static int nes_netdev_get_stats_count(struct net_device *netdev)
+{
+	return NES_ETHTOOL_STAT_COUNT;
+}
+
+
+/**
+ * nes_netdev_get_strings - copy the ethtool statistics names to the caller
+ * Only ETH_SS_STATS is handled; other string sets leave the buffer untouched.
+ */
+static void nes_netdev_get_strings(struct net_device *netdev, u32 stringset,
+		u8 *ethtool_strings)
+{
+	if (stringset != ETH_SS_STATS)
+		return;
+	memcpy(ethtool_strings, nes_ethtool_stringset, sizeof(nes_ethtool_stringset));
+}
+
+
+/**
+ * nes_netdev_get_ethtool_stats - refresh hardware counters and fill the ethtool statistics array
+ */
+static void nes_netdev_get_ethtool_stats(struct net_device *netdev,
+		struct ethtool_stats *target_ethtool_stats, u64 *target_stat_values)
+{
+	u64 u64temp;
+	struct nes_vnic *nesvnic = netdev_priv(netdev);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	u32 nic_count;
+	u32 u32temp;
+
+	target_ethtool_stats->n_stats = NES_ETHTOOL_STAT_COUNT;
+	target_stat_values[0] = nesvnic->nesdev->link_status_interrupts;	/* slot i is named by nes_ethtool_stringset[i] */
+	target_stat_values[1] = nesvnic->linearized_skbs;
+	target_stat_values[2] = nesvnic->tso_requests;
+
+	u32temp = nes_read_indexed(nesdev,
+			NES_IDX_MAC_TX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+	nesvnic->nesdev->mac_pause_frames_sent += u32temp;	/* NOTE(review): += suggests the register reports a delta since the last read - confirm */
+	target_stat_values[3] = nesvnic->nesdev->mac_pause_frames_sent;
+
+	u32temp = nes_read_indexed(nesdev,
+			NES_IDX_MAC_RX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+	nesvnic->nesdev->mac_pause_frames_received += u32temp;
+
+	u32temp = nes_read_indexed(nesdev,
+			NES_IDX_PORT_RX_DISCARDS + (nesvnic->nesdev->mac_index*0x40));
+	nesvnic->nesdev->port_rx_discards += u32temp;
+	nesvnic->netstats.rx_dropped += u32temp;	/* side effect: the netdev statistics are updated as well */
+
+	u32temp = nes_read_indexed(nesdev,
+			NES_IDX_PORT_TX_DISCARDS + (nesvnic->nesdev->mac_index*0x40));
+	nesvnic->nesdev->port_tx_discards += u32temp;
+	nesvnic->netstats.tx_dropped += u32temp;
+
+	for (nic_count = 0; nic_count < NES_MAX_PORT_COUNT; nic_count++) {	/* per NIC index used by this vnic */
+		if (nesvnic->qp_nic_index[nic_count] == 0xf)	/* 0xf marks the end of the used entries */
+			break;
+
+		u32temp = nes_read_indexed(nesdev,
+				NES_IDX_ENDNODE0_NSTAT_RX_DISCARD +
+				(nesvnic->qp_nic_index[nic_count]*0x200));
+		nesvnic->netstats.rx_dropped += u32temp;
+		nesvnic->endnode_nstat_rx_discard += u32temp;
+
+		u64temp = (u64)nes_read_indexed(nesdev,	/* 64-bit counters are assembled from a LO and a HI 32-bit read */
+				NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO +
+				(nesvnic->qp_nic_index[nic_count]*0x200));
+		u64temp += ((u64)nes_read_indexed(nesdev,
+				NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI +
+				(nesvnic->qp_nic_index[nic_count]*0x200))) << 32;
+
+		nesvnic->endnode_nstat_rx_octets += u64temp;
+		nesvnic->netstats.rx_bytes += u64temp;
+
+		u64temp = (u64)nes_read_indexed(nesdev,
+				NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO +
+				(nesvnic->qp_nic_index[nic_count]*0x200));
+		u64temp += ((u64)nes_read_indexed(nesdev,
+				NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI +
+				(nesvnic->qp_nic_index[nic_count]*0x200))) << 32;
+
+		nesvnic->endnode_nstat_rx_frames += u64temp;
+		nesvnic->netstats.rx_packets += u64temp;
+
+		u64temp = (u64)nes_read_indexed(nesdev,
+				NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO +
+				(nesvnic->qp_nic_index[nic_count]*0x200));
+		u64temp += ((u64)nes_read_indexed(nesdev,
+				NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI +
+				(nesvnic->qp_nic_index[nic_count]*0x200))) << 32;
+
+		nesvnic->endnode_nstat_tx_octets += u64temp;
+		nesvnic->netstats.tx_bytes += u64temp;
+
+		u64temp = (u64)nes_read_indexed(nesdev,
+				NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO +
+				(nesvnic->qp_nic_index[nic_count]*0x200));
+		u64temp += ((u64)nes_read_indexed(nesdev,
+				NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI +
+				(nesvnic->qp_nic_index[nic_count]*0x200))) << 32;
+
+		nesvnic->endnode_nstat_tx_frames += u64temp;
+		nesvnic->netstats.tx_packets += u64temp;
+
+		u32temp = nes_read_indexed(nesdev,
+				NES_IDX_IPV4_TCP_REXMITS + (nesvnic->qp_nic_index[nic_count]*0x200));
+		nesvnic->endnode_ipv4_tcp_retransmits += u32temp;
+	}
+
+	target_stat_values[4] = nesvnic->nesdev->mac_pause_frames_received;
+	target_stat_values[5] = nesdev->nesadapter->nic_rx_eth_route_err;
+	target_stat_values[6] = nesvnic->tx_sw_dropped;
+	target_stat_values[7] = nesvnic->sq_locked;
+	target_stat_values[8] = nesvnic->sq_full;
+	target_stat_values[9] = nesvnic->segmented_tso_requests;
+	target_stat_values[10] = nesvnic->nesdev->mac_rx_symbol_err_frames;
+	target_stat_values[11] = nesvnic->nesdev->mac_rx_jabber_frames;
+	target_stat_values[12] = nesvnic->nesdev->mac_rx_oversized_frames;
+	target_stat_values[13] = nesvnic->nesdev->mac_rx_short_frames;
+	target_stat_values[14] = nesvnic->endnode_nstat_rx_discard;
+	target_stat_values[15] = nesvnic->endnode_nstat_rx_octets;
+	target_stat_values[16] = nesvnic->endnode_nstat_rx_frames;
+	target_stat_values[17] = nesvnic->endnode_nstat_tx_octets;
+	target_stat_values[18] = nesvnic->endnode_nstat_tx_frames;
+	target_stat_values[19] = mh_detected;	/* from here on most values are driver-wide variables, not per-vnic fields */
+	target_stat_values[20] = mh_pauses_sent;
+	target_stat_values[21] = nesvnic->endnode_ipv4_tcp_retransmits;
+	target_stat_values[22] = atomic_read(&cm_connects);
+	target_stat_values[23] = atomic_read(&cm_accepts);
+	target_stat_values[24] = atomic_read(&cm_disconnects);
+	target_stat_values[25] = atomic_read(&cm_connecteds);
+	target_stat_values[26] = atomic_read(&cm_connect_reqs);
+	target_stat_values[27] = atomic_read(&cm_rejects);
+	target_stat_values[28] = atomic_read(&mod_qp_timouts);
+	target_stat_values[29] = atomic_read(&qps_created);
+	target_stat_values[30] = atomic_read(&sw_qps_destroyed);
+	target_stat_values[31] = atomic_read(&qps_destroyed);
+	target_stat_values[32] = atomic_read(&cm_closes);
+	target_stat_values[33] = cm_packets_sent;
+	target_stat_values[34] = cm_packets_bounced;
+	target_stat_values[35] = cm_packets_created;
+	target_stat_values[36] = cm_packets_received;
+	target_stat_values[37] = cm_packets_dropped;
+	target_stat_values[38] = cm_packets_retrans;
+	target_stat_values[39] = cm_listens_created;
+	target_stat_values[40] = cm_listens_destroyed;
+	target_stat_values[41] = cm_backlog_drops;
+	target_stat_values[42] = atomic_read(&cm_loopbacks);
+	target_stat_values[43] = atomic_read(&cm_nodes_created);
+	target_stat_values[44] = atomic_read(&cm_nodes_destroyed);
+	target_stat_values[45] = atomic_read(&cm_accel_dropped_pkts);
+	target_stat_values[46] = atomic_read(&cm_resets_recvd);
+	target_stat_values[47] = int_mod_timer_init;
+	target_stat_values[48] = int_mod_cq_depth_1;
+	target_stat_values[49] = int_mod_cq_depth_4;
+	target_stat_values[50] = int_mod_cq_depth_16;
+	target_stat_values[51] = int_mod_cq_depth_24;
+	target_stat_values[52] = int_mod_cq_depth_32;
+	target_stat_values[53] = int_mod_cq_depth_128;
+	target_stat_values[54] = int_mod_cq_depth_256;
+
+}
+
+
+/**
+ * nes_netdev_get_drvinfo - fill in driver identification for ethtool
+ * @netdev: interface being queried
+ */
+static void nes_netdev_get_drvinfo(struct net_device *netdev,
+		struct ethtool_drvinfo *drvinfo)
+{
+	struct nes_vnic *vnic = netdev_priv(netdev);
+
+	strcpy(drvinfo->driver, DRV_NAME);
+	strcpy(drvinfo->version, DRV_VERSION);
+	strcpy(drvinfo->fw_version, "TBD");
+	strcpy(drvinfo->bus_info, pci_name(vnic->nesdev->pcidev));
+	/* Only a statistics count is reported; the other lengths are zero. */
+	drvinfo->n_stats = nes_netdev_get_stats_count(netdev);
+	drvinfo->testinfo_len = drvinfo->eedump_len = drvinfo->regdump_len = 0;
+}
+
+
+/**
+ * nes_netdev_set_coalesce - apply ethtool interrupt moderation settings to the adapter
+ */
+static int nes_netdev_set_coalesce(struct net_device *netdev,
+		struct ethtool_coalesce	*et_coalesce)
+{
+	struct nes_vnic	*nesvnic = netdev_priv(netdev);
+	struct nes_device *nesdev =	nesvnic->nesdev;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
+	unsigned long flags;
+
+	spin_lock_irqsave(&nesadapter->periodic_timer_lock,	flags);
+	if (et_coalesce->rx_max_coalesced_frames_low) {	/* a zero field leaves the current tune-timer value untouched */
+		shared_timer->threshold_low	 = et_coalesce->rx_max_coalesced_frames_low;
+	}
+	if (et_coalesce->rx_max_coalesced_frames_irq) {
+		shared_timer->threshold_target = et_coalesce->rx_max_coalesced_frames_irq;
+	}
+	if (et_coalesce->rx_max_coalesced_frames_high) {
+		shared_timer->threshold_high = et_coalesce->rx_max_coalesced_frames_high;
+	}
+	if (et_coalesce->rx_coalesce_usecs_low) {
+		shared_timer->timer_in_use_min = et_coalesce->rx_coalesce_usecs_low;
+	}
+	if (et_coalesce->rx_coalesce_usecs_high) {
+		shared_timer->timer_in_use_max = et_coalesce->rx_coalesce_usecs_high;
+	}
+	spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
+
+	/* using this to drive total interrupt moderation */
+	nesadapter->et_rx_coalesce_usecs_irq = et_coalesce->rx_coalesce_usecs_irq;	/* written after periodic_timer_lock is released */
+	if (et_coalesce->use_adaptive_rx_coalesce) {
+		nesadapter->et_use_adaptive_rx_coalesce	= 1;
+		nesadapter->timer_int_limit	= NES_TIMER_INT_LIMIT_DYNAMIC;
+		nesadapter->et_rx_coalesce_usecs_irq = 0;	/* the fixed interval stored above is discarded in adaptive mode */
+		if (et_coalesce->pkt_rate_low) {
+			nesadapter->et_pkt_rate_low	= et_coalesce->pkt_rate_low;
+		}
+	} else {
+		nesadapter->et_use_adaptive_rx_coalesce	= 0;
+		nesadapter->timer_int_limit	= NES_TIMER_INT_LIMIT;
+		if (nesadapter->et_rx_coalesce_usecs_irq) {	/* zero leaves NES_PERIODIC_CONTROL as it is */
+			nes_write32(nesdev->regs+NES_PERIODIC_CONTROL,
+					0x80000000 | ((u32)(nesadapter->et_rx_coalesce_usecs_irq*8)));
+		}
+	}
+	return 0;
+}
+
+
+/**
+ * nes_netdev_get_coalesce - report the current interrupt moderation settings
+ */
+static int nes_netdev_get_coalesce(struct net_device *netdev,
+		struct ethtool_coalesce	*et_coalesce)
+{
+	struct nes_vnic *vnic = netdev_priv(netdev);
+	struct nes_adapter *adapter = vnic->nesdev->nesadapter;
+	struct nes_hw_tune_timer *timer = &adapter->tune_timer;
+	struct ethtool_coalesce snapshot;
+	unsigned long flags;
+
+	/* Build the reply in a local copy; fields that are not set below stay zero. */
+	memset(&snapshot, 0, sizeof(snapshot));
+	snapshot.rx_coalesce_usecs_irq = adapter->et_rx_coalesce_usecs_irq;
+	snapshot.use_adaptive_rx_coalesce = adapter->et_use_adaptive_rx_coalesce;
+	snapshot.rate_sample_interval = adapter->et_rate_sample_interval;
+	snapshot.pkt_rate_low = adapter->et_pkt_rate_low;
+
+	spin_lock_irqsave(&adapter->periodic_timer_lock, flags);
+	snapshot.rx_max_coalesced_frames_low = timer->threshold_low;
+	snapshot.rx_max_coalesced_frames_irq = timer->threshold_target;
+	snapshot.rx_max_coalesced_frames_high = timer->threshold_high;
+	snapshot.rx_coalesce_usecs_low = timer->timer_in_use_min;
+	snapshot.rx_coalesce_usecs_high = timer->timer_in_use_max;
+	if (adapter->et_use_adaptive_rx_coalesce)
+		snapshot.rx_coalesce_usecs_irq = timer->timer_in_use;
+	spin_unlock_irqrestore(&adapter->periodic_timer_lock, flags);
+	*et_coalesce = snapshot;
+	return 0;
+}
+
+
+/**
+ * nes_netdev_get_pauseparam - report rx/tx pause (flow control) state
+ */
+static void nes_netdev_get_pauseparam(struct net_device *netdev,
+		struct ethtool_pauseparam *et_pauseparam)
+{
+	struct nes_device *dev = ((struct nes_vnic *)netdev_priv(netdev))->nesdev;
+
+	et_pauseparam->autoneg = 0;	/* always reported as 0 */
+	et_pauseparam->tx_pause = !dev->disable_tx_flow_control;
+	et_pauseparam->rx_pause = !dev->disable_rx_flow_control;
+}
+
+
+/**
+ * nes_netdev_set_pauseparam - enable or disable tx/rx pause frames
+ * @netdev: interface to configure
+ * @et_pauseparam: requested state; tx_pause/rx_pause are applied when they differ
+ * A request with autoneg set is accepted but ignored. Always returns 0.
+ */
+static int nes_netdev_set_pauseparam(struct net_device *netdev,
+		struct ethtool_pauseparam *et_pauseparam)
+{
+	struct nes_vnic *nesvnic = netdev_priv(netdev);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	u32 tx_config_idx = NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200);
+	u32 mpp_debug_idx = NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40);
+	u32 u32temp;
+
+	if (et_pauseparam->autoneg) {
+		/* TODO: should return unsupported */
+		return 0;
+	}
+	/* The modified value must go back to the register it was read from
+	 * (NES_IDX_MAC_TX_CONFIG); NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE is only a bit mask. */
+	if ((et_pauseparam->tx_pause == 1) && (nesdev->disable_tx_flow_control == 1)) {
+		u32temp = nes_read_indexed(nesdev, tx_config_idx);
+		u32temp |= NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE;
+		nes_write_indexed(nesdev, tx_config_idx, u32temp);
+		nesdev->disable_tx_flow_control = 0;
+	} else if ((et_pauseparam->tx_pause == 0) && (nesdev->disable_tx_flow_control == 0)) {
+		u32temp = nes_read_indexed(nesdev, tx_config_idx);
+		u32temp &= ~NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE;
+		nes_write_indexed(nesdev, tx_config_idx, u32temp);
+		nesdev->disable_tx_flow_control = 1;
+	}
+	/* rx pause is controlled through a DISABLE bit, hence the inverted set/clear */
+	if ((et_pauseparam->rx_pause == 1) && (nesdev->disable_rx_flow_control == 1)) {
+		u32temp = nes_read_indexed(nesdev, mpp_debug_idx);
+		u32temp &= ~NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE;
+		nes_write_indexed(nesdev, mpp_debug_idx, u32temp);
+		nesdev->disable_rx_flow_control = 0;
+	} else if ((et_pauseparam->rx_pause == 0) && (nesdev->disable_rx_flow_control == 0)) {
+		u32temp = nes_read_indexed(nesdev, mpp_debug_idx);
+		u32temp |= NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE;
+		nes_write_indexed(nesdev, mpp_debug_idx, u32temp);
+		nesdev->disable_rx_flow_control = 1;
+	}
+
+	return 0;
+}
+
+
+/**
+ * nes_netdev_get_settings - report link settings to ethtool
+ */
+static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd *et_cmd)
+{
+	struct nes_vnic *vnic = netdev_priv(netdev);
+	struct nes_device *dev = vnic->nesdev;
+	struct nes_adapter *adapter = dev->nesadapter;
+	u16 phy_val;
+
+	/* Values shared by every mode; the IRIS case overrides the port below. */
+	et_cmd->duplex = DUPLEX_FULL;
+	et_cmd->port = PORT_MII;
+	et_cmd->maxtxpkt = 511;
+	et_cmd->maxrxpkt = 511;
+
+	if (adapter->OneG_Mode) {
+		/* 1G mode: autoneg state is bit 0x1000 of the value read from the PHY. */
+		nes_read_1G_phy_reg(dev, 0, adapter->phy_index[dev->mac_index], &phy_val);
+		et_cmd->autoneg = (phy_val & 0x1000) ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+		et_cmd->supported = SUPPORTED_1000baseT_Full|SUPPORTED_Autoneg;
+		et_cmd->advertising = ADVERTISED_1000baseT_Full|ADVERTISED_Autoneg;
+		et_cmd->speed = SPEED_1000;
+		et_cmd->transceiver = XCVR_EXTERNAL;
+		et_cmd->phy_address = adapter->phy_index[dev->mac_index];
+		return 0;
+	}
+
+	/* Otherwise: fixed 10G speed with autonegotiation reported as disabled. */
+	et_cmd->speed = SPEED_10000;
+	et_cmd->autoneg = AUTONEG_DISABLE;
+	if (adapter->phy_type[vnic->logical_port] == NES_PHY_TYPE_IRIS) {
+		et_cmd->transceiver = XCVR_EXTERNAL;
+		et_cmd->port = PORT_FIBRE;
+		et_cmd->supported = SUPPORTED_FIBRE;
+		et_cmd->advertising = ADVERTISED_FIBRE;
+		et_cmd->phy_address = adapter->phy_index[dev->mac_index];
+	} else {
+		et_cmd->transceiver = XCVR_INTERNAL;
+		et_cmd->supported = SUPPORTED_10000baseT_Full;
+		et_cmd->advertising = ADVERTISED_10000baseT_Full;
+		et_cmd->phy_address = dev->mac_index;
+	}
+	return 0;
+}
+
+
+/**
+ * nes_netdev_set_settings - apply ethtool link settings (autoneg, 1G mode only)
+ * @netdev: interface to configure
+ * @et_cmd: requested settings; only the autoneg field is consulted
+ *
+ * Outside OneG_Mode nothing is changed. Always returns 0.
+ */
+static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd *et_cmd)
+{
+	struct nes_vnic *vnic = netdev_priv(netdev);
+	struct nes_device *dev = vnic->nesdev;
+	struct nes_adapter *adapter = dev->nesadapter;
+	u16 phy_val;
+
+	if (!adapter->OneG_Mode)
+		return 0;
+
+	nes_read_1G_phy_reg(dev, 0, adapter->phy_index[dev->mac_index], &phy_val);
+	if (et_cmd->autoneg)
+		phy_val |= 0x1300;	/* full duplex, autoneg, restart autonegotiation */
+	else
+		phy_val &= ~0x1000;	/* autoneg off */
+	nes_write_1G_phy_reg(dev, 0, adapter->phy_index[dev->mac_index], phy_val);
+
+	return 0;
+}
+
+
+static struct ethtool_ops nes_ethtool_ops = {	/* installed as netdev->ethtool_ops by nes_netdev_init() */
+	.get_link = ethtool_op_get_link,
+	.get_settings = nes_netdev_get_settings,
+	.set_settings = nes_netdev_set_settings,
+	.get_tx_csum = ethtool_op_get_tx_csum,
+	.get_rx_csum = nes_netdev_get_rx_csum,
+	.get_sg = ethtool_op_get_sg,
+	.get_strings = nes_netdev_get_strings,
+	.get_stats_count = nes_netdev_get_stats_count,
+	.get_ethtool_stats = nes_netdev_get_ethtool_stats,
+	.get_drvinfo = nes_netdev_get_drvinfo,
+	.get_coalesce = nes_netdev_get_coalesce,
+	.set_coalesce = nes_netdev_set_coalesce,
+	.get_pauseparam = nes_netdev_get_pauseparam,
+	.set_pauseparam = nes_netdev_set_pauseparam,
+	.set_tx_csum = ethtool_op_set_tx_csum,
+	.set_rx_csum = nes_netdev_set_rx_csum,
+	.set_sg = ethtool_op_set_sg,
+	.get_tso = ethtool_op_get_tso,
+	.set_tso = ethtool_op_set_tso,
+};
+
+
+static void nes_netdev_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp)
+{
+	struct nes_vnic *vnic = netdev_priv(netdev);
+	struct nes_device *dev = vnic->nesdev;
+	u32 diag;
+
+	/* Record the group on the vnic. */
+	vnic->vlan_grp = grp;
+
+	/*
+	 * VLAN stripping is switched with bit 0x02000000 of PCIX_DIAG:
+	 * the bit is cleared when a group is given and set otherwise.
+	 */
+	diag = nes_read_indexed(dev, NES_IDX_PCIX_DIAG);
+	diag = grp ? (diag & 0xfdffffff) : (diag | 0x02000000);
+	nes_write_indexed(dev, NES_IDX_PCIX_DIAG, diag);
+}
+
+
+/**
+ * nes_netdev_init - allocate and set up a net_device for the next vnic of @nesdev
+ * @nesdev: device the new interface belongs to
+ * @mmio_addr: not referenced by this function
+ *
+ * Returns the new net_device, or NULL if alloc_etherdev() fails.
+ */
+struct net_device *nes_netdev_init(struct nes_device *nesdev,
+		void __iomem *mmio_addr)
+{
+	u64 u64temp;
+	struct nes_vnic *nesvnic = NULL;
+	struct net_device *netdev;
+	struct nic_qp_map *curr_qp_map;
+	u32 u32temp;
+	u16 phy_data;
+	u16 temp_phy_data;
+
+	netdev = alloc_etherdev(sizeof(struct nes_vnic));
+	if (!netdev) {
+		printk(KERN_ERR PFX "nesvnic etherdev alloc failed\n");
+		return NULL;
+	}
+
+	nes_debug(NES_DBG_INIT, "netdev = %p, %s\n", netdev, netdev->name);
+
+	SET_NETDEV_DEV(netdev, &nesdev->pcidev->dev);
+
+	nesvnic = netdev_priv(netdev);
+	memset(nesvnic, 0, sizeof(*nesvnic));
+
+	netdev->open = nes_netdev_open;
+	netdev->stop = nes_netdev_stop;
+	netdev->hard_start_xmit = nes_netdev_start_xmit;
+	netdev->get_stats = nes_netdev_get_stats;
+	netdev->tx_timeout = nes_netdev_tx_timeout;
+	netdev->set_mac_address = nes_netdev_set_mac_address;
+	netdev->set_multicast_list = nes_netdev_set_multicast_list;
+	netdev->change_mtu = nes_netdev_change_mtu;
+	netdev->watchdog_timeo = NES_TX_TIMEOUT;
+	netdev->irq = nesdev->pcidev->irq;
+	netdev->mtu = ETH_DATA_LEN;
+	netdev->hard_header_len = ETH_HLEN;
+	netdev->addr_len = ETH_ALEN;
+	netdev->type = ARPHRD_ETHER;
+	netdev->features = NETIF_F_HIGHDMA;
+	netdev->ethtool_ops = &nes_ethtool_ops;
+	netif_napi_add(netdev, &nesvnic->napi, nes_netdev_poll, 128);
+	nes_debug(NES_DBG_INIT, "Enabling VLAN Insert/Delete.\n");
+	netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
+	netdev->vlan_rx_register = nes_netdev_vlan_rx_register;
+	netdev->features |= NETIF_F_LLTX;
+
+	/* Fill in the port structure */
+	nesvnic->netdev = netdev;
+	nesvnic->nesdev = nesdev;
+	nesvnic->msg_enable = netif_msg_init(debug, default_msg);
+	nesvnic->netdev_index = nesdev->netdev_count;
+	nesvnic->perfect_filter_index = nesdev->nesadapter->netdev_count;
+	nesvnic->max_frame_size = netdev->mtu+netdev->hard_header_len;
+
+	curr_qp_map = nic_qp_mapping_per_function[PCI_FUNC(nesdev->pcidev->devfn)];
+	nesvnic->nic.qp_id = curr_qp_map[nesdev->netdev_count].qpid;
+	nesvnic->nic_index = curr_qp_map[nesdev->netdev_count].nic_index;
+	nesvnic->logical_port = curr_qp_map[nesdev->netdev_count].logical_port;
+
+	/* Setup the burned in MAC address */
+	u64temp = (u64)nesdev->nesadapter->mac_addr_low;
+	u64temp += ((u64)nesdev->nesadapter->mac_addr_high) << 32;
+	u64temp += nesvnic->nic_index;
+	netdev->dev_addr[0] = (u8)(u64temp>>40);
+	netdev->dev_addr[1] = (u8)(u64temp>>32);
+	netdev->dev_addr[2] = (u8)(u64temp>>24);
+	netdev->dev_addr[3] = (u8)(u64temp>>16);
+	netdev->dev_addr[4] = (u8)(u64temp>>8);
+	netdev->dev_addr[5] = (u8)u64temp;
+	memcpy(netdev->perm_addr, netdev->dev_addr, 6);
+
+	if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV)) {
+		/* TSO and GSO are offered everywhere except NE020_REV logical ports >= 2 */
+		netdev->features |= NETIF_F_GSO | NETIF_F_TSO | NETIF_F_SG | NETIF_F_IP_CSUM;
+	} else {
+		netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
+	}
+
+	nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d,"
+			" nic_index = %d, logical_port = %d, mac_index = %d.\n",
+			nesvnic, (unsigned long)netdev->features, nesvnic->nic.qp_id,
+			nesvnic->nic_index, nesvnic->logical_port,  nesdev->mac_index);
+
+	if (nesvnic->nesdev->nesadapter->port_count == 1) {
+		nesvnic->qp_nic_index[0] = nesvnic->nic_index;
+		nesvnic->qp_nic_index[1] = nesvnic->nic_index + 1;
+		if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) {
+			nesvnic->qp_nic_index[2] = 0xf;
+			nesvnic->qp_nic_index[3] = 0xf;
+		} else {
+			nesvnic->qp_nic_index[2] = nesvnic->nic_index + 2;
+			nesvnic->qp_nic_index[3] = nesvnic->nic_index + 3;
+		}
+	} else {
+		if (nesvnic->nesdev->nesadapter->port_count == 2) {
+			nesvnic->qp_nic_index[0] = nesvnic->nic_index;
+			nesvnic->qp_nic_index[1] = nesvnic->nic_index + 2;
+			nesvnic->qp_nic_index[2] = 0xf;
+			nesvnic->qp_nic_index[3] = 0xf;
+		} else {
+			nesvnic->qp_nic_index[0] = nesvnic->nic_index;
+			nesvnic->qp_nic_index[1] = 0xf;
+			nesvnic->qp_nic_index[2] = 0xf;
+			nesvnic->qp_nic_index[3] = 0xf;
+		}
+	}
+	nesvnic->next_qp_nic_index = 0;
+
+	nesvnic->rdma_enabled = (nesdev->netdev_count == 0) ? 1 : 0;	/* only the first netdev of a device gets RDMA */
+	nesvnic->nic_cq.cq_number = nesvnic->nic.qp_id;
+	spin_lock_init(&nesvnic->tx_lock);
+	nesdev->netdev[nesdev->netdev_count] = netdev;
+
+	nes_debug(NES_DBG_INIT, "Adding nesvnic (%p) to the adapters nesvnic_list for MAC%d.\n",
+			nesvnic, nesdev->mac_index);
+	list_add_tail(&nesvnic->list, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]);
+
+	if ((nesdev->netdev_count == 0) &&
+			(PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index)) {
+		nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n",
+				NES_IDX_PHY_PCS_CONTROL_STATUS0+(0x200*(nesvnic->logical_port&1)));
+		u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
+				(0x200*(nesvnic->logical_port&1)));
+		u32temp |= 0x00200000;
+		nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
+				(0x200*(nesvnic->logical_port&1)), u32temp);
+		u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
+				(0x200*(nesvnic->logical_port&1)) );
+		if ((u32temp&0x0f1f0000) == 0x0f0f0000) {
+			if (nesdev->nesadapter->phy_type[nesvnic->logical_port] == NES_PHY_TYPE_IRIS) {
+				nes_init_phy(nesdev);
+				nes_read_10G_phy_reg(nesdev, 1,
+						nesdev->nesadapter->phy_index[nesvnic->logical_port]);
+				temp_phy_data = (u16)nes_read_indexed(nesdev,
+									NES_IDX_MAC_MDIO_CONTROL);
+				u32temp = 20;
+				/* re-read until two consecutive reads agree, at most 20 times */
+				do {
+					nes_read_10G_phy_reg(nesdev, 1,
+							nesdev->nesadapter->phy_index[nesvnic->logical_port]);
+					phy_data = (u16)nes_read_indexed(nesdev,
+									NES_IDX_MAC_MDIO_CONTROL);
+					if ((phy_data == temp_phy_data) || (!(--u32temp)))
+						break;
+					temp_phy_data = phy_data;
+				} while (1);
+				if (phy_data & 4) {
+					nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
+					nesvnic->linkup = 1;
+				} else {
+					nes_debug(NES_DBG_INIT, "The Link is DOWN!!.\n");
+				}
+			} else {
+				nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
+				nesvnic->linkup = 1;
+			}
+		}
+		nes_debug(NES_DBG_INIT, "Setting up MAC interrupt mask.\n");
+		/* clear the MAC interrupt status, assumes direct logical to physical mapping */
+		u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS+(0x200*nesvnic->logical_port));
+		nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp);
+		nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS+(0x200*nesvnic->logical_port), u32temp);
+
+		if (nesdev->nesadapter->phy_type[nesvnic->logical_port] != NES_PHY_TYPE_IRIS)
+			nes_init_phy(nesdev);
+
+		nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesvnic->logical_port),
+				~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT |
+				NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR));
+	}
+	return netdev;
+}
+
+
+/**
+ * nes_netdev_destroy - unlink a vnic and free its net_device
+ * @netdev: device whose vnic is removed from its list and then freed
+ *
+ * nes_netdev_stop() is not called here; the 'stop' method is expected to
+ * be invoked by the Linux stack.
+ */
+void nes_netdev_destroy(struct net_device *netdev)
+{
+	struct nes_vnic *vnic = netdev_priv(netdev);
+
+	list_del(&vnic->list);
+
+	if (vnic->of_device_registered)
+		nes_destroy_ofa_device(vnic->nesibdev);
+
+	free_netdev(netdev);
+}
+
+
+/**
+ * nes_nic_cm_xmit - send a CM-generated packet out of @netdev
+ * Sets skb->dev, hands the skb to dev_queue_xmit() and returns its result;
+ * a non-zero result is also reported through nes_debug().
+ */
+int nes_nic_cm_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	int rc;
+
+	skb->dev = netdev;
+	rc = dev_queue_xmit(skb);
+	if (rc)
+		nes_debug(NES_DBG_CM, "Bad return code from dev_queue_xmit %d\n", rc);
+	return rc;
+}
diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h
new file mode 100644
index 0000000..e64306b
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_user.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect.  All rights reserved.
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef NES_USER_H
+#define NES_USER_H
+
+#include <linux/types.h>
+
+#define NES_ABI_USERSPACE_VER 1
+#define NES_ABI_KERNEL_VER    1
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct nes_alloc_ucontext_req {
+	__u32 reserved32;
+	__u8  userspace_ver;	/* presumably the caller's NES_ABI_USERSPACE_VER -- confirm in the kernel handler */
+	__u8  reserved8[3];	/* rounds the struct up to 8 bytes (4 + 1 + 3) */
+};
+
+struct nes_alloc_ucontext_resp {
+	__u32 max_pds; /* maximum pds allowed for this user process */
+	__u32 max_qps; /* maximum qps allowed for this user process */
+	__u32 wq_size; /* size of the WQs (sq+rq) allocated to the mmapped area */
+	__u8  virtwq;  /* flag to indicate if virtual WQ are to be used or not */
+	__u8  kernel_ver;	/* presumably NES_ABI_KERNEL_VER as reported by the kernel -- confirm */
+	__u8  reserved[2];	/* rounds the struct up to 16 bytes (3 * 4 + 1 + 1 + 2) */
+};
+
+struct nes_alloc_pd_resp {
+	__u32 pd_id;
+	__u32 mmap_db_index;	/* presumably selects the doorbell mapping for this PD -- confirm against the mmap handler */
+};
+
+struct nes_create_cq_req {
+	__u64 user_cq_buffer;	/* __u64 rather than a pointer type, per the layout rule stated earlier in this header */
+	__u32 mcrqf;
+	__u8 reserved[4];	/* rounds the struct up to 16 bytes (8 + 4 + 4) */
+};
+
+struct nes_create_qp_req {
+	__u64 user_wqe_buffers;	/* __u64 rather than a pointer type, per the layout rule stated earlier in this header */
+};
+
+enum iwnes_memreg_type {	/* presumably the values carried in nes_mem_reg_req.reg_type -- confirm */
+	IWNES_MEMREG_TYPE_MEM = 0x0000,
+	IWNES_MEMREG_TYPE_QP = 0x0001,
+	IWNES_MEMREG_TYPE_CQ = 0x0002,
+	IWNES_MEMREG_TYPE_MW = 0x0003,
+	IWNES_MEMREG_TYPE_FMR = 0x0004,
+};
+
+struct nes_mem_reg_req {
+	__u32 reg_type;	/* indicates if id is memory, QP or CQ (see enum iwnes_memreg_type) */
+	__u32 reserved;	/* keeps the struct at 8 bytes */
+};
+
+struct nes_create_cq_resp {
+	__u32 cq_id;
+	__u32 cq_size;
+	__u32 mmap_db_index;	/* presumably selects the doorbell mapping for this CQ -- confirm */
+	__u32 reserved;	/* keeps the struct at 16 bytes */
+};
+
+struct nes_create_qp_resp {	/* six __u32 fields, 24 bytes, no padding member */
+	__u32 qp_id;
+	__u32 actual_sq_size;
+	__u32 actual_rq_size;
+	__u32 mmap_sq_db_index;
+	__u32 mmap_rq_db_index;
+	__u32 nes_drv_opt;	/* presumably a copy of the module's nes_drv_opt flags -- confirm */
+};
+
+#endif				/* NES_USER_H */
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
new file mode 100644
index 0000000..c4ec6ac
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -0,0 +1,917 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include "nes.h"
+
+
+
+static u16 nes_read16_eeprom(void __iomem *addr, u16 offset);
+
+u32 mh_detected;	/* bumped by nes_mh_fix() each time it goes on to the MAC/SerDes reset sequence */
+u32 mh_pauses_sent;	/* bumped by nes_mh_fix() each time it writes 0x00000005 to MAC_TX_CONTROL */
+
+/**
+ * nes_read_eeprom_values - walk the EEPROM section chain and cache the adapter settings in @nesadapter; returns 0, or -1 on a bad firmware/software image tag
+ */
+int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesadapter)
+{
+	u32 mac_addr_low;
+	u16 mac_addr_high;
+	u16 eeprom_data;
+	u16 eeprom_offset;
+	u16 next_section_address;
+	u16 sw_section_ver;
+	u8  major_ver = 0;
+	u8  minor_ver = 0;
+
+	/* TODO: deal with EEPROM endian issues */
+	if (nesadapter->firmware_eeprom_offset == 0) {	/* the whole parse is skipped once this offset is non-zero (it is set below) */
+		/* Read the EEPROM Parameters: word 0 gives the firmware section offset (low 7 bits x8, doubled when bit 7 is set, plus 2) */
+		eeprom_data = nes_read16_eeprom(nesdev->regs, 0);
+		nes_debug(NES_DBG_HW, "EEPROM Offset 0  = 0x%04X\n", eeprom_data);
+		eeprom_offset = 2 + (((eeprom_data & 0x007f) << 3) <<
+				((eeprom_data & 0x0080) >> 7));
+		nes_debug(NES_DBG_HW, "Firmware Offset = 0x%04X\n", eeprom_offset);
+		nesadapter->firmware_eeprom_offset = eeprom_offset;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 4);
+		if (eeprom_data != 0x5746) {	/* section tag: bytes 0x57 'W', 0x46 'F' */
+			nes_debug(NES_DBG_HW, "Not a valid Firmware Image = 0x%04X\n", eeprom_data);
+			return -1;
+		}
+
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
+		nes_debug(NES_DBG_HW, "EEPROM Offset %u  = 0x%04X\n",
+				eeprom_offset + 2, eeprom_data);
+		eeprom_offset += ((eeprom_data & 0x00ff) << 3) << ((eeprom_data & 0x0100) >> 8);	/* section length: low byte x8, doubled when bit 8 is set */
+		nes_debug(NES_DBG_HW, "Software Offset = 0x%04X\n", eeprom_offset);
+		nesadapter->software_eeprom_offset = eeprom_offset;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 4);
+		if (eeprom_data != 0x5753) {	/* section tag: bytes 0x57 'W', 0x53 'S' */
+			printk("Not a valid Software Image = 0x%04X\n", eeprom_data);
+			return -1;
+		}
+		sw_section_ver = nes_read16_eeprom(nesdev->regs, nesadapter->software_eeprom_offset  + 6);
+		nes_debug(NES_DBG_HW, "Software section version number = 0x%04X\n",
+				sw_section_ver);
+
+		/* from here on a tag mismatch is not fatal: it only skips the firmware version lookup */
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
+		nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section)  = 0x%04X\n",
+				eeprom_offset + 2, eeprom_data);
+		next_section_address = eeprom_offset + (((eeprom_data & 0x00ff) << 3) <<
+				((eeprom_data & 0x0100) >> 8));
+		eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
+		if (eeprom_data != 0x414d) {	/* bytes 0x41 'A', 0x4d 'M' */
+			nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x414d but was 0x%04X\n",
+					eeprom_data);
+			goto no_fw_rev;
+		}
+		eeprom_offset = next_section_address;
+
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
+		nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section)  = 0x%04X\n",
+				eeprom_offset + 2, eeprom_data);
+		next_section_address = eeprom_offset + (((eeprom_data & 0x00ff) << 3) <<
+				((eeprom_data & 0x0100) >> 8));
+		eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
+		if (eeprom_data != 0x4f52) {	/* bytes 0x4f 'O', 0x52 'R' */
+			nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x4f52 but was 0x%04X\n",
+					eeprom_data);
+			goto no_fw_rev;
+		}
+		eeprom_offset = next_section_address;
+
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
+		nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section)  = 0x%04X\n",
+				eeprom_offset + 2, eeprom_data);
+		next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3);	/* NOTE(review): from here the bit-8 doubling used above is not applied -- confirm this is intended */
+		eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
+		if (eeprom_data != 0x5746) {
+			nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x5746 but was 0x%04X\n",
+					eeprom_data);
+			goto no_fw_rev;
+		}
+		eeprom_offset = next_section_address;
+
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
+		nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section)  = 0x%04X\n",
+				eeprom_offset + 2, eeprom_data);
+		next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3);
+		eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
+		if (eeprom_data != 0x5753) {
+			nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x5753 but was 0x%04X\n",
+					eeprom_data);
+			goto no_fw_rev;
+		}
+		eeprom_offset = next_section_address;
+
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
+		nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section)  = 0x%04X\n",
+				eeprom_offset + 2, eeprom_data);
+		next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3);
+		eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
+		if (eeprom_data != 0x414d) {
+			nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x414d but was 0x%04X\n",
+					eeprom_data);
+			goto no_fw_rev;
+		}
+		eeprom_offset = next_section_address;
+
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
+		nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section)  = 0x%04X\n",
+				eeprom_offset + 2, eeprom_data);
+		next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3);
+		eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
+		if (eeprom_data != 0x464e) {	/* bytes 0x46 'F', 0x4e 'N' */
+			nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x464e but was 0x%04X\n",
+					eeprom_data);
+			goto no_fw_rev;
+		}
+		eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 8);	/* version word: major in the high byte, minor in the low byte */
+		printk(PFX "Firmware version %u.%u\n", (u8)(eeprom_data>>8), (u8)eeprom_data);
+		major_ver = (u8)(eeprom_data >> 8);
+		minor_ver = (u8)(eeprom_data);
+
+		if (nes_drv_opt & NES_DRV_OPT_DISABLE_VIRT_WQ) {
+			nes_debug(NES_DBG_HW, "Virtual WQs have been disabled\n");
+		} else if (((major_ver == 2) && (minor_ver > 21)) || ((major_ver > 2) && (major_ver != 255))) {	/* virtual WQs need firmware newer than 2.21; major 255 is excluded */
+			nesadapter->virtwq = 1;
+		}
+		nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8))  <<  16) +
+				(u32)((u8)eeprom_data);
+
+no_fw_rev:
+		/* eeprom is valid; the settings below are read at fixed offsets from the software section */
+		eeprom_offset = nesadapter->software_eeprom_offset;
+		eeprom_offset += 8;
+		nesadapter->netdev_max = (u8)nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		eeprom_offset += 2;
+		mac_addr_high = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		eeprom_offset += 2;
+		mac_addr_low = (u32)nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		eeprom_offset += 2;
+		mac_addr_low <<= 16;
+		mac_addr_low += (u32)nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nes_debug(NES_DBG_HW, "Base MAC Address = 0x%04X%08X\n",
+				mac_addr_high, mac_addr_low);
+		nes_debug(NES_DBG_HW, "MAC Address count = %u\n", nesadapter->netdev_max);
+
+		nesadapter->mac_addr_low = mac_addr_low;
+		nesadapter->mac_addr_high = mac_addr_high;
+
+		/* Read the Phy Type array */
+		eeprom_offset += 10;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nesadapter->phy_type[0] = (u8)(eeprom_data >> 8);
+		nesadapter->phy_type[1] = (u8)eeprom_data;
+
+		/* Read the port array -- NOTE(review): this word is stored into phy_type[2..3]; confirm the label */
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nesadapter->phy_type[2] = (u8)(eeprom_data >> 8);
+		nesadapter->phy_type[3] = (u8)eeprom_data;
+		/* port_count is set by soft reset reg */
+		nes_debug(NES_DBG_HW, "port_count = %u, port 0 -> %u, port 1 -> %u,"
+				" port 2 -> %u, port 3 -> %u\n",
+				nesadapter->port_count,
+				nesadapter->phy_type[0], nesadapter->phy_type[1],
+				nesadapter->phy_type[2], nesadapter->phy_type[3]);
+
+		/* Read PD config array: four (size, base) pairs of 16-bit words */
+		eeprom_offset += 10;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nesadapter->pd_config_size[0] = eeprom_data;
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nesadapter->pd_config_base[0] = eeprom_data;
+		nes_debug(NES_DBG_HW, "PD0 config, size=0x%04x, base=0x%04x\n",
+				nesadapter->pd_config_size[0], nesadapter->pd_config_base[0]);
+
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nesadapter->pd_config_size[1] = eeprom_data;
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nesadapter->pd_config_base[1] = eeprom_data;
+		nes_debug(NES_DBG_HW, "PD1 config, size=0x%04x, base=0x%04x\n",
+				nesadapter->pd_config_size[1], nesadapter->pd_config_base[1]);
+
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nesadapter->pd_config_size[2] = eeprom_data;
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nesadapter->pd_config_base[2] = eeprom_data;
+		nes_debug(NES_DBG_HW, "PD2 config, size=0x%04x, base=0x%04x\n",
+				nesadapter->pd_config_size[2], nesadapter->pd_config_base[2]);
+
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nesadapter->pd_config_size[3] = eeprom_data;
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nesadapter->pd_config_base[3] = eeprom_data;
+		nes_debug(NES_DBG_HW, "PD3 config, size=0x%04x, base=0x%04x\n",
+				nesadapter->pd_config_size[3], nesadapter->pd_config_base[3]);
+
+		/* Read Rx Pool Size; this and the following 32-bit settings are each two 16-bit words, high half first */
+		eeprom_offset += 22;   /* 46 */
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		eeprom_offset += 2;
+		nesadapter->rx_pool_size = (((u32)eeprom_data) << 16) +
+				nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nes_debug(NES_DBG_HW, "rx_pool_size = 0x%08X\n", nesadapter->rx_pool_size);
+
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		eeprom_offset += 2;
+		nesadapter->tx_pool_size = (((u32)eeprom_data) << 16) +
+				nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nes_debug(NES_DBG_HW, "tx_pool_size = 0x%08X\n", nesadapter->tx_pool_size);
+
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		eeprom_offset += 2;
+		nesadapter->rx_threshold = (((u32)eeprom_data) << 16) +
+				nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nes_debug(NES_DBG_HW, "rx_threshold = 0x%08X\n", nesadapter->rx_threshold);
+
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		eeprom_offset += 2;
+		nesadapter->tcp_timer_core_clk_divisor = (((u32)eeprom_data) << 16) +
+				nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nes_debug(NES_DBG_HW, "tcp_timer_core_clk_divisor = 0x%08X\n",
+				nesadapter->tcp_timer_core_clk_divisor);
+
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		eeprom_offset += 2;
+		nesadapter->iwarp_config = (((u32)eeprom_data) << 16) +
+				nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nes_debug(NES_DBG_HW, "iwarp_config = 0x%08X\n", nesadapter->iwarp_config);
+
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		eeprom_offset += 2;
+		nesadapter->cm_config = (((u32)eeprom_data) << 16) +
+				nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nes_debug(NES_DBG_HW, "cm_config = 0x%08X\n", nesadapter->cm_config);
+
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		eeprom_offset += 2;
+		nesadapter->sws_timer_config = (((u32)eeprom_data) << 16) +
+				nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nes_debug(NES_DBG_HW, "sws_timer_config = 0x%08X\n", nesadapter->sws_timer_config);
+
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		eeprom_offset += 2;
+		nesadapter->tcp_config1 = (((u32)eeprom_data) << 16) +
+				nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nes_debug(NES_DBG_HW, "tcp_config1 = 0x%08X\n", nesadapter->tcp_config1);
+
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		eeprom_offset += 2;
+		nesadapter->wqm_wat = (((u32)eeprom_data) << 16) +
+				nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nes_debug(NES_DBG_HW, "wqm_wat = 0x%08X\n", nesadapter->wqm_wat);
+
+		eeprom_offset += 2;
+		eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		eeprom_offset += 2;
+		nesadapter->core_clock = (((u32)eeprom_data) << 16) +
+				nes_read16_eeprom(nesdev->regs, eeprom_offset);
+		nes_debug(NES_DBG_HW, "core_clock = 0x%08X\n", nesadapter->core_clock);
+
+		if ((sw_section_ver) && (nesadapter->hw_rev != NE020_REV)) {	/* PHY index map comes from the EEPROM only here; otherwise 4..7 are used */
+			eeprom_offset += 2;
+			eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+			nesadapter->phy_index[0] = (eeprom_data & 0xff00)>>8;
+			nesadapter->phy_index[1] = eeprom_data & 0x00ff;
+			eeprom_offset += 2;
+			eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+			nesadapter->phy_index[2] = (eeprom_data & 0xff00)>>8;
+			nesadapter->phy_index[3] = eeprom_data & 0x00ff;
+		} else {
+			nesadapter->phy_index[0] = 4;
+			nesadapter->phy_index[1] = 5;
+			nesadapter->phy_index[2] = 6;
+			nesadapter->phy_index[3] = 7;
+		}
+		nes_debug(NES_DBG_HW, "Phy address map = 0 > %u,  1 > %u, 2 > %u, 3 > %u\n",
+			   nesadapter->phy_index[0],nesadapter->phy_index[1],
+			   nesadapter->phy_index[2],nesadapter->phy_index[3]);
+	}
+
+	return 0;
+}
+
+
+/**
+ * nes_read16_eeprom - fetch the 16-bit EEPROM word that holds byte @offset
+ * Spins, with no timeout, until the READ_REQUEST bit reads back as clear.
+ */
+static u16 nes_read16_eeprom(void __iomem *addr, u16 offset)
+{
+	void __iomem *cmd_reg = addr + NES_EEPROM_COMMAND;
+	void __iomem *data_reg = addr + NES_EEPROM_DATA;
+
+	writel(NES_EEPROM_READ_REQUEST + (offset >> 1), cmd_reg);
+	while (readl(cmd_reg) & NES_EEPROM_READ_REQUEST)
+		;	/* busy-wait for the request bit to drop */
+	return readw(data_reg);
+}
+
+
+/**
+ * nes_write_1G_phy_reg - write @data to register @phy_reg of the PHY at @phy_addr
+ * Holds phy_lock while polling MAC_INT_STATUS bit 0 up to 100 times, 30us apart.
+ */
+void nes_write_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 data)
+{
+	struct nes_adapter *adapter = nesdev->nesadapter;
+	unsigned long irq_flags;
+	u32 int_status;
+	u32 tries_left = 100;
+	u32 mdio_cmd = 0x50020000 | data | ((u32)phy_reg << 18) | ((u32)phy_addr << 23);
+
+	spin_lock_irqsave(&adapter->phy_lock, irq_flags);
+
+	nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, mdio_cmd);
+	do {
+		udelay(30);
+		int_status = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
+	} while (!(int_status & 1) && --tries_left);
+
+	if (int_status & 1) {
+		nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);	/* write 1 back to bit 0 */
+	} else {
+		nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
+				int_status);
+	}
+
+	spin_unlock_irqrestore(&adapter->phy_lock, irq_flags);
+}
+
+
+/**
+ * nes_read_1G_phy_reg - read register @phy_reg of the PHY at @phy_addr
+ * @nesdev: device whose indexed MAC registers are used
+ * @phy_reg: register number, shifted to bit 18 of the MDIO command
+ * @phy_addr: PHY address, shifted to bit 23 of the MDIO command
+ * @data: receives the register value, or 0xffff when MAC_INT_STATUS bit 0
+ *        never comes up within 100 polls spaced 30us apart
+ */
+void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 *data)
+{
+	struct nes_adapter *adapter = nesdev->nesadapter;
+	unsigned long irq_flags;
+	u32 int_status;
+	u32 tries_left = 100;
+	u32 mdio_cmd = 0x60020000 | ((u32)phy_reg << 18) | ((u32)phy_addr << 23);
+
+	spin_lock_irqsave(&adapter->phy_lock, irq_flags);
+
+	nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, mdio_cmd);
+	do {
+		udelay(30);
+		int_status = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
+	} while (!(int_status & 1) && --tries_left);
+
+	if (int_status & 1) {
+		/* write 1 back to bit 0, then take the low 16 bits of MDIO_CONTROL */
+		nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
+		*data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+	} else {
+		nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
+				int_status);
+		*data = 0xffff;
+	}
+
+	spin_unlock_irqrestore(&adapter->phy_lock, irq_flags);
+}
+
+
+/**
+ * nes_write_10G_phy_reg - write @data to register @phy_reg of a 10G PHY
+ * Sets the register address, then the data, for device 1 on port @phy_addr.
+ * NOTE(review): no phy_lock is taken here, unlike the 1G accessors -- confirm callers serialize.
+ */
+void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg,
+		u8 phy_addr, u16 data)
+{
+	const u32 mdio_dev = 1;
+	const u32 mdio_port = phy_addr;
+	const u32 target = (mdio_dev << 18) | (mdio_port << 23);
+	u32 int_status;
+	u32 tries_left;
+
+	/* step 1: set address */
+	nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
+			0x00020000 | (u32)phy_reg | target);
+	tries_left = 100;
+	do {
+		udelay(30);
+		int_status = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
+	} while (!(int_status & 1) && --tries_left);
+	if (int_status & 1) {
+		nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
+	} else {
+		nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
+				int_status);
+	}
+
+	/* step 2: set data (issued even when step 1 was not seen to complete) */
+	nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
+			0x10020000 | (u32)data | target);
+	tries_left = 100;
+	do {
+		udelay(30);
+		int_status = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
+	} while (!(int_status & 1) && --tries_left);
+	if (int_status & 1) {
+		nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
+	} else {
+		nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
+				int_status);
+	}
+}
+
+
+/**
+ * nes_read_10G_phy_reg - start a read of register @phy_reg on a 10G PHY
+ * Sets the register address and then issues the read command for device 1
+ * on port @phy_addr.  This routine only issues the read; the data must be
+ * read separately by the caller.
+ * NOTE(review): no phy_lock is taken here, unlike the 1G accessors -- confirm callers serialize.
+ */
+void nes_read_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, u8 phy_addr)
+{
+	const u32 mdio_dev = 1;
+	const u32 mdio_port = phy_addr;
+	const u32 target = (mdio_dev << 18) | (mdio_port << 23);
+	u32 int_status;
+	u32 tries_left;
+
+	/* step 1: set address */
+	nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
+			0x00020000 | (u32)phy_reg | target);
+	tries_left = 100;
+	do {
+		udelay(30);
+		int_status = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
+	} while (!(int_status & 1) && --tries_left);
+	if (int_status & 1) {
+		nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
+	} else {
+		nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
+				int_status);
+	}
+
+	/* step 2: issue read (done even when step 1 was not seen to complete) */
+	nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
+			0x30020000 | target);
+	tries_left = 100;
+	do {
+		udelay(30);
+		int_status = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
+	} while (!(int_status & 1) && --tries_left);
+	if (int_status & 1) {
+		nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
+	} else {
+		nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
+				int_status);
+	}
+}
+
+
+/**
+ * nes_get_cqp_request - take a free CQP request from cqp_avail_reqs or kzalloc() a dynamic one; returns NULL if that allocation fails
+ */
+struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev)
+{
+	unsigned long flags;
+	struct nes_cqp_request *cqp_request = NULL;
+
+	if (!list_empty(&nesdev->cqp_avail_reqs)) {
+		spin_lock_irqsave(&nesdev->cqp.lock, flags);
+		if (!list_empty(&nesdev->cqp_avail_reqs)) {	/* re-check: the unlocked test above is only a hint */
+			cqp_request = list_entry(nesdev->cqp_avail_reqs.next,
+					struct nes_cqp_request, list);
+			list_del_init(&cqp_request->list);
+		}
+		spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+	}
+	if (!cqp_request) {	/* nothing to reuse (or the list was drained meanwhile): allocate */
+		cqp_request = kzalloc(sizeof(*cqp_request), GFP_KERNEL);
+		if (cqp_request) {
+			cqp_request->dynamic = 1;
+			INIT_LIST_HEAD(&cqp_request->list);
+		}
+	}
+	if (cqp_request) {
+		init_waitqueue_head(&cqp_request->waitq);
+		cqp_request->waiting = 0;
+		cqp_request->request_done = 0;
+		cqp_request->callback = 0;
+		nes_debug(NES_DBG_CQP, "Got cqp request %p from the available list \n",
+				cqp_request);
+	} else
+		printk(KERN_ERR PFX "%s: Could not allocated a CQP request.\n", __FUNCTION__);
+
+	return cqp_request;
+}
+
+
+/**
+ * nes_post_cqp_request - copy @cqp_request onto the CQP send queue, or append it to cqp_pending_reqs when it cannot be posted now
+ */
+void nes_post_cqp_request(struct nes_device *nesdev,
+		struct nes_cqp_request *cqp_request, int ring_doorbell)
+{
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	unsigned long flags;
+	u32 cqp_head;
+	u64 u64temp;
+
+	spin_lock_irqsave(&nesdev->cqp.lock, flags);
+
+	if (((((nesdev->cqp.sq_tail+(nesdev->cqp.sq_size*2))-nesdev->cqp.sq_head) &
+			(nesdev->cqp.sq_size - 1)) != 1)
+			&& (list_empty(&nesdev->cqp_pending_reqs))) {	/* head is not one slot behind tail, and nothing is already pending */
+		cqp_head = nesdev->cqp.sq_head++;
+		nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;	/* wrap; the mask assumes sq_size is a power of two */
+		cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+		memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
+		barrier();
+		u64temp = (unsigned long)cqp_request;	/* the request pointer is stored in the WQE's completion scratch words */
+		set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_COMP_SCRATCH_LOW_IDX,
+				    u64temp);
+		nes_debug(NES_DBG_CQP, "CQP request (opcode 0x%02X), line 1 = 0x%08X put on CQPs SQ,"
+				" request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u,"
+				" waiting = %d, refcount = %d.\n",
+				le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f,
+				le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request,
+				nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size,
+				cqp_request->waiting, atomic_read(&cqp_request->refcount));
+		barrier();	/* NOTE(review): compiler barrier only -- confirm no wmb() is needed before the doorbell write */
+		if (ring_doorbell) {
+			/* Ring doorbell (1 WQEs) */
+			nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id);
+		}
+
+		barrier();
+	} else {
+		nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X), line 1 = 0x%08X"
+				" put on the pending queue.\n",
+				cqp_request,
+				le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f,
+				le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_ID_IDX]));
+		list_add_tail(&cqp_request->list, &nesdev->cqp_pending_reqs);	/* tail insertion keeps pending requests in arrival order */
+	}
+
+	spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+
+	return;
+}
+
+
+/**
+ * nes_arp_table - add, delete or resolve the ARP table entry for @ip_addr
+ * Returns the affected table index; -1 if an ADD finds the address already
+ * present, a DELETE/RESOLVE finds it absent, or @action is unknown; or the
+ * nes_alloc_resource() error when no index could be allocated for an ADD.
+ */
+int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 action)
+{
+	struct nes_adapter *adapter = nesdev->nesadapter;
+	int slot = 0;
+	int rc = 0;
+
+	/* linear search; slot ends up equal to arp_table_size when nothing matches */
+	while ((u32) slot < adapter->arp_table_size &&
+			adapter->arp_table[slot].ip_addr != ip_addr)
+		slot++;
+
+	if (action == NES_ARP_ADD) {
+		if (slot != adapter->arp_table_size)
+			return -1;
+
+		slot = 0;
+		rc = nes_alloc_resource(adapter, adapter->allocated_arps,
+				adapter->arp_table_size, (u32 *)&slot, &adapter->next_arp_index);
+		if (rc) {
+			nes_debug(NES_DBG_NETDEV, "nes_alloc_resource returned error = %u\n", rc);
+			return rc;
+		}
+		nes_debug(NES_DBG_NETDEV, "ADD, arp_index=%d\n", slot);
+
+		adapter->arp_table[slot].ip_addr = ip_addr;
+		memcpy(adapter->arp_table[slot].mac_addr, mac_addr, ETH_ALEN);
+		return slot;
+	}
+
+	/* every remaining action needs an existing entry */
+	if (slot == adapter->arp_table_size) {
+		nes_debug(NES_DBG_NETDEV, "mac address not in ARP table - cannot delete or resolve\n");
+		return -1;
+	}
+
+	if (action == NES_ARP_RESOLVE) {
+		nes_debug(NES_DBG_NETDEV, "RESOLVE, arp_index=%d\n", slot);
+	} else if (action == NES_ARP_DELETE) {
+		nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", slot);
+		adapter->arp_table[slot].ip_addr = 0;
+		memset(adapter->arp_table[slot].mac_addr, 0x00, ETH_ALEN);
+		nes_free_resource(adapter, adapter->allocated_arps, slot);
+	} else {
+		return -1;
+	}
+
+	return slot;
+}
+
+
+/**
+ * nes_mh_fix - sample MAC 0 transmit counters and, when no frames or pause frames were counted while TX chunks stay in use, reset the MAC/SerDes; always re-arms mh_timer for HZ/5
+ */
+void nes_mh_fix(unsigned long parm)
+{
+	unsigned long flags;
+	struct nes_device *nesdev = (struct nes_device *)parm;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_vnic *nesvnic;
+	u32 used_chunks_tx;
+	u32 temp_used_chunks_tx;
+	u32 temp_last_used_chunks_tx;
+	u32 used_chunks_mask;
+	u32 mac_tx_frames_low;
+	u32 mac_tx_frames_high;
+	u32 mac_tx_pauses;
+	u32 serdes_status;
+	u32 reset_value;
+	u32 tx_control;
+	u32 tx_config;
+	u32 tx_pause_quanta;
+	u32 rx_control;
+	u32 rx_config;
+	u32 mac_exact_match;
+	u32 mpp_debug;
+	u32 i=0;
+	u32 chunks_tx_progress = 0;
+
+	spin_lock_irqsave(&nesadapter->phy_lock, flags);
+	if ((nesadapter->mac_sw_state[0] != NES_MAC_SW_IDLE) || (nesadapter->mac_link_down[0])) {	/* only act when MAC 0 is idle and not flagged link-down */
+		spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+		goto no_mh_work;
+	}
+	nesadapter->mac_sw_state[0] = NES_MAC_SW_MH;	/* claim MAC 0 for the duration of the check */
+	spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+	do {	/* single pass; 'break' is used as an early exit */
+		mac_tx_frames_low = nes_read_indexed(nesdev, NES_IDX_MAC_TX_FRAMES_LOW);
+		mac_tx_frames_high = nes_read_indexed(nesdev, NES_IDX_MAC_TX_FRAMES_HIGH);
+		mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES);
+		used_chunks_tx = nes_read_indexed(nesdev, NES_IDX_USED_CHUNKS_TX);
+		nesdev->mac_pause_frames_sent += mac_tx_pauses;
+		used_chunks_mask = 0;
+		temp_used_chunks_tx = used_chunks_tx;
+		temp_last_used_chunks_tx = nesdev->last_used_chunks_tx;
+
+		if (nesdev->netdev[0]) {
+			nesvnic = netdev_priv(nesdev->netdev[0]);
+		} else {
+			break;
+		}
+
+		for (i=0; i<4; i++) {	/* one byte of the used-chunks word per qp_nic_index slot that is not 0xff */
+			used_chunks_mask <<= 8;	/* NOTE(review): slot 0 ends up in the mask's top byte but is compared against the low byte below -- confirm the byte order */
+			if (nesvnic->qp_nic_index[i] != 0xff) {
+				used_chunks_mask |= 0xff;
+				if ((temp_used_chunks_tx&0xff)<(temp_last_used_chunks_tx&0xff)) {
+					chunks_tx_progress = 1;
+				}
+			}
+			temp_used_chunks_tx >>= 8;
+			temp_last_used_chunks_tx >>= 8;
+		}
+		if ((mac_tx_frames_low) || (mac_tx_frames_high) ||
+			(!(used_chunks_tx&used_chunks_mask)) ||
+			(!(nesdev->last_used_chunks_tx&used_chunks_mask)) ||
+			(chunks_tx_progress) ) {	/* frames were counted, no chunks in use now or at the last sample, or a count dropped: nothing to do */
+			nesdev->last_used_chunks_tx = used_chunks_tx;
+			break;
+		}
+		nesdev->last_used_chunks_tx = used_chunks_tx;
+		barrier();
+
+		nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, 0x00000005);	/* presumably requests a pause frame (counted in mh_pauses_sent) -- confirm against the MAC register spec */
+		mh_pauses_sent++;
+		mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES);
+		if (mac_tx_pauses) {
+			nesdev->mac_pause_frames_sent += mac_tx_pauses;
+			break;
+		}
+
+		/* save the MAC configuration so it can be written back after the reset below */
+		tx_control = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONTROL);
+		tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
+		tx_pause_quanta = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_QUANTA);
+		rx_control = nes_read_indexed(nesdev, NES_IDX_MAC_RX_CONTROL);
+		rx_config = nes_read_indexed(nesdev, NES_IDX_MAC_RX_CONFIG);
+		mac_exact_match = nes_read_indexed(nesdev, NES_IDX_MAC_EXACT_MATCH_BOTTOM);
+		mpp_debug = nes_read_indexed(nesdev, NES_IDX_MPP_DEBUG);
+
+		/* one last ditch effort to avoid a false positive */
+		mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES);
+		if (mac_tx_pauses) {
+			nesdev->last_mac_tx_pauses = nesdev->mac_pause_frames_sent;
+			nes_debug(NES_DBG_HW, "failsafe caught slow outbound pause\n");
+			break;
+		}
+		mh_detected++;
+
+		nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, 0x00000000);
+		nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, 0x00000000);
+		reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
+
+		nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value | 0x0000001d);
+
+		while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET)
+				& 0x00000040) != 0x00000040) && (i++ < 5000)) {	/* i still holds 4 from the loop above, so the poll budget is slightly under 5000 */
+			/* mdelay(1); */
+		}
+
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008);
+		serdes_status = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0);	/* value is not used afterwards */
+
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x000bdef7);
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE0, 0x9ce73000);
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE0, 0x0ff00000);
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET0, 0x00000000);
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS0, 0x00000000);
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0, 0x00000000);
+		if (nesadapter->OneG_Mode) {
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0182222);
+		} else {
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0042222);
+		}
+		serdes_status = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_STATUS0);	/* value is not used afterwards */
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000ff);
+
+		/* write back the MAC configuration saved before the reset */
+		nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, tx_control);
+		nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
+		nes_write_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_QUANTA, tx_pause_quanta);
+		nes_write_indexed(nesdev, NES_IDX_MAC_RX_CONTROL, rx_control);
+		nes_write_indexed(nesdev, NES_IDX_MAC_RX_CONFIG, rx_config);
+		nes_write_indexed(nesdev, NES_IDX_MAC_EXACT_MATCH_BOTTOM, mac_exact_match);
+		nes_write_indexed(nesdev, NES_IDX_MPP_DEBUG, mpp_debug);
+
+	} while (0);
+
+	nesadapter->mac_sw_state[0] = NES_MAC_SW_IDLE;	/* NOTE(review): written without phy_lock, unlike the claim above -- confirm this is safe */
+no_mh_work:
+	nesdev->nesadapter->mh_timer.expires = jiffies + (HZ/5);
+	add_timer(&nesdev->nesadapter->mh_timer);
+}
+
+/**
+ * nes_clc - zero link_interrupt_count[0..3] under phy_lock
+ * Re-arms lc_timer so that it expires again 3600 * HZ jiffies from now.
+ */
+void nes_clc(unsigned long parm)
+{
+	struct nes_adapter *adapter = ((struct nes_device *)parm)->nesadapter;
+	unsigned long irq_flags;
+	unsigned int port;
+
+	spin_lock_irqsave(&adapter->phy_lock, irq_flags);
+	for (port = 0; port < 4; port++)
+		adapter->link_interrupt_count[port] = 0;
+	spin_unlock_irqrestore(&adapter->phy_lock, irq_flags);
+
+	/* next expiry in one hour */
+	adapter->lc_timer.expires = jiffies + 3600 * HZ;
+	add_timer(&adapter->lc_timer);
+}
+
+
+/**
+ * nes_dump_mem - hex/ASCII dump of a buffer through nes_debug()
+ * @dump_debug_level: debug mask; nothing is printed unless nes_debug_level has one of these bits
+ * @addr: first byte to dump
+ * @length: byte count; values above 0x100 are truncated to 0x100
+ * Prints sixteen bytes per line: the hex column, a '|', then the printable characters.
+ */
+void nes_dump_mem(unsigned int dump_debug_level, void *addr, int length)
+{
+	static const char hex_digits[] = "0123456789abcdef";
+	char  *cursor = addr;
+	char  hex_line[80];
+	char  text_line[20];
+	int   text_len = 0;
+	int   hex_len = 0;
+	int   offset;
+
+	if (!(nes_debug_level & dump_debug_level))
+		return;
+
+	if (length > 0x100) {
+		nes_debug(dump_debug_level, "Length truncated from %x to %x\n", length, 0x100);
+		length = 0x100;
+	}
+	nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", cursor, length, length);
+
+	memset(text_line, 0, sizeof(text_line));
+	memset(hex_line, 0, sizeof(hex_line));
+
+	for (offset = 0; offset < length; offset++, cursor++) {
+		char byte = *cursor;
+
+		if (text_len == 8) {
+			/* separator between the two groups of eight bytes */
+			text_line[text_len++] = ' ';
+			hex_line[hex_len++] = '-';
+			hex_line[hex_len++] = ' ';
+		}
+
+		/* anything outside 0x20..0x7e is shown as '.' */
+		text_line[text_len++] = (byte < 0x20 || byte > 0x7e) ? '.' : byte;
+		hex_line[hex_len++] = hex_digits[(byte & 0xf0) >> 4];
+		hex_line[hex_len++] = hex_digits[byte & 0x0f];
+		hex_line[hex_len++] = ' ';
+
+		if (text_len < 17)
+			continue;
+
+		/* sixteen bytes plus the separator collected: print and start over */
+		nes_debug(dump_debug_level, "   %s |  %s\n", hex_line, text_line);
+		memset(text_line, 0, sizeof(text_line));
+		memset(hex_line, 0, sizeof(hex_line));
+		text_len = 0;
+		hex_len = 0;
+	}
+
+	if (!text_len)
+		return;
+
+	/* partial last line: pad the hex column with spaces before printing */
+	for (; text_len < 17; text_len++) {
+		if (text_len == 8) {
+			hex_line[hex_len++] = ' ';
+			hex_line[hex_len++] = ' ';
+		}
+		hex_line[hex_len++] = ' ';
+		hex_line[hex_len++] = ' ';
+		hex_line[hex_len++] = ' ';
+	}
+
+	nes_debug(dump_debug_level, "   %s |  %s\n", hex_line, text_line);
+}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
new file mode 100644
index 0000000..ffd4b42
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -0,0 +1,3917 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/highmem.h>
+#include <asm/byteorder.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/iw_cm.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "nes.h"
+
+#include <rdma/ib_umem.h>
+
+atomic_t mod_qp_timouts;	/* NOTE(review): presumably counts modify-QP timeouts - confirm */
+atomic_t qps_created;	/* NOTE(review): presumably counts QPs created - confirm */
+atomic_t sw_qps_destroyed;	/* NOTE(review): presumably counts QPs destroyed by software - confirm */
+
+
+/**
+ * nes_alloc_mw - reserve an STag for a memory window and register it with the adapter through a CQP request
+ */
+static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
+	unsigned long flags;
+	struct nes_pd *nespd = to_nespd(ibpd);
+	struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_cqp_request *cqp_request;
+	struct nes_mr *nesmr;
+	struct ib_mw *ibmw;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	int ret;
+	u32 stag;
+	u32 stag_index = 0;
+	u32 next_stag_index = 0;
+	u32 driver_key = 0;
+	u8 stag_key = 0;
+
+	get_random_bytes(&next_stag_index, sizeof(next_stag_index));
+	stag_key = (u8)next_stag_index;	/* low byte of the random value becomes the STag key */
+
+	driver_key = 0;
+
+	next_stag_index >>= 8;	/* remaining random bits pick where the index search starts */
+	next_stag_index %= nesadapter->max_mr;
+
+	ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
+			nesadapter->max_mr, &stag_index, &next_stag_index);
+	if (ret) {
+		return ERR_PTR(ret);
+	}
+
+	nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
+	if (!nesmr) {
+		nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	stag = stag_index << 8;	/* STag layout: index in the upper bits, key in the low byte */
+	stag |= driver_key;
+	stag += (u32)stag_key;
+
+	nes_debug(NES_DBG_MR, "Registering STag 0x%08X, index = 0x%08X\n",
+			stag, stag_index);
+
+	/* Register the region with the adapter */
+	cqp_request = nes_get_cqp_request(nesdev);
+	if (cqp_request == NULL) {
+		kfree(nesmr);
+		nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	cqp_request->waiting = 1;
+	cqp_wqe = &cqp_request->cqp_wqe;
+
+	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] =
+			cpu_to_le32( NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_RIGHTS_REMOTE_READ |
+			NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_VA_TO |
+			NES_CQP_STAG_REM_ACC_EN);
+
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX, (nespd->pd_id & 0x00007fff));
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
+
+	atomic_set(&cqp_request->refcount, 2);	/* NOTE(review): presumably one ref for this waiter, one for the completion path - confirm */
+	nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+
+	/* Wait for CQP */
+	ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
+			NES_EVENT_TIMEOUT);
+	nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u,"
+			" CQP Major:Minor codes = 0x%04X:0x%04X.\n",
+			stag, ret, cqp_request->major_code, cqp_request->minor_code);
+	if ((!ret) || (cqp_request->major_code)) {
+		/* failure: drop our reference; the request is released only when the count reaches zero */
+		if (atomic_dec_and_test(&cqp_request->refcount)) {
+			if (cqp_request->dynamic) {
+				kfree(cqp_request);
+			} else {
+				spin_lock_irqsave(&nesdev->cqp.lock, flags);
+				list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+				spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+			}
+		}
+		kfree(nesmr);
+		nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+		if (!ret) {
+			return ERR_PTR(-ETIME);	/* the wait timed out */
+		} else {
+			return ERR_PTR(-ENOMEM);	/* the adapter reported a non-zero major code */
+		}
+	} else {
+		if (atomic_dec_and_test(&cqp_request->refcount)) {
+			if (cqp_request->dynamic) {
+				kfree(cqp_request);
+			} else {
+				spin_lock_irqsave(&nesdev->cqp.lock, flags);
+				list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+				spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+			}
+		}
+	}
+
+	nesmr->ibmw.rkey = stag;
+	nesmr->mode = IWNES_MEMREG_TYPE_MW;
+	ibmw = &nesmr->ibmw;
+	nesmr->pbl_4k = 0;	/* a memory window uses no PBLs */
+	nesmr->pbls_used = 0;
+
+	return ibmw;
+}
+
+
+/**
+ * nes_dealloc_mw - deallocate a memory window's STag through a CQP request, then release the STag index and the nes_mr
+ */
+static int nes_dealloc_mw(struct ib_mw *ibmw)
+{
+	struct nes_mr *nesmr = to_nesmw(ibmw);
+	struct nes_vnic *nesvnic = to_nesvnic(ibmw->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	struct nes_cqp_request *cqp_request;
+	int err = 0;
+	unsigned long flags;
+	int ret;
+
+	/* Deallocate the window with the adapter */
+	cqp_request = nes_get_cqp_request(nesdev);
+	if (cqp_request == NULL) {
+		nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
+		return -ENOMEM;	/* nothing has been released at this point */
+	}
+	cqp_request->waiting = 1;
+	cqp_wqe = &cqp_request->cqp_wqe;
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, NES_CQP_DEALLOCATE_STAG);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ibmw->rkey);
+
+	atomic_set(&cqp_request->refcount, 2);	/* NOTE(review): presumably one ref for this waiter, one for the completion path - confirm */
+	nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+
+	/* Wait for CQP */
+	nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X to complete.\n",
+			ibmw->rkey);
+	ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
+			NES_EVENT_TIMEOUT);
+	nes_debug(NES_DBG_MR, "Deallocate STag completed, wait_event_timeout ret = %u,"
+			" CQP Major:Minor codes = 0x%04X:0x%04X.\n",
+			ret, cqp_request->major_code, cqp_request->minor_code);
+	if ((!ret) || (cqp_request->major_code)) {
+		/* failure: drop our reference; the request is released only when the count reaches zero */
+		if (atomic_dec_and_test(&cqp_request->refcount)) {
+			if (cqp_request->dynamic) {
+				kfree(cqp_request);
+			} else {
+				spin_lock_irqsave(&nesdev->cqp.lock, flags);
+				list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+				spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+			}
+		}
+		if (!ret) {
+			err = -ETIME;	/* the wait timed out */
+		} else {
+			err = -EIO;	/* the adapter reported a non-zero major code */
+		}
+	} else {
+		if (atomic_dec_and_test(&cqp_request->refcount)) {
+			if (cqp_request->dynamic) {
+				kfree(cqp_request);
+			} else {
+				spin_lock_irqsave(&nesdev->cqp.lock, flags);
+				list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+				spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+			}
+		}
+	}
+
+	/* the STag index and the nes_mr are released even when err is set */
+	nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+			(ibmw->rkey & 0x0fffff00) >> 8);	/* drop the key byte to recover the STag index */
+	kfree(nesmr);
+
+	return err;
+}
+
+
+/**
+ * nes_bind_mw - post a bind-memory-window WQE on the QP's send queue
+ * Returns 0, or -EINVAL when the QP state is past RTS or the send queue is full.
+ */
+static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
+		struct ib_mw_bind *ibmw_bind)
+{
+	u64 u64temp;
+	struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_qp *nesqp = to_nesqp(ibqp);
+	struct nes_hw_qp_wqe *wqe;
+	unsigned long flags = 0;
+	u32 head;
+	u32 wqe_misc = 0;
+	u32 qsize;
+
+	if (nesqp->ibqp_state > IB_QPS_RTS)
+		return -EINVAL;
+
+	spin_lock_irqsave(&nesqp->lock, flags);
+
+	head = nesqp->hwqp.sq_head;
+	qsize = nesqp->hwqp.sq_size;	/* ring size; using sq_tail here broke the checks below and could divide by zero */
+
+	/* Check for SQ overflow */
+	if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
+		spin_unlock_irqrestore(&nesqp->lock, flags);
+		return -EINVAL;
+	}
+
+	wqe = &nesqp->hwqp.sq_vbase[head];
+	/* build the bind WQE in the send-queue slot at the current head */
+	nes_fill_init_qp_wqe(wqe, nesqp, head);
+	u64temp = ibmw_bind->wr_id;
+	set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, u64temp);
+	wqe_misc = NES_IWARP_SQ_OP_BIND;
+
+	wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
+
+	if (ibmw_bind->send_flags & IB_SEND_SIGNALED)
+		wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
+
+	if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_WRITE) {
+		wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE;
+	}
+	if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_READ) {
+		wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ;
+	}
+
+	set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc);
+	set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX, ibmw_bind->mr->lkey);
+	set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey);
+	set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX,
+			ibmw_bind->length);
+	wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0;
+	u64temp = (u64)ibmw_bind->addr;
+	set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp);
+
+	head++;	/* advance the head, wrapping at the ring size */
+	if (head >= qsize)
+		head = 0;
+
+	nesqp->hwqp.sq_head = head;
+	barrier();	/* keep the compiler from moving the head update past the doorbell write */
+
+	nes_write32(nesdev->regs+NES_WQE_ALLOC,
+			(1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
+
+	spin_unlock_irqrestore(&nesqp->lock, flags);
+
+	return 0;
+}
+
+
+/**
+ * nes_alloc_fmr - allocate a fast memory region: reserve an STag index, reserve and
+ * allocate the PBL pages sized from ibfmr_attr->max_pages, then register the STag via CQP
+ */
+static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd,
+		int ibmr_access_flags,
+		struct ib_fmr_attr *ibfmr_attr)
+{
+	unsigned long flags;
+	struct nes_pd *nespd = to_nespd(ibpd);
+	struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_fmr *nesfmr;
+	struct nes_cqp_request *cqp_request;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	int ret;
+	u32 stag;
+	u32 stag_index = 0;
+	u32 next_stag_index = 0;
+	u32 driver_key = 0;
+	u32 opcode = 0;
+	u8 stag_key = 0;
+	int i=0;
+	struct nes_vpbl vpbl;
+
+	get_random_bytes(&next_stag_index, sizeof(next_stag_index));
+	stag_key = (u8)next_stag_index;
+
+	next_stag_index >>= 8;
+	next_stag_index %= nesadapter->max_mr;
+
+	ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
+			nesadapter->max_mr, &stag_index, &next_stag_index);
+	if (ret)
+		goto failed_resource_alloc;
+
+	nesfmr = kzalloc(sizeof(*nesfmr), GFP_KERNEL);
+	if (!nesfmr) {
+		ret = -ENOMEM;
+		goto failed_fmr_alloc;
+	}
+
+	nesfmr->nesmr.mode = IWNES_MEMREG_TYPE_FMR;
+	if (ibfmr_attr->max_pages == 1) {
+		/* use zero length PBL */
+		nesfmr->nesmr.pbl_4k = 0;
+		nesfmr->nesmr.pbls_used = 0;
+	} else if (ibfmr_attr->max_pages <= 32) {
+		/* use PBL 256 */
+		nesfmr->nesmr.pbl_4k = 0;
+		nesfmr->nesmr.pbls_used = 1;
+	} else if (ibfmr_attr->max_pages <= 512) {
+		/* use 4K PBLs */
+		nesfmr->nesmr.pbl_4k = 1;
+		nesfmr->nesmr.pbls_used = 1;
+	} else {
+		/* use two level 4K PBLs */
+		/* add support for two level 256B PBLs */
+		nesfmr->nesmr.pbl_4k = 1;
+		nesfmr->nesmr.pbls_used = 1 + (ibfmr_attr->max_pages >> 9) +
+				((ibfmr_attr->max_pages & 511) ? 1 : 0);
+	}
+	/* Register the region with the adapter */
+	spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+
+	/* track PBL resources */
+	if (nesfmr->nesmr.pbls_used != 0) {
+		if (nesfmr->nesmr.pbl_4k) {
+			if (nesfmr->nesmr.pbls_used > nesadapter->free_4kpbl) {
+				spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+				ret = -ENOMEM;
+				goto failed_vpbl_avail;
+			}
+			nesadapter->free_4kpbl -= nesfmr->nesmr.pbls_used;
+		} else {
+			if (nesfmr->nesmr.pbls_used > nesadapter->free_256pbl) {
+				spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+				ret = -ENOMEM;
+				goto failed_vpbl_avail;
+			}
+			nesadapter->free_256pbl -= nesfmr->nesmr.pbls_used;
+		}
+	}
+
+	/* one level pbl */
+	if (nesfmr->nesmr.pbls_used == 0) {
+		nesfmr->root_vpbl.pbl_vbase = NULL;
+		nes_debug(NES_DBG_MR,  "zero level pbl \n");
+	} else if (nesfmr->nesmr.pbls_used == 1) {
+		/* can change it to kmalloc & dma_map_single */
+		nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+				&nesfmr->root_vpbl.pbl_pbase);
+		if (!nesfmr->root_vpbl.pbl_vbase) {
+			spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+			ret = -ENOMEM;
+			goto failed_vpbl_alloc;
+		}
+		nesfmr->leaf_pbl_cnt = 0;
+		nes_debug(NES_DBG_MR, "one level pbl, root_vpbl.pbl_vbase=%p \n",
+				nesfmr->root_vpbl.pbl_vbase);
+	}
+	/* two level pbl */
+	else {
+		nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
+				&nesfmr->root_vpbl.pbl_pbase);
+		if (!nesfmr->root_vpbl.pbl_vbase) {
+			spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+			ret = -ENOMEM;
+			goto failed_vpbl_alloc;
+		}
+
+		nesfmr->leaf_pbl_cnt = nesfmr->nesmr.pbls_used-1;	/* set first so the error path frees the 8192-byte root */
+		nesfmr->root_vpbl.leaf_vpbl = kzalloc(sizeof(*nesfmr->root_vpbl.leaf_vpbl)*1024, GFP_ATOMIC);	/* pbl_lock is held with IRQs off: must not sleep */
+		if (!nesfmr->root_vpbl.leaf_vpbl) {
+			spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+			ret = -ENOMEM;
+			goto failed_leaf_vpbl_alloc;
+		}
+
+		nes_debug(NES_DBG_MR, "two level pbl, root_vpbl.pbl_vbase=%p"
+				" leaf_pbl_cnt=%d root_vpbl.leaf_vpbl=%p\n",
+				nesfmr->root_vpbl.pbl_vbase, nesfmr->leaf_pbl_cnt, nesfmr->root_vpbl.leaf_vpbl);
+
+		for (i=0; i<nesfmr->leaf_pbl_cnt; i++) {	/* leaf entries start out NULL from kzalloc */
+			vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+					&vpbl.pbl_pbase);
+
+			if (!vpbl.pbl_vbase) {
+				ret = -ENOMEM;
+				spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+				goto failed_leaf_vpbl_pages_alloc;
+			}
+
+			nesfmr->root_vpbl.pbl_vbase[i].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
+			nesfmr->root_vpbl.pbl_vbase[i].pa_high = cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
+			nesfmr->root_vpbl.leaf_vpbl[i] = vpbl;
+
+			nes_debug(NES_DBG_MR, "pbase_low=0x%x, pbase_high=0x%x, vpbl=%p\n",
+					nesfmr->root_vpbl.pbl_vbase[i].pa_low,
+					nesfmr->root_vpbl.pbl_vbase[i].pa_high,
+					&nesfmr->root_vpbl.leaf_vpbl[i]);
+		}
+	}
+	nesfmr->ib_qp = NULL;
+	nesfmr->access_rights =0;
+
+	stag = stag_index << 8;
+	stag |= driver_key;
+	stag += (u32)stag_key;
+
+	spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+	cqp_request = nes_get_cqp_request(nesdev);
+	if (cqp_request == NULL) {
+		nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
+		ret = -ENOMEM;
+		goto failed_leaf_vpbl_pages_alloc;
+	}
+	cqp_request->waiting = 1;
+	cqp_wqe = &cqp_request->cqp_wqe;
+
+	nes_debug(NES_DBG_MR, "Registering STag 0x%08X, index = 0x%08X\n",
+			stag, stag_index);
+
+	opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR;
+
+	if (nesfmr->nesmr.pbl_4k == 1)
+		opcode |= NES_CQP_STAG_PBL_BLK_SIZE;
+
+	if (ibmr_access_flags & IB_ACCESS_REMOTE_WRITE) {
+		opcode |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE |
+				NES_CQP_STAG_RIGHTS_LOCAL_WRITE | NES_CQP_STAG_REM_ACC_EN;
+		nesfmr->access_rights |=
+				NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_RIGHTS_LOCAL_WRITE |
+				NES_CQP_STAG_REM_ACC_EN;
+	}
+
+	if (ibmr_access_flags & IB_ACCESS_REMOTE_READ) {
+		opcode |= NES_CQP_STAG_RIGHTS_REMOTE_READ |
+				NES_CQP_STAG_RIGHTS_LOCAL_READ | NES_CQP_STAG_REM_ACC_EN;
+		nesfmr->access_rights |=
+				NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_RIGHTS_LOCAL_READ |
+				NES_CQP_STAG_REM_ACC_EN;
+	}
+
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX, (nespd->pd_id & 0x00007fff));
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
+
+	cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] =
+			cpu_to_le32((nesfmr->nesmr.pbls_used>1) ?
+			(nesfmr->nesmr.pbls_used-1) : nesfmr->nesmr.pbls_used);
+
+	atomic_set(&cqp_request->refcount, 2);
+	nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+
+	/* Wait for CQP */
+	ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
+			NES_EVENT_TIMEOUT);
+	nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u,"
+			" CQP Major:Minor codes = 0x%04X:0x%04X.\n",
+			stag, ret, cqp_request->major_code, cqp_request->minor_code);
+
+	if ((!ret) || (cqp_request->major_code)) {
+		if (atomic_dec_and_test(&cqp_request->refcount)) {
+			if (cqp_request->dynamic) {
+				kfree(cqp_request);
+			} else {
+				spin_lock_irqsave(&nesdev->cqp.lock, flags);
+				list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+				spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+			}
+		}
+		ret = (!ret) ? -ETIME : -EIO;
+		goto failed_leaf_vpbl_pages_alloc;
+	} else {
+		if (atomic_dec_and_test(&cqp_request->refcount)) {
+			if (cqp_request->dynamic) {
+				kfree(cqp_request);
+			} else {
+				spin_lock_irqsave(&nesdev->cqp.lock, flags);
+				list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+				spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+			}
+		}
+	}
+
+	nesfmr->nesmr.ibfmr.lkey = stag;
+	nesfmr->nesmr.ibfmr.rkey = stag;
+	nesfmr->attr = *ibfmr_attr;
+
+	return &nesfmr->nesmr.ibfmr;
+
+	failed_leaf_vpbl_pages_alloc:
+	/* unroll all allocated pages */
+	for (i=0; i<nesfmr->leaf_pbl_cnt; i++) {
+		if (nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase) {
+			pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase,
+					nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase);
+		}
+	}
+	kfree(nesfmr->root_vpbl.leaf_vpbl);	/* kfree(NULL) is a no-op */
+
+	failed_leaf_vpbl_alloc:
+	if (nesfmr->leaf_pbl_cnt == 0) {
+		if (nesfmr->root_vpbl.pbl_vbase)
+			pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase,
+					nesfmr->root_vpbl.pbl_pbase);
+	} else
+		pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase,
+				nesfmr->root_vpbl.pbl_pbase);
+
+	failed_vpbl_alloc:	/* return the PBLs reserved above to the adapter's free counts */
+	spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+	if (nesfmr->nesmr.pbl_4k)
+		nesadapter->free_4kpbl += nesfmr->nesmr.pbls_used;
+	else
+		nesadapter->free_256pbl += nesfmr->nesmr.pbls_used;
+	spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+
+	failed_vpbl_avail:
+	kfree(nesfmr);
+
+	failed_fmr_alloc:
+	nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+
+	failed_resource_alloc:
+	return ERR_PTR(ret);
+}
+
+
+/**
+ * nes_dealloc_fmr - free the FMR's PBL pages, then deallocate its STag through nes_dealloc_mw()
+ */
+static int nes_dealloc_fmr(struct ib_fmr *ibfmr)
+{
+	struct nes_mr *nesmr = to_nesmr_from_ibfmr(ibfmr);
+	struct nes_fmr *nesfmr = to_nesfmr(nesmr);
+	struct nes_vnic *nesvnic = to_nesvnic(ibfmr->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	u32 rkey = ibfmr->rkey;	/* read before any ibmw field is written, in case ibmw and ibfmr share storage */
+	int i = 0;
+
+	/* free the resources */
+	if (nesfmr->leaf_pbl_cnt == 0) {
+		/* single PBL case */
+		if (nesfmr->root_vpbl.pbl_vbase)
+			pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase,
+					nesfmr->root_vpbl.pbl_pbase);
+	} else {
+		for (i = 0; i < nesfmr->leaf_pbl_cnt; i++) {
+			pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase,
+					nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase);
+		}
+		kfree(nesfmr->root_vpbl.leaf_vpbl);
+		pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase,
+				nesfmr->root_vpbl.pbl_pbase);
+	}
+
+	/* nes_dealloc_mw() kfree()s the nes_mr it is given, so pass the real one, not a stack copy; assumes nesmr is the first member of struct nes_fmr - TODO confirm */
+	nesmr->ibmw.device = ibfmr->device;
+	nesmr->ibmw.pd = ibfmr->pd;
+	nesmr->ibmw.rkey = rkey;
+	nesmr->ibmw.uobject = NULL;
+	return nes_dealloc_mw(&nesmr->ibmw);
+}
+
+
+/**
+ * nes_map_phys_fmr - stub: ignores all of its arguments and reports success
+ */
+static int nes_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+		int list_len, u64 iova)
+{
+	return 0;
+}
+
+
+/**
+ * nes_unmap_fmr - stub: ignores the list it is given and reports success
+ */
+static int nes_unmap_fmr(struct list_head *ibfmr_list)
+{
+	return 0;
+}
+
+
+
+/**
+ * nes_query_device - report the adapter's capabilities and limits in *props
+ */
+static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
+{
+	struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+	struct nes_ib_device *nesibdev = nesvnic->nesibdev;
+	struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
+
+	memset(props, 0, sizeof(*props));
+	memcpy(&props->sys_image_guid, nesvnic->netdev->dev_addr, 6);
+
+	props->fw_ver = nesadapter->fw_ver;
+	props->device_cap_flags = nesadapter->device_cap_flags;
+	props->vendor_id = nesadapter->vendor_id;
+	props->vendor_part_id = nesadapter->vendor_part_id;
+	props->hw_ver = nesadapter->hw_rev;
+	props->max_mr_size = 0x80000000;
+	props->max_qp = nesibdev->max_qp;
+	props->max_qp_wr = nesadapter->max_qp_wr - 2;
+	props->max_sge = nesadapter->max_sge;
+	props->max_cq = nesibdev->max_cq;
+	props->max_cqe = nesadapter->max_cqe - 1;
+	props->max_mr = nesibdev->max_mr;
+	props->max_mw = nesibdev->max_mr;
+	props->max_pd = nesibdev->max_pd;
+	props->max_sge_rd = 1;
+	switch (nesadapter->max_irrq_wr) {	/* encoded value -> number of read requests */
+	case 0:
+		props->max_qp_rd_atom = 1;
+		break;
+	case 1:
+		props->max_qp_rd_atom = 4;
+		break;
+	case 2:
+		props->max_qp_rd_atom = 16;
+		break;
+	case 3:
+		props->max_qp_rd_atom = 32;
+		break;
+	default:
+		props->max_qp_rd_atom = 0;
+	}
+	props->max_qp_init_rd_atom = props->max_qp_wr;
+	props->atomic_cap = IB_ATOMIC_NONE;
+	props->max_map_per_fmr = 1;
+
+	return 0;
+}
+
+
+/**
+ * nes_query_port - fill *props with fixed port attributes; ibdev and port are not consulted
+ */
+static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props)
+{
+	memset(props, 0, sizeof(*props));
+
+	props->max_mtu = IB_MTU_2048;
+	props->active_mtu = IB_MTU_2048;
+	props->lid = 1;
+	props->lmc = 0;
+	props->sm_lid = 0;
+	props->sm_sl = 0;
+	props->state = IB_PORT_ACTIVE;	/* always reported active, whatever the link state is */
+	props->phys_state = 0;
+	props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
+			IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+	props->gid_tbl_len = 1;
+	props->pkey_tbl_len = 1;
+	props->qkey_viol_cntr = 0;
+	props->active_width = IB_WIDTH_4X;
+	props->active_speed = 1;
+	props->max_msg_sz = 0x80000000;
+
+	return 0;
+}
+
+
+/**
+ * nes_modify_port - stub: ignores all of its arguments and reports success
+ */
+static int nes_modify_port(struct ib_device *ibdev, u8 port,
+		int port_modify_mask, struct ib_port_modify *props)
+{
+	return 0;
+}
+
+
+/**
+ * nes_query_pkey - always stores 0 in *pkey and reports success; port and index are ignored
+ */
+static int nes_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+	*pkey = 0;
+	return 0;
+}
+
+
+/**
+ * nes_query_gid - build the GID from the netdev hardware address; port and index are ignored
+ */
+static int nes_query_gid(struct ib_device *ibdev, u8 port,
+		int index, union ib_gid *gid)
+{
+	struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+
+	memset(&(gid->raw[0]), 0, sizeof(gid->raw));	/* everything past the first 6 bytes stays zero */
+	memcpy(&(gid->raw[0]), nesvnic->netdev->dev_addr, 6);
+
+	return 0;
+}
+
+
+/**
+ * nes_alloc_ucontext - Allocate the user context data structure. This keeps track
+ * of all objects associated with a particular user-mode client.
+ */
+static struct ib_ucontext *nes_alloc_ucontext(struct ib_device *ibdev,
+		struct ib_udata *udata)
+{
+	struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_alloc_ucontext_req req;
+	struct nes_alloc_ucontext_resp uresp;
+	struct nes_ucontext *nes_ucontext;
+	struct nes_ib_device *nesibdev = nesvnic->nesibdev;
+
+
+	if (ib_copy_from_udata(&req, udata, sizeof(struct nes_alloc_ucontext_req))) {
+		printk(KERN_ERR PFX "Invalid structure size on allocate user context.\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (req.userspace_ver != NES_ABI_USERSPACE_VER) {	/* the user library must match the ABI version exactly */
+		printk(KERN_ERR PFX "Invalid userspace driver version detected. Detected version %d, should be %d\n",
+			req.userspace_ver, NES_ABI_USERSPACE_VER);
+		return ERR_PTR(-EINVAL);
+	}
+
+
+	memset(&uresp, 0, sizeof uresp);	/* zeroed before it is filled in and copied to user space */
+
+	uresp.max_qps = nesibdev->max_qp;
+	uresp.max_pds = nesibdev->max_pd;
+	uresp.wq_size = nesdev->nesadapter->max_qp_wr * 2;
+	uresp.virtwq = nesadapter->virtwq;
+	uresp.kernel_ver = NES_ABI_KERNEL_VER;
+
+	nes_ucontext = kzalloc(sizeof *nes_ucontext, GFP_KERNEL);
+	if (!nes_ucontext)
+		return ERR_PTR(-ENOMEM);
+
+	nes_ucontext->nesdev = nesdev;
+	nes_ucontext->mmap_wq_offset = uresp.max_pds;	/* nes_mmap() treats page offsets below this as doorbells */
+	nes_ucontext->mmap_cq_offset = nes_ucontext->mmap_wq_offset +
+			((sizeof(struct nes_hw_qp_wqe) * uresp.max_qps * 2) + PAGE_SIZE-1) /
+			PAGE_SIZE;
+
+
+	if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
+		kfree(nes_ucontext);
+		return ERR_PTR(-EFAULT);
+	}
+
+	INIT_LIST_HEAD(&nes_ucontext->cq_reg_mem_list);
+	INIT_LIST_HEAD(&nes_ucontext->qp_reg_mem_list);
+	atomic_set(&nes_ucontext->usecnt, 1);	/* this reference is dropped in nes_dealloc_ucontext() */
+	return &nes_ucontext->ibucontext;
+}
+
+
+/**
+ * nes_dealloc_ucontext - drop one reference on the user context and free it
+ * once the last reference is gone; always returns 0
+ */
+static int nes_dealloc_ucontext(struct ib_ucontext *context)
+{
+	struct nes_ucontext *uctx = to_nesucontext(context);
+
+	/* only the holder of the final reference releases the memory */
+	if (atomic_dec_and_test(&uctx->usecnt))
+		kfree(uctx);
+
+	return 0;
+}
+
+
+/**
+ * nes_mmap - map a QP work-queue buffer or a doorbell page into user space, selected by vma->vm_pgoff
+ */
+static int nes_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	unsigned long index;
+	struct nes_vnic *nesvnic = to_nesvnic(context->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_ucontext *nes_ucontext;
+	struct nes_qp *nesqp;
+
+	nes_ucontext = to_nesucontext(context);
+
+	if (vma->vm_pgoff >= nes_ucontext->mmap_wq_offset) {	/* work-queue mapping */
+		index = (vma->vm_pgoff - nes_ucontext->mmap_wq_offset) * PAGE_SIZE;
+		index /= ((sizeof(struct nes_hw_qp_wqe) * nesdev->nesadapter->max_qp_wr * 2) +
+				PAGE_SIZE-1) & (~(PAGE_SIZE-1));
+		if (index >= ARRAY_SIZE(nes_ucontext->mmap_nesqp) ||	/* index is user-controlled; assumes allocated_wqs has as many bits - TODO confirm */
+				!test_bit(index, nes_ucontext->allocated_wqs)) {
+			nes_debug(NES_DBG_MMAP, "wq %lu not allocated\n", index);
+			return -EFAULT;
+		}
+		nesqp = nes_ucontext->mmap_nesqp[index];
+		if (nesqp == NULL) {
+			nes_debug(NES_DBG_MMAP, "wq %lu has a NULL QP base.\n", index);
+			return -EFAULT;
+		}
+		if (remap_pfn_range(vma, vma->vm_start,
+				virt_to_phys(nesqp->hwqp.sq_vbase) >> PAGE_SHIFT,
+				vma->vm_end - vma->vm_start,
+				vma->vm_page_prot)) {
+			nes_debug(NES_DBG_MMAP, "remap_pfn_range failed.\n");
+			return -EAGAIN;
+		}
+		vma->vm_private_data = nesqp;
+		return 0;
+	} else {	/* doorbell mapping: the page offset is the doorbell slot */
+		index = vma->vm_pgoff;
+		if (index >= NES_MAX_USER_DB_REGIONS ||	/* index is user-controlled: stay inside the doorbell tables */
+				!test_bit(index, nes_ucontext->allocated_doorbells))
+			return -EFAULT;
+
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		if (io_remap_pfn_range(vma, vma->vm_start,
+				(nesdev->doorbell_start +
+				((nes_ucontext->mmap_db_index[index] - nesdev->base_doorbell_index) * 4096))
+				>> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot))
+			return -EAGAIN;
+		vma->vm_private_data = nes_ucontext;
+		return 0;
+	}
+
+	return -ENOSYS;
+}
+
+
+/**
+ * nes_alloc_pd - allocate a protection domain; for user contexts also reserve a doorbell slot and report it through udata
+ */
+static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev,
+		struct ib_ucontext *context, struct ib_udata *udata)
+{
+	struct nes_pd *nespd;
+	struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_ucontext *nesucontext;
+	struct nes_alloc_pd_resp uresp;
+	u32 pd_num = 0;
+	int err;
+
+	nes_debug(NES_DBG_PD, "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n",
+			nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, context,
+			atomic_read(&nesvnic->netdev->refcnt));
+
+	err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds,
+			nesadapter->max_pd, &pd_num, &nesadapter->next_pd);
+	if (err) {
+		return ERR_PTR(err);
+	}
+
+	nespd = kzalloc(sizeof (struct nes_pd), GFP_KERNEL);
+	if (!nespd) {
+		nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n",
+			nespd, nesvnic->nesibdev->ibdev.name);
+
+	nespd->pd_id = (pd_num << (PAGE_SHIFT-12)) + nesadapter->base_pd;
+
+	if (context) {	/* user-space PD: it also needs a doorbell slot */
+		nesucontext = to_nesucontext(context);
+		nespd->mmap_db_index = find_next_zero_bit(nesucontext->allocated_doorbells,
+				NES_MAX_USER_DB_REGIONS, nesucontext->first_free_db);
+		nes_debug(NES_DBG_PD, "find_first_zero_biton doorbells returned %u, mapping pd_id %u.\n",
+				nespd->mmap_db_index, nespd->pd_id);
+		if (nespd->mmap_db_index >= NES_MAX_USER_DB_REGIONS) {	/* the search returns the bitmap size when nothing is free */
+			nes_debug(NES_DBG_PD, "mmap_db_index > MAX\n");
+			nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num);
+			kfree(nespd);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		uresp.pd_id = nespd->pd_id;
+		uresp.mmap_db_index = nespd->mmap_db_index;
+		if (ib_copy_to_udata(udata, &uresp, sizeof (struct nes_alloc_pd_resp))) {
+			nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num);
+			kfree(nespd);
+			return ERR_PTR(-EFAULT);
+		}
+
+		/* the slot is claimed only after user space has been told about it */
+		set_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells);
+		nesucontext->mmap_db_index[nespd->mmap_db_index] = nespd->pd_id;
+		nesucontext->first_free_db = nespd->mmap_db_index + 1;
+	}
+	nes_debug(NES_DBG_PD, "PD%u structure located @%p.\n", nespd->pd_id, nespd);
+	return &nespd->ibpd;
+}
+
+
+/**
+ * nes_dealloc_pd - release a protection domain and, for user PDs, its doorbell slot; always returns 0
+ */
+static int nes_dealloc_pd(struct ib_pd *ibpd)
+{
+	struct nes_ucontext *nesucontext;
+	struct nes_pd *nespd = to_nespd(ibpd);
+	struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+
+	if ((ibpd->uobject) && (ibpd->uobject->context)) {	/* only user PDs own a doorbell slot */
+		nesucontext = to_nesucontext(ibpd->uobject->context);
+		nes_debug(NES_DBG_PD, "Clearing bit %u from allocated doorbells\n",
+				nespd->mmap_db_index);
+		clear_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells);
+		nesucontext->mmap_db_index[nespd->mmap_db_index] = 0;
+		if (nesucontext->first_free_db > nespd->mmap_db_index) {	/* keep the search hint at the lowest freed slot */
+			nesucontext->first_free_db = nespd->mmap_db_index;
+		}
+	}
+
+	nes_debug(NES_DBG_PD, "Deallocating PD%u structure located @%p.\n",
+			nespd->pd_id, nespd);
+	nes_free_resource(nesadapter, nesadapter->allocated_pds,
+			(nespd->pd_id-nesadapter->base_pd)>>(PAGE_SHIFT-12));	/* undo the pd_id encoding to recover the resource number */
+	kfree(nespd);
+
+	return 0;
+}
+
+
+/**
+ * nes_create_ah - not implemented: always returns ERR_PTR(-ENOSYS)
+ */
+static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+
+/**
+ * nes_destroy_ah - not implemented: always returns -ENOSYS
+ */
+static int nes_destroy_ah(struct ib_ah *ah)
+{
+	return -ENOSYS;
+}
+
+
+/**
+ * nes_get_encoded_size - round *size up to 32, 128 or 512 and return the matching
+ * code (1, 2 or 3); a size above 512 is left untouched and 0 is returned
+ */
+static inline u8 nes_get_encoded_size(int *size)
+{
+	static const int limit[] = { 32, 128, 512 };
+	u8 code;
+
+	for (code = 0; code < 3; code++) {
+		if (*size <= limit[code]) {
+			*size = limit[code];
+			return code + 1;
+		}
+	}
+
+	return 0;
+}
+
+
+
+/**
+ * nes_setup_virt_qp - set up QP memory when user space supplies the WQE buffers
+ * @nesqp: QP being created
+ * @nespbl: PBL describing the user WQE buffers; its DMA buffer and the
+ *          structure itself are freed by this function on success and on
+ *          every failure path
+ * @nesvnic: vnic the QP is created on
+ * @sq_size: send queue depth in WQEs
+ * @rq_size: receive queue depth in WQEs
+ *
+ * kmap()s the first user page as the SQ virtual base, reserves one 256-byte
+ * PBL from the adapter's free_256pbl count, copies the SQ and RQ page
+ * addresses from the user PBL into it, and allocates the Q2 + QP context
+ * memory.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int nes_setup_virt_qp(struct nes_qp *nesqp, struct nes_pbl *nespbl,
+		struct nes_vnic *nesvnic, int sq_size, int rq_size)
+{
+	unsigned long flags;
+	void *mem;
+	__le64 *pbl = NULL;
+	__le64 *tpbl;
+	__le64 *pblbuffer;
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	u32 pbl_entries;
+	u8 rq_pbl_entries;
+	u8 sq_pbl_entries;
+
+	pbl_entries = nespbl->pbl_size >> 3;
+	nes_debug(NES_DBG_QP, "Userspace PBL, pbl_size=%u, pbl_entries = %d pbl_vbase=%p, pbl_pbase=%p\n",
+			nespbl->pbl_size, pbl_entries,
+			(void *)nespbl->pbl_vbase,
+			(void *)nespbl->pbl_pbase);
+	pbl = (__le64 *) nespbl->pbl_vbase; /* points to first pbl entry */
+	/*
+	 * One PBL entry per 4KB of queue memory.  The SQ pages come first in
+	 * the user PBL, the RQ pages follow them.
+	 */
+	rq_pbl_entries = (rq_size * sizeof(struct nes_hw_qp_wqe)) >> 12;
+	sq_pbl_entries = (sq_size * sizeof(struct nes_hw_qp_wqe)) >> 12;
+	nesqp->hwqp.sq_pbase = (le32_to_cpu(((__le32 *)pbl)[0])) | ((u64)((le32_to_cpu(((__le32 *)pbl)[1]))) << 32);
+	if (!nespbl->page) {
+		nes_debug(NES_DBG_QP, "QP nespbl->page is NULL \n");
+		goto err_free_user_pbl;
+	}
+
+	nesqp->hwqp.sq_vbase = kmap(nespbl->page);
+	nesqp->page = nespbl->page;
+	if (!nesqp->hwqp.sq_vbase) {
+		nes_debug(NES_DBG_QP, "QP sq_vbase kmap failed\n");
+		goto err_free_user_pbl;
+	}
+
+	/* Skip the SQ entries to reach the first RQ entry. */
+	pbl += sq_pbl_entries;
+	nesqp->hwqp.rq_pbase = (le32_to_cpu(((__le32 *)pbl)[0])) | ((u64)((le32_to_cpu(((__le32 *)pbl)[1]))) << 32);
+
+	nes_debug(NES_DBG_QP, "QP sq_vbase= %p sq_pbase=%p rq_vbase=%p rq_pbase=%p\n",
+			nesqp->hwqp.sq_vbase, (void *)nesqp->hwqp.sq_pbase,
+			nesqp->hwqp.rq_vbase, (void *)nesqp->hwqp.rq_pbase);
+
+	/* Reserve one 256-byte PBL; only the counter is touched under the lock. */
+	spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+	if (!nesadapter->free_256pbl) {
+		/* Drop the lock first: DMA memory must not be freed with IRQs off. */
+		spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+		goto err_kunmap;
+	}
+	nesadapter->free_256pbl--;
+	spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+
+	nesqp->pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 256, &nesqp->pbl_pbase);
+	pblbuffer = nesqp->pbl_vbase;
+	if (!nesqp->pbl_vbase)
+		goto err_return_256pbl;
+
+	memset(nesqp->pbl_vbase, 0, 256);
+	/* SQ page addresses go to entry 16 onwards, RQ page addresses to entry 0. */
+	tpbl = pblbuffer + 16;
+	pbl = (__le64 *)nespbl->pbl_vbase;
+	while (sq_pbl_entries--)
+		*tpbl++ = *pbl++;
+	tpbl = pblbuffer;
+	while (rq_pbl_entries--)
+		*tpbl++ = *pbl++;
+
+	/* done with memory allocated during nes_reg_user_mr() */
+	pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
+			    nespbl->pbl_pbase);
+	kfree(nespbl);
+
+	nesqp->qp_mem_size =
+			max((u32)sizeof(struct nes_qp_context), ((u32)256)) + 256;     /* this is Q2 */
+	/* Round up to a multiple of a page */
+	nesqp->qp_mem_size += PAGE_SIZE - 1;
+	nesqp->qp_mem_size &= ~(PAGE_SIZE - 1);
+
+	mem = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size,
+			&nesqp->hwqp.q2_pbase);
+
+	if (!mem) {
+		/* The user PBL is already gone; undo only what is still held. */
+		pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase);
+		nesqp->pbl_vbase = NULL;
+		spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+		nesadapter->free_256pbl++;
+		spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+		kunmap(nesqp->page);
+		return -ENOMEM;
+	}
+	/* Layout: 256 bytes of Q2 followed by the QP context. */
+	nesqp->hwqp.q2_vbase = mem;
+	mem += 256;
+	memset(nesqp->hwqp.q2_vbase, 0, 256);
+	nesqp->nesqp_context = mem;
+	memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context));
+	nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256;
+
+	return 0;
+
+err_return_256pbl:
+	spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+	nesadapter->free_256pbl++;
+	spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+err_kunmap:
+	kunmap(nesqp->page);
+err_free_user_pbl:
+	/* memory allocated during nes_reg_user_mr(); the caller does not free it */
+	pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
+			    nespbl->pbl_pbase);
+	kfree(nespbl);
+	return -ENOMEM;
+}
+
+
+/**
+ * nes_setup_mmap_qp - allocate one DMA-coherent region holding all QP memory
+ * @nesqp: QP being created
+ * @nesvnic: vnic the QP is created on
+ * @sq_size: send queue depth in WQEs
+ * @rq_size: receive queue depth in WQEs
+ *
+ * The region is laid out as: SQ ring, RQ ring, 256 bytes of Q2, QP context.
+ * Virtual and bus addresses of each piece are recorded in @nesqp.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int nes_setup_mmap_qp(struct nes_qp *nesqp, struct nes_vnic *nesvnic,
+		int sq_size, int rq_size)
+{
+	struct nes_device *nesdev = nesvnic->nesdev;
+	size_t sq_bytes = sizeof(struct nes_hw_qp_wqe) * sq_size;
+	size_t rq_bytes = sizeof(struct nes_hw_qp_wqe) * rq_size;
+	void *cursor;
+
+	/* Both rings, the context (at least 256 bytes) and 256 bytes of Q2. */
+	nesqp->qp_mem_size = sq_bytes + rq_bytes +
+			max((u32)sizeof(struct nes_qp_context), ((u32)256)) +
+			256;
+	/* Round up to a whole number of pages. */
+	nesqp->qp_mem_size += PAGE_SIZE - 1;
+	nesqp->qp_mem_size &= ~(PAGE_SIZE - 1);
+
+	cursor = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size,
+			&nesqp->hwqp.sq_pbase);
+	if (!cursor)
+		return -ENOMEM;
+
+	nes_debug(NES_DBG_QP, "PCI consistent memory for "
+			"host descriptor rings located @ %p (pa = 0x%08lX.) size = %u.\n",
+			cursor, (unsigned long)nesqp->hwqp.sq_pbase, nesqp->qp_mem_size);
+
+	memset(cursor, 0, nesqp->qp_mem_size);
+
+	/* Send queue ring sits at the start of the region. */
+	nesqp->hwqp.sq_vbase = cursor;
+	cursor += sq_bytes;
+
+	/* Receive queue ring follows the send queue. */
+	nesqp->hwqp.rq_vbase = cursor;
+	nesqp->hwqp.rq_pbase = nesqp->hwqp.sq_pbase + sq_bytes;
+	cursor += rq_bytes;
+
+	/* Q2 takes the next 256 bytes. */
+	nesqp->hwqp.q2_vbase = cursor;
+	nesqp->hwqp.q2_pbase = nesqp->hwqp.rq_pbase + rq_bytes;
+	cursor += 256;
+	memset(nesqp->hwqp.q2_vbase, 0, 256);
+
+	/* The QP context comes last. */
+	nesqp->nesqp_context = cursor;
+	nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256;
+	memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context));
+
+	return 0;
+}
+
+
+/**
+ * nes_free_qp_mem - release the pci_alloc_consistent() memory of a QP
+ * @nesdev: device the QP was created on
+ * @nesqp: QP whose memory is released
+ * @virt_wqs: non-zero if the QP was set up by nes_setup_virt_qp()
+ *
+ * Without virtual WQs the single region starting at sq_vbase is freed.
+ * With virtual WQs the 256-byte PBL is handed back to the adapter count,
+ * the Q2/context region and the PBL buffer are freed, and the user page is
+ * kunmap()ed.
+ */
+static inline void nes_free_qp_mem(struct nes_device *nesdev,
+		struct nes_qp *nesqp, int virt_wqs)
+{
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	unsigned long flags;
+
+	if (!virt_wqs) {
+		pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size,
+				nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase);
+		return;
+	}
+
+	/* Give the 256-byte PBL back to the adapter's pool count. */
+	spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+	nesadapter->free_256pbl++;
+	spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+
+	pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size,
+			nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase);
+	pci_free_consistent(nesdev->pcidev, 256,
+			nesqp->pbl_vbase, nesqp->pbl_pbase);
+	nesqp->pbl_vbase = NULL;
+	kunmap(nesqp->page);
+}
+
+
+/**
+ * nes_create_qp - create a queue pair
+ * @ibpd: protection domain the QP belongs to
+ * @init_attr: requested attributes; cap.max_inline_data, cap.max_send_wr and
+ *             cap.max_recv_wr are overwritten with the values actually used
+ * @udata: user-space request/response buffer, NULL when there is none
+ *
+ * Only IB_QPT_RC is accepted.  The requested SQ/RQ depths are rounded up by
+ * nes_get_encoded_size(), a QP number and an aligned nes_qp are allocated,
+ * queue memory is set up by nes_setup_mmap_qp() or, when user space supplied
+ * its own WQE buffers, nes_setup_virt_qp(), and a create-QP request is
+ * posted to the CQP and waited for.
+ *
+ * Returns the new ib_qp, or an ERR_PTR() value on failure (never NULL).
+ */
+static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
+		struct ib_qp_init_attr *init_attr, struct ib_udata *udata)
+{
+	u64 u64temp = 0;
+	u64 u64nesqp = 0;
+	struct nes_pd *nespd = to_nespd(ibpd);
+	struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_qp *nesqp;
+	struct nes_cq *nescq;
+	struct nes_ucontext *nes_ucontext;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	struct nes_cqp_request *cqp_request;
+	struct nes_create_qp_req req;
+	struct nes_create_qp_resp uresp;
+	struct nes_pbl  *nespbl = NULL;
+	u32 qp_num = 0;
+	u32 opcode = 0;
+	void *mem;
+	unsigned long flags;
+	int ret;
+	int err;
+	int virt_wqs = 0;
+	int sq_size;
+	int rq_size;
+	u8 sq_encoded_size;
+	u8 rq_encoded_size;
+
+	atomic_inc(&qps_created);
+	switch (init_attr->qp_type) {
+		case IB_QPT_RC:
+			if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) {
+				init_attr->cap.max_inline_data = 0;
+			} else {
+				init_attr->cap.max_inline_data = 64;
+			}
+			sq_size = init_attr->cap.max_send_wr;
+			rq_size = init_attr->cap.max_recv_wr;
+
+			/* Round the depths up; an encoding of 0 means "too large". */
+			sq_encoded_size = nes_get_encoded_size(&sq_size);
+			rq_encoded_size = nes_get_encoded_size(&rq_size);
+
+			if ((!sq_encoded_size) || (!rq_encoded_size)) {
+				nes_debug(NES_DBG_QP, "ERROR bad rq (%u) or sq (%u) size\n",
+						rq_size, sq_size);
+				return ERR_PTR(-EINVAL);
+			}
+
+			init_attr->cap.max_send_wr = sq_size -2;
+			init_attr->cap.max_recv_wr = rq_size -1;
+			nes_debug(NES_DBG_QP, "RQ size=%u, SQ Size=%u\n", rq_size, sq_size);
+
+			ret = nes_alloc_resource(nesadapter, nesadapter->allocated_qps,
+					nesadapter->max_qp, &qp_num, &nesadapter->next_qp);
+			if (ret) {
+				return ERR_PTR(ret);
+			}
+
+			/* Need 512 (actually now 1024) byte alignment on this structure */
+			mem = kzalloc(sizeof(*nesqp)+NES_SW_CONTEXT_ALIGN-1, GFP_KERNEL);
+			if (!mem) {
+				nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+				nes_debug(NES_DBG_QP, "Unable to allocate QP\n");
+				return ERR_PTR(-ENOMEM);
+			}
+			/* Round the address up; allocated_buffer keeps the pointer to kfree(). */
+			u64nesqp = (unsigned long)mem;
+			u64nesqp += ((u64)NES_SW_CONTEXT_ALIGN) - 1;
+			u64temp = ((u64)NES_SW_CONTEXT_ALIGN) - 1;
+			u64nesqp &= ~u64temp;
+			nesqp = (struct nes_qp *)(unsigned long)u64nesqp;
+			nesqp->allocated_buffer = mem;
+
+			if (udata) {
+				if (ib_copy_from_udata(&req, udata, sizeof(struct nes_create_qp_req))) {
+					nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+					kfree(nesqp->allocated_buffer);
+					nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n");
+					/* Must be an ERR_PTR: the other failures here return one, NULL is not an error value. */
+					return ERR_PTR(-EFAULT);
+				}
+				if (req.user_wqe_buffers) {
+					virt_wqs = 1;
+				}
+				if ((ibpd->uobject) && (ibpd->uobject->context)) {
+					nesqp->user_mode = 1;
+					nes_ucontext = to_nesucontext(ibpd->uobject->context);
+					if (virt_wqs) {
+						/* Find and unlink the PBL registered for these WQE buffers. */
+						err = 1;
+						list_for_each_entry(nespbl, &nes_ucontext->qp_reg_mem_list, list) {
+							if (nespbl->user_base == (unsigned long )req.user_wqe_buffers) {
+								list_del(&nespbl->list);
+								err = 0;
+								nes_debug(NES_DBG_QP, "Found PBL for virtual QP. nespbl=%p. user_base=0x%lx\n",
+									  nespbl, nespbl->user_base);
+								break;
+							}
+						}
+						if (err) {
+							nes_debug(NES_DBG_QP, "Didn't Find PBL for virtual QP. address = %llx.\n",
+								  (long long unsigned int)req.user_wqe_buffers);
+							nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+							kfree(nesqp->allocated_buffer);
+							return ERR_PTR(-ENOMEM);
+						}
+					}
+
+					nesqp->mmap_sq_db_index =
+						find_next_zero_bit(nes_ucontext->allocated_wqs,
+								   NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq);
+					/*
+					 * find_next_zero_bit() returns the bitmap size when no bit
+					 * is free, so the size itself must be rejected too.
+					 */
+					if (nesqp->mmap_sq_db_index >= NES_MAX_USER_WQ_REGIONS) {
+						nes_debug(NES_DBG_QP,
+							  "db index > max user regions, failing create QP\n");
+						nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+						if (virt_wqs) {
+							pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
+									    nespbl->pbl_pbase);
+							kfree(nespbl);
+						}
+						kfree(nesqp->allocated_buffer);
+						return ERR_PTR(-ENOMEM);
+					}
+					/*
+					 * NOTE(review): the failure paths below do not release this
+					 * slot again, leaving mmap_nesqp[] pointing at a freed QP;
+					 * confirm whether they should undo it like nes_destroy_qp().
+					 */
+					set_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs);
+					nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = nesqp;
+					nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index + 1;
+				} else {
+					nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+					kfree(nesqp->allocated_buffer);
+					return ERR_PTR(-EFAULT);
+				}
+			}
+			/* nes_setup_virt_qp() consumes nespbl on both success and failure. */
+			err = (!virt_wqs) ? nes_setup_mmap_qp(nesqp, nesvnic, sq_size, rq_size) :
+					nes_setup_virt_qp(nesqp, nespbl, nesvnic, sq_size, rq_size);
+			if (err) {
+				nes_debug(NES_DBG_QP,
+					  "error geting qp mem code = %d\n", err);
+				nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+				kfree(nesqp->allocated_buffer);
+				return ERR_PTR(-ENOMEM);
+			}
+
+			nesqp->hwqp.sq_size = sq_size;
+			nesqp->hwqp.sq_encoded_size = sq_encoded_size;
+			nesqp->hwqp.sq_head = 1;
+			nesqp->hwqp.rq_size = rq_size;
+			nesqp->hwqp.rq_encoded_size = rq_encoded_size;
+			nesqp->hwqp.qp_id = qp_num;
+			nesqp->ibqp.qp_num = nesqp->hwqp.qp_id;
+			nesqp->nespd = nespd;
+
+			nescq = to_nescq(init_attr->send_cq);
+			nesqp->nesscq = nescq;
+			nescq = to_nescq(init_attr->recv_cq);
+			nesqp->nesrcq = nescq;
+
+			/* Fill in the QP context handed to the adapter. */
+			nesqp->nesqp_context->misc |= cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) <<
+					NES_QPCONTEXT_MISC_PCI_FCN_SHIFT);
+			nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.rq_encoded_size <<
+					NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT);
+			nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.sq_encoded_size <<
+					NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT);
+			nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_PRIV_EN);
+			nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_FAST_REGISTER_EN);
+			nesqp->nesqp_context->cqs = cpu_to_le32(nesqp->nesscq->hw_cq.cq_number +
+					((u32)nesqp->nesrcq->hw_cq.cq_number << 16));
+			u64temp = (u64)nesqp->hwqp.sq_pbase;
+			nesqp->nesqp_context->sq_addr_low = cpu_to_le32((u32)u64temp);
+			nesqp->nesqp_context->sq_addr_high = cpu_to_le32((u32)(u64temp >> 32));
+
+			/* The RQ address is the ring itself, or the 256-byte PBL for virtual WQs. */
+			if (!virt_wqs) {
+				u64temp = (u64)nesqp->hwqp.rq_pbase;
+			} else {
+				u64temp = (u64)nesqp->pbl_pbase;
+			}
+			nesqp->nesqp_context->rq_addr_low = cpu_to_le32((u32)u64temp);
+			nesqp->nesqp_context->rq_addr_high = cpu_to_le32((u32)(u64temp >> 32));
+
+			/* Pick the next NIC index, wrapping after entry 3 or at a 0xf entry. */
+			spin_lock_irqsave(&nesdev->cqp.lock, flags);
+			nesqp->nesqp_context->misc2 |= cpu_to_le32(
+					(u32)nesvnic->qp_nic_index[nesvnic->next_qp_nic_index] <<
+					NES_QPCONTEXT_MISC2_NIC_INDEX_SHIFT);
+			nesvnic->next_qp_nic_index++;
+			if ((nesvnic->next_qp_nic_index > 3) ||
+					(nesvnic->qp_nic_index[nesvnic->next_qp_nic_index] == 0xf)) {
+				nesvnic->next_qp_nic_index = 0;
+			}
+			spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+
+			nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32((u32)nesqp->nespd->pd_id << 16);
+			u64temp = (u64)nesqp->hwqp.q2_pbase;
+			nesqp->nesqp_context->q2_addr_low = cpu_to_le32((u32)u64temp);
+			nesqp->nesqp_context->q2_addr_high = cpu_to_le32((u32)(u64temp >> 32));
+			/* The AEQ token is the nes_qp pointer itself. */
+			nesqp->nesqp_context->aeq_token_low =  cpu_to_le32((u32)((unsigned long)(nesqp)));
+			nesqp->nesqp_context->aeq_token_high =  cpu_to_le32((u32)(upper_32_bits((unsigned long)(nesqp))));
+			nesqp->nesqp_context->ird_ord_sizes = cpu_to_le32(NES_QPCONTEXT_ORDIRD_ALSMM |
+					((((u32)nesadapter->max_irrq_wr) <<
+					NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT) & NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK));
+			if (disable_mpa_crc) {
+				nes_debug(NES_DBG_QP, "Disabling MPA crc checking due to module option.\n");
+				nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(NES_QPCONTEXT_ORDIRD_RNMC);
+			}
+
+
+			/* Create the QP */
+			cqp_request = nes_get_cqp_request(nesdev);
+			if (cqp_request == NULL) {
+				nes_debug(NES_DBG_QP, "Failed to get a cqp_request\n");
+				nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+				nes_free_qp_mem(nesdev, nesqp,virt_wqs);
+				kfree(nesqp->allocated_buffer);
+				return ERR_PTR(-ENOMEM);
+			}
+			cqp_request->waiting = 1;
+			cqp_wqe = &cqp_request->cqp_wqe;
+
+			if (!virt_wqs) {
+				opcode = NES_CQP_CREATE_QP | NES_CQP_QP_TYPE_IWARP |
+					NES_CQP_QP_IWARP_STATE_IDLE;
+			} else {
+				opcode = NES_CQP_CREATE_QP | NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_VIRT_WQS |
+					NES_CQP_QP_IWARP_STATE_IDLE;
+			}
+			opcode |= NES_CQP_QP_CQS_VALID;
+			nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+			set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
+			set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
+
+			u64temp = (u64)nesqp->nesqp_context_pbase;
+			set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
+
+			/* Two references: one is dropped below, after the wait. */
+			atomic_set(&cqp_request->refcount, 2);
+			nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+
+			/* Wait for CQP */
+			nes_debug(NES_DBG_QP, "Waiting for create iWARP QP%u to complete.\n",
+					nesqp->hwqp.qp_id);
+			ret = wait_event_timeout(cqp_request->waitq,
+					(cqp_request->request_done != 0), NES_EVENT_TIMEOUT);
+			nes_debug(NES_DBG_QP, "Create iwarp QP%u completed, wait_event_timeout ret=%u,"
+					" nesdev->cqp_head = %u, nesdev->cqp.sq_tail = %u,"
+					" CQP Major:Minor codes = 0x%04X:0x%04X.\n",
+					nesqp->hwqp.qp_id, ret, nesdev->cqp.sq_head, nesdev->cqp.sq_tail,
+					cqp_request->major_code, cqp_request->minor_code);
+			if ((!ret) || (cqp_request->major_code)) {
+				if (atomic_dec_and_test(&cqp_request->refcount)) {
+					if (cqp_request->dynamic) {
+						kfree(cqp_request);
+					} else {
+						spin_lock_irqsave(&nesdev->cqp.lock, flags);
+						list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+						spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+					}
+				}
+				nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+				nes_free_qp_mem(nesdev, nesqp,virt_wqs);
+				kfree(nesqp->allocated_buffer);
+				/* ret == 0 means the wait timed out; otherwise the CQP reported an error. */
+				if (!ret) {
+					return ERR_PTR(-ETIME);
+				} else {
+					return ERR_PTR(-EIO);
+				}
+			} else {
+				if (atomic_dec_and_test(&cqp_request->refcount)) {
+					if (cqp_request->dynamic) {
+						kfree(cqp_request);
+					} else {
+						spin_lock_irqsave(&nesdev->cqp.lock, flags);
+						list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+						spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+					}
+				}
+			}
+
+			if (ibpd->uobject) {
+				/* Zero first so nothing left unset is copied out from the stack. */
+				memset(&uresp, 0, sizeof(uresp));
+				uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index;
+				uresp.actual_sq_size = sq_size;
+				uresp.actual_rq_size = rq_size;
+				uresp.qp_id = nesqp->hwqp.qp_id;
+				uresp.nes_drv_opt = nes_drv_opt;
+				if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
+					nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+					nes_free_qp_mem(nesdev, nesqp,virt_wqs);
+					kfree(nesqp->allocated_buffer);
+					return ERR_PTR(-EFAULT);
+				}
+			}
+
+			nes_debug(NES_DBG_QP, "QP%u structure located @%p.Size = %u.\n",
+					nesqp->hwqp.qp_id, nesqp, (u32)sizeof(*nesqp));
+			spin_lock_init(&nesqp->lock);
+			init_waitqueue_head(&nesqp->state_waitq);
+			init_waitqueue_head(&nesqp->kick_waitq);
+			nes_add_ref(&nesqp->ibqp);
+			break;
+		default:
+			nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type);
+			return ERR_PTR(-EINVAL);
+	}
+
+	/* update the QP table */
+	nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
+	nes_debug(NES_DBG_QP, "netdev refcnt=%u\n",
+			atomic_read(&nesvnic->netdev->refcnt));
+
+	return &nesqp->ibqp;
+}
+
+
+/**
+ * nes_destroy_qp - tear down a queue pair
+ * @ibqp: queue pair to destroy
+ *
+ * Marks the QP destroyed and, when its state lies between IB_QPS_INIT and
+ * IB_QPS_RTS, asks nes_modify_qp() to move it to IB_QPS_ERR.  If the state
+ * is INIT or RTR and a cm_id is attached, a CONNECT_REPLY event with timeout
+ * status is built and passed to the cm_id's event handler.  For user-mode
+ * QPs the mmap_sq_db_index slot is cleared in the ucontext (lowering
+ * first_free_wq if needed) and, when pbl_pbase is set, the page is
+ * kunmap()ed.  The QP reference is then dropped with nes_rem_ref().
+ *
+ * NOTE(review): cm_id->rem_ref() is called before cm_id->event_handler();
+ * confirm the cm_id cannot be released by that rem_ref.
+ * NOTE(review): only some fields of cm_event are assigned before it is
+ * handed to the event handler; confirm the handler reads no others.
+ *
+ * Always returns 0.
+ */
+static int nes_destroy_qp(struct ib_qp *ibqp)
+{
+	struct nes_qp *nesqp = to_nesqp(ibqp);
+	/* struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); */
+	struct nes_ucontext *nes_ucontext;
+	struct ib_qp_attr attr;
+	struct iw_cm_id *cm_id;
+	struct iw_cm_event cm_event;
+	int ret;
+
+	atomic_inc(&sw_qps_destroyed);
+	nesqp->destroyed = 1;
+
+	/* Blow away the connection if it exists: force the QP into the error state. */
+	if (nesqp->ibqp_state >= IB_QPS_INIT && nesqp->ibqp_state <= IB_QPS_RTS) {
+		/* if (nesqp->ibqp_state == IB_QPS_RTS) { */
+		attr.qp_state = IB_QPS_ERR;	/* only qp_state is set; the mask below is IB_QP_STATE */
+		nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
+	}
+
+	if (((nesqp->ibqp_state == IB_QPS_INIT) ||
+			(nesqp->ibqp_state == IB_QPS_RTR)) && (nesqp->cm_id)) {
+		cm_id = nesqp->cm_id;
+		cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
+		cm_event.status = IW_CM_EVENT_STATUS_TIMEOUT;
+		cm_event.local_addr = cm_id->local_addr;
+		cm_event.remote_addr = cm_id->remote_addr;
+		cm_event.private_data = NULL;
+		cm_event.private_data_len = 0;
+
+		nes_debug(NES_DBG_QP, "Generating a CM Timeout Event for "
+				"QP%u. cm_id = %p, refcount = %u. \n",
+				nesqp->hwqp.qp_id, cm_id, atomic_read(&nesqp->refcount));
+
+		cm_id->rem_ref(cm_id);
+		ret = cm_id->event_handler(cm_id, &cm_event);
+		if (ret)
+			nes_debug(NES_DBG_QP, "OFA CM event_handler returned, ret=%d\n", ret);
+	}
+
+
+	if (nesqp->user_mode) {
+		if ((ibqp->uobject)&&(ibqp->uobject->context)) {
+			nes_ucontext = to_nesucontext(ibqp->uobject->context);
+			clear_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs);
+			nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = NULL;
+			if (nes_ucontext->first_free_wq > nesqp->mmap_sq_db_index) {
+				nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index;
+			}
+		}
+		if (nesqp->pbl_pbase)	/* pbl_pbase is assigned by nes_setup_virt_qp(), which kmap()s the page */
+			kunmap(nesqp->page);
+	}
+
+	nes_rem_ref(&nesqp->ibqp);
+	return 0;
+}
+
+
+/*
+ * Give a CQP request back: dynamically allocated requests are kfree()d, the
+ * others return to nesdev->cqp_avail_reqs under nesdev->cqp.lock.
+ * Must be called without nesadapter->pbl_lock held.
+ */
+static void nes_create_cq_put_request(struct nes_device *nesdev,
+		struct nes_cqp_request *cqp_request)
+{
+	unsigned long flags;
+
+	if (cqp_request->dynamic) {
+		kfree(cqp_request);
+		return;
+	}
+	spin_lock_irqsave(&nesdev->cqp.lock, flags);
+	list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+	spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+}
+
+
+/**
+ * nes_create_cq - create a completion queue
+ * @ibdev: device the CQ is created on
+ * @entries: requested number of entries; the hardware CQ is sized
+ *           max(entries + 1, 5)
+ * @comp_vector: not used by this function
+ * @context: user context, NULL for a kernel CQ
+ * @udata: user-space request/response buffer, used only when @context is set
+ *
+ * For a kernel CQ the ring is allocated with pci_alloc_consistent().  For a
+ * user CQ the PBL previously registered for req.user_cq_buffer is looked up
+ * in the ucontext's cq_reg_mem_list and unlinked; it is freed once the CQ has
+ * been created or creation has failed.  A create-CQ request is then posted
+ * to the CQP and waited for.
+ *
+ * Returns the new ib_cq or an ERR_PTR() value.
+ */
+static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
+		int comp_vector,
+		struct ib_ucontext *context, struct ib_udata *udata)
+{
+	u64 u64temp;
+	struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_cq *nescq;
+	struct nes_ucontext *nes_ucontext = NULL;
+	struct nes_cqp_request *cqp_request;
+	void *mem = NULL;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	struct nes_pbl *nespbl = NULL;
+	struct nes_create_cq_req req;
+	struct nes_create_cq_resp resp;
+	u32 cq_num = 0;
+	u32 opcode = 0;
+	u32 pbl_entries = 1;
+	int err;
+	unsigned long flags;
+	int ret;
+	int cqp_failed;
+
+	err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs,
+			nesadapter->max_cq, &cq_num, &nesadapter->next_cq);
+	if (err) {
+		return ERR_PTR(err);
+	}
+
+	nescq = kzalloc(sizeof(struct nes_cq), GFP_KERNEL);
+	if (!nescq) {
+		nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+		nes_debug(NES_DBG_CQ, "Unable to allocate nes_cq struct\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	nescq->hw_cq.cq_size = max(entries + 1, 5);
+	nescq->hw_cq.cq_number = cq_num;
+	nescq->ibcq.cqe = nescq->hw_cq.cq_size - 1;
+
+
+	if (context) {
+		nes_ucontext = to_nesucontext(context);
+		if (ib_copy_from_udata(&req, udata, sizeof (struct nes_create_cq_req))) {
+			nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+			kfree(nescq);
+			return ERR_PTR(-EFAULT);
+		}
+		nesvnic->mcrq_ucontext = nes_ucontext;
+		nes_ucontext->mcrqf = req.mcrqf;
+		if (nes_ucontext->mcrqf) {
+			/* The CQ number is derived from mcrqf instead of the allocated one. */
+			if (nes_ucontext->mcrqf & 0x80000000)
+				nescq->hw_cq.cq_number = nesvnic->nic.qp_id + 12 + (nes_ucontext->mcrqf & 0xf) - 1;
+			else if (nes_ucontext->mcrqf & 0x40000000)
+				nescq->hw_cq.cq_number = nes_ucontext->mcrqf & 0xffff;
+			else
+				nescq->hw_cq.cq_number = nesvnic->mcrq_qp_id + nes_ucontext->mcrqf-1;
+			/*
+			 * NOTE(review): cq_num is released here and again on the
+			 * failure paths below; confirm that is intended.
+			 */
+			nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+		}
+		nes_debug(NES_DBG_CQ, "CQ Virtual Address = %08lX, size = %u.\n",
+				(unsigned long)req.user_cq_buffer, entries);
+		/*
+		 * err is 0 at this point, so mark "not found" before searching;
+		 * otherwise a missing PBL would go unnoticed and nespbl would be
+		 * left pointing at the list head's container.
+		 */
+		err = 1;
+		list_for_each_entry(nespbl, &nes_ucontext->cq_reg_mem_list, list) {
+			if (nespbl->user_base == (unsigned long )req.user_cq_buffer) {
+				list_del(&nespbl->list);
+				err = 0;
+				nes_debug(NES_DBG_CQ, "Found PBL for virtual CQ. nespbl=%p.\n",
+						nespbl);
+				break;
+			}
+		}
+		if (err) {
+			nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+			kfree(nescq);
+			return ERR_PTR(-EFAULT);
+		}
+
+		pbl_entries = nespbl->pbl_size >> 3;
+		nescq->cq_mem_size = 0;
+	} else {
+		nescq->cq_mem_size = nescq->hw_cq.cq_size * sizeof(struct nes_hw_cqe);
+		nes_debug(NES_DBG_CQ, "Attempting to allocate pci memory (%u entries, %u bytes) for CQ%u.\n",
+				entries, nescq->cq_mem_size, nescq->hw_cq.cq_number);
+
+		/* allocate the physical buffer space */
+		mem = pci_alloc_consistent(nesdev->pcidev, nescq->cq_mem_size,
+				&nescq->hw_cq.cq_pbase);
+		if (!mem) {
+			printk(KERN_ERR PFX "Unable to allocate pci memory for cq\n");
+			nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+			kfree(nescq);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		memset(mem, 0, nescq->cq_mem_size);
+		nescq->hw_cq.cq_vbase = mem;
+		nescq->hw_cq.cq_head = 0;
+		nes_debug(NES_DBG_CQ, "CQ%u virtual address @ %p, phys = 0x%08X\n",
+				nescq->hw_cq.cq_number, nescq->hw_cq.cq_vbase,
+				(u32)nescq->hw_cq.cq_pbase);
+	}
+
+	nescq->hw_cq.ce_handler = nes_iwarp_ce_handler;
+	spin_lock_init(&nescq->lock);
+
+	/* send CreateCQ request to CQP */
+	cqp_request = nes_get_cqp_request(nesdev);
+	if (cqp_request == NULL) {
+		nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n");
+		err = -ENOMEM;
+		goto err_free_cq;
+	}
+	cqp_request->waiting = 1;
+	cqp_wqe = &cqp_request->cqp_wqe;
+
+	opcode = NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
+			NES_CQP_CQ_CHK_OVERFLOW |
+			NES_CQP_CQ_CEQE_MASK | ((u32)nescq->hw_cq.cq_size << 16);
+
+	/* A multi-entry PBL consumes a 4KB PBL (more than 32 entries) or a 256-byte one. */
+	spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+
+	if (pbl_entries != 1) {
+		if (pbl_entries > 32) {
+			/* use 4k pbl */
+			nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 4k PBL\n", pbl_entries);
+			if (nesadapter->free_4kpbl == 0) {
+				/* Drop pbl_lock first; the request list has its own lock. */
+				spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+				nes_create_cq_put_request(nesdev, cqp_request);
+				err = -ENOMEM;
+				goto err_free_cq;
+			}
+			opcode |= (NES_CQP_CQ_VIRT | NES_CQP_CQ_4KB_CHUNK);
+			nescq->virtual_cq = 2;
+			nesadapter->free_4kpbl--;
+		} else {
+			/* use 256 byte pbl */
+			nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 256 byte PBL\n", pbl_entries);
+			if (nesadapter->free_256pbl == 0) {
+				spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+				nes_create_cq_put_request(nesdev, cqp_request);
+				err = -ENOMEM;
+				goto err_free_cq;
+			}
+			opcode |= NES_CQP_CQ_VIRT;
+			nescq->virtual_cq = 1;
+			nesadapter->free_256pbl--;
+		}
+	}
+
+	spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
+			(nescq->hw_cq.cq_number | ((u32)nesdev->ceq_index << 16)));
+
+	if (context) {
+		/* Multi-entry: pass the PBL itself; single entry: pass the page it names. */
+		if (pbl_entries != 1)
+			u64temp = (u64)nespbl->pbl_pbase;
+		else
+			u64temp	= le64_to_cpu(nespbl->pbl_vbase[0]);
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX,
+				nes_ucontext->mmap_db_index[0]);
+	} else {
+		u64temp = (u64)nescq->hw_cq.cq_pbase;
+		cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
+	}
+	set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
+	/* The CQ context is the address of hw_cq shifted right by one bit. */
+	u64temp = (u64)(unsigned long)&nescq->hw_cq;
+	cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] =
+			cpu_to_le32((u32)(u64temp >> 1));
+	cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
+			cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
+
+	/* Two references: one is dropped below, after the wait. */
+	atomic_set(&cqp_request->refcount, 2);
+	nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+
+	/* Wait for CQP */
+	nes_debug(NES_DBG_CQ, "Waiting for create iWARP CQ%u to complete.\n",
+			nescq->hw_cq.cq_number);
+	ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
+			NES_EVENT_TIMEOUT * 2);
+	nes_debug(NES_DBG_CQ, "Create iWARP CQ%u completed, wait_event_timeout ret = %d.\n",
+			nescq->hw_cq.cq_number, ret);
+
+	/* Read the result before dropping our reference; the request may be freed. */
+	cqp_failed = (!ret) || (cqp_request->major_code);
+	if (cqp_failed)
+		nes_debug(NES_DBG_CQ, "iWARP CQ%u create timeout expired, major code = 0x%04X,"
+				" minor code = 0x%04X\n",
+				nescq->hw_cq.cq_number, cqp_request->major_code, cqp_request->minor_code);
+	if (atomic_dec_and_test(&cqp_request->refcount))
+		nes_create_cq_put_request(nesdev, cqp_request);
+	if (cqp_failed) {
+		/*
+		 * NOTE(review): the free_4kpbl/free_256pbl count taken above is
+		 * not restored on this path; confirm whether it should be.
+		 */
+		err = -EIO;
+		goto err_free_cq;
+	}
+
+	if (context) {
+		/* free the nespbl */
+		pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
+				nespbl->pbl_pbase);
+		kfree(nespbl);
+		/* Zero first so nothing left unset is copied out from the stack. */
+		memset(&resp, 0, sizeof(resp));
+		resp.cq_id = nescq->hw_cq.cq_number;
+		resp.cq_size = nescq->hw_cq.cq_size;
+		resp.mmap_db_index = 0;
+		if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
+			nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+			kfree(nescq);
+			return ERR_PTR(-EFAULT);
+		}
+	}
+
+	return &nescq->ibcq;
+
+err_free_cq:
+	if (context) {
+		/* The PBL was unlinked from cq_reg_mem_list above, so it must be freed here. */
+		pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
+				nespbl->pbl_pbase);
+		kfree(nespbl);
+	} else {
+		pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
+				nescq->hw_cq.cq_pbase);
+	}
+	nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+	kfree(nescq);
+	return ERR_PTR(err);
+}
+
+
+/**
+ * nes_destroy_cq - destroy a completion queue
+ * @ib_cq: completion queue to destroy; NULL is accepted and ignored
+ *
+ * Hands any PBL used by the CQ back to the adapter's free counts, releases
+ * the CQ number, posts a destroy-CQ request to the CQP and waits for it.
+ * The ring memory (if this function's owner allocated one) and the nes_cq
+ * are freed whether or not the CQP request succeeded.
+ *
+ * Returns 0 on success, -ENOMEM if no CQP request was available (nothing is
+ * freed in that case), -ETIME if the wait timed out, or -EIO if the CQP
+ * reported a major code.
+ */
+static int nes_destroy_cq(struct ib_cq *ib_cq)
+{
+	struct nes_cq *nescq;
+	struct nes_vnic *nesvnic;
+	struct nes_device *nesdev;
+	struct nes_adapter *nesadapter;
+	struct nes_cqp_request *cqp_request;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	unsigned long flags;
+	u32 opcode = 0;
+	int ret;
+	int cqp_failed;
+
+	if (!ib_cq)
+		return 0;
+
+	nescq = to_nescq(ib_cq);
+	nesvnic = to_nesvnic(ib_cq->device);
+	nesdev = nesvnic->nesdev;
+	nesadapter = nesdev->nesadapter;
+
+	nes_debug(NES_DBG_CQ, "Destroy CQ%u\n", nescq->hw_cq.cq_number);
+
+	/* Send DestroyCQ request to CQP */
+	cqp_request = nes_get_cqp_request(nesdev);
+	if (!cqp_request) {
+		nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n");
+		return -ENOMEM;
+	}
+	cqp_request->waiting = 1;
+	cqp_wqe = &cqp_request->cqp_wqe;
+	opcode = NES_CQP_DESTROY_CQ | (nescq->hw_cq.cq_size << 16);
+
+	/* virtual_cq: 1 = 256-byte PBL in use, 2 = 4KB PBL in use. */
+	spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+	switch (nescq->virtual_cq) {
+	case 1:
+		nesadapter->free_256pbl++;
+		if (nesadapter->free_256pbl > nesadapter->max_256pbl) {
+			printk(KERN_ERR PFX "%s: free 256B PBLs(%u) has exceeded the max(%u)\n",
+					__FUNCTION__, nesadapter->free_256pbl, nesadapter->max_256pbl);
+		}
+		break;
+	case 2:
+		nesadapter->free_4kpbl++;
+		if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) {
+			printk(KERN_ERR PFX "%s: free 4K PBLs(%u) has exceeded the max(%u)\n",
+					__FUNCTION__, nesadapter->free_4kpbl, nesadapter->max_4kpbl);
+		}
+		opcode |= NES_CQP_CQ_4KB_CHUNK;
+		break;
+	default:
+		break;
+	}
+	spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
+		(nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16)));
+	nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number);
+	/* Two references: one is dropped here once the wait is over. */
+	atomic_set(&cqp_request->refcount, 2);
+	nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+
+	/* Wait for CQP */
+	nes_debug(NES_DBG_CQ, "Waiting for destroy iWARP CQ%u to complete.\n",
+			nescq->hw_cq.cq_number);
+	ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
+			NES_EVENT_TIMEOUT);
+	nes_debug(NES_DBG_CQ, "Destroy iWARP CQ%u completed, wait_event_timeout ret = %u,"
+			" CQP Major:Minor codes = 0x%04X:0x%04X.\n",
+			nescq->hw_cq.cq_number, ret, cqp_request->major_code,
+			cqp_request->minor_code);
+
+	/* Decide the outcome before our reference to the request is dropped. */
+	cqp_failed = (!ret) || (cqp_request->major_code);
+
+	if (atomic_dec_and_test(&cqp_request->refcount)) {
+		if (cqp_request->dynamic) {
+			kfree(cqp_request);
+		} else {
+			spin_lock_irqsave(&nesdev->cqp.lock, flags);
+			list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+			spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+		}
+	}
+
+	if (!cqp_failed) {
+		ret = 0;
+	} else if (!ret) {
+		nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n",
+				nescq->hw_cq.cq_number);
+		ret = -ETIME;
+	} else {
+		nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n",
+				nescq->hw_cq.cq_number);
+		ret = -EIO;
+	}
+
+	/* cq_mem_size is non-zero only when a ring was allocated for this CQ. */
+	if (nescq->cq_mem_size)
+		pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size,
+				(void *)nescq->hw_cq.cq_vbase, nescq->hw_cq.cq_pbase);
+	kfree(nescq);
+
+	return ret;
+}
+
+
+/**
+ * nes_reg_mr - post a Register STag request on the CQP and wait for it
+ * @nesdev: device whose CQP is used
+ * @nespd: protection domain; its pd_id is written into the WQE
+ * @stag: STag value to register
+ * @region_length: length of the region
+ * @root_vpbl: its pbl_pbase is the PBL address when pbl_count is non-zero
+ * @single_buffer: physical address used when pbl_count is zero
+ * @pbl_count: number of leaf PBLs, 0 if the region needs no PBL
+ * @residual_page_count: number of entries in the last leaf PBL
+ * @acc: IB_ACCESS_* flags, translated into STag rights
+ * @iova_start: points to the starting virtual address of the region
+ *
+ * Returns 0 on success, -ENOMEM if no CQP request or not enough free PBLs
+ * are available, -ETIME on CQP timeout, -EIO on a CQP major error code.
+ */
+static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
+		u32 stag, u64 region_length, struct nes_root_vpbl *root_vpbl,
+		dma_addr_t single_buffer, u16 pbl_count, u16 residual_page_count,
+		int acc, u64 *iova_start)
+{
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	struct nes_cqp_request *cqp_request;
+	unsigned long flags;
+	int ret;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	u32 opcode = 0;
+	u16 major_code;
+	int out_of_pbls = 0;
+
+	/* Register the region with the adapter */
+	cqp_request = nes_get_cqp_request(nesdev);
+	if (cqp_request == NULL) {
+		nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
+		return -ENOMEM;
+	}
+	cqp_request->waiting = 1;
+	cqp_wqe = &cqp_request->cqp_wqe;
+
+	spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+	/* track PBL resources */
+	if (pbl_count > 1) {
+		/* Two level PBL */
+		if ((pbl_count+1) > nesadapter->free_4kpbl) {
+			nes_debug(NES_DBG_MR, "Out of 4KB Pbls for two level request.\n");
+			out_of_pbls = 1;
+		} else {
+			nesadapter->free_4kpbl -= pbl_count+1;
+		}
+	} else if ((pbl_count == 1) && (residual_page_count > 32)) {
+		/* a single leaf with more than 32 entries is accounted as a 4KB PBL */
+		if (pbl_count > nesadapter->free_4kpbl) {
+			nes_debug(NES_DBG_MR, "Out of 4KB Pbls.\n");
+			out_of_pbls = 1;
+		} else {
+			nesadapter->free_4kpbl -= pbl_count;
+		}
+	} else if (pbl_count == 1) {
+		/* a single leaf with at most 32 entries is accounted as a 256B PBL */
+		if (pbl_count > nesadapter->free_256pbl) {
+			nes_debug(NES_DBG_MR, "Out of 256B Pbls.\n");
+			out_of_pbls = 1;
+		} else {
+			nesadapter->free_256pbl -= pbl_count;
+		}
+	}
+	spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+
+	if (out_of_pbls) {
+		/* cqp_avail_reqs is protected by cqp.lock, not by pbl_lock */
+		if (cqp_request->dynamic) {
+			kfree(cqp_request);
+		} else {
+			spin_lock_irqsave(&nesdev->cqp.lock, flags);
+			list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+			spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+		}
+		return -ENOMEM;
+	}
+
+	opcode = NES_CQP_REGISTER_STAG | NES_CQP_STAG_RIGHTS_LOCAL_READ |
+					NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR;
+	if (acc & IB_ACCESS_LOCAL_WRITE)
+		opcode |= NES_CQP_STAG_RIGHTS_LOCAL_WRITE;
+	if (acc & IB_ACCESS_REMOTE_WRITE)
+		opcode |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_REM_ACC_EN;
+	if (acc & IB_ACCESS_REMOTE_READ)
+		opcode |= NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_REM_ACC_EN;
+	if (acc & IB_ACCESS_MW_BIND)
+		opcode |= NES_CQP_STAG_RIGHTS_WINDOW_BIND | NES_CQP_STAG_REM_ACC_EN;
+
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
+	set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, *iova_start);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, region_length);
+
+	cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] =
+			cpu_to_le32((u32)(region_length >> 8) & 0xff000000);
+	cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |=
+			cpu_to_le32(nespd->pd_id & 0x00007fff);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
+
+	if (pbl_count == 0) {
+		set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, single_buffer);
+	} else {
+		set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, root_vpbl->pbl_pbase);
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, pbl_count);
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX,
+				(((pbl_count - 1) * 4096) + (residual_page_count*8)));
+
+		if ((pbl_count > 1) || (residual_page_count > 32))
+			cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE);
+	}
+	barrier();
+
+	atomic_set(&cqp_request->refcount, 2);
+	nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+
+	/* Wait for CQP */
+	ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
+			NES_EVENT_TIMEOUT);
+	nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u,"
+			" CQP Major:Minor codes = 0x%04X:0x%04X.\n",
+			stag, ret, cqp_request->major_code, cqp_request->minor_code);
+	major_code = cqp_request->major_code;
+	if (atomic_dec_and_test(&cqp_request->refcount)) {
+		if (cqp_request->dynamic) {
+			kfree(cqp_request);
+		} else {
+			spin_lock_irqsave(&nesdev->cqp.lock, flags);
+			list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+			spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+		}
+	}
+	if (!ret)
+		return -ETIME;
+	if (major_code)
+		return -EIO;
+	return 0;
+}
+
+
+/**
+ * nes_reg_phys_mr - register a list of physical buffers as a memory region
+ * @ib_pd: protection domain the region belongs to
+ * @buffer_list: physical buffers; each must be page aligned and non-empty
+ * @num_phys_buf: number of entries in @buffer_list, 1 to 1024*512
+ * @acc: IB_ACCESS_* flags
+ * @iova_start: points to the starting virtual address of the region
+ *
+ * Returns the new ib_mr or an ERR_PTR().  The PBLs are always freed again.
+ */
+static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
+		struct ib_phys_buf *buffer_list, int num_phys_buf, int acc,
+		u64 * iova_start)
+{
+	u64 region_length;
+	struct nes_pd *nespd = to_nespd(ib_pd);
+	struct nes_vnic *nesvnic = to_nesvnic(ib_pd->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_mr *nesmr;
+	struct ib_mr *ibmr;
+	struct nes_vpbl vpbl;
+	struct nes_root_vpbl root_vpbl;
+	u32 stag;
+	u32 i;
+	u32 stag_index = 0;
+	u32 next_stag_index = 0;
+	u32 driver_key = 0;
+	u32 root_pbl_index = 0;
+	u32 cur_pbl_index = 0;
+	int err = 0;
+	int ret = 0;
+	u16 pbl_count = 0;
+	u8 single_page = 1;
+	u8 stag_key = 0;
+
+	region_length = 0;
+	vpbl.pbl_vbase = NULL;
+	root_vpbl.pbl_vbase = NULL;
+	root_vpbl.pbl_pbase = 0;
+	root_vpbl.leaf_vpbl = NULL;
+
+	get_random_bytes(&next_stag_index, sizeof(next_stag_index));
+	stag_key = (u8)next_stag_index;
+
+	driver_key = 0;
+
+	next_stag_index >>= 8;
+	next_stag_index %= nesadapter->max_mr;
+	if (num_phys_buf < 1)
+		return ERR_PTR(-EINVAL);
+	if (num_phys_buf > (1024*512))
+		return ERR_PTR(-E2BIG);
+
+	err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr,
+			&stag_index, &next_stag_index);
+	if (err) {
+		return ERR_PTR(err);
+	}
+
+	nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
+	if (!nesmr) {
+		nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 0; i < num_phys_buf; i++) {
+
+		if ((i & 0x01FF) == 0) {
+			if (root_pbl_index == 1) {
+				/* Allocate the root PBL */
+				root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
+						&root_vpbl.pbl_pbase);
+				nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
+						root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
+				if (!root_vpbl.pbl_vbase) {
+					ibmr = ERR_PTR(-ENOMEM);
+					goto reg_phys_err;
+				}
+				root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, GFP_KERNEL);
+				if (!root_vpbl.leaf_vpbl) {
+					ibmr = ERR_PTR(-ENOMEM);
+					goto reg_phys_err;
+				}
+				root_vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
+				root_vpbl.pbl_vbase[0].pa_high =
+						cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
+				root_vpbl.leaf_vpbl[0] = vpbl;
+			}
+			/* Allocate a 4K buffer for the PBL */
+			vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+					&vpbl.pbl_pbase);
+			nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
+					vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
+			if (!vpbl.pbl_vbase) {
+				ibmr = ERR_PTR(-ENOMEM);
+				goto reg_phys_err;
+			}
+			/* Fill in the root table */
+			if (1 <= root_pbl_index) {
+				root_vpbl.pbl_vbase[root_pbl_index].pa_low =
+						cpu_to_le32((u32)vpbl.pbl_pbase);
+				root_vpbl.pbl_vbase[root_pbl_index].pa_high =
+						cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
+				root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
+			}
+			root_pbl_index++;
+			cur_pbl_index = 0;
+		}
+		if (buffer_list[i].addr & ~PAGE_MASK) {
+			nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n",
+					(unsigned int) buffer_list[i].addr);
+			ibmr = ERR_PTR(-EINVAL);
+			goto reg_phys_err;
+		}
+
+		if (!buffer_list[i].size) {
+			nes_debug(NES_DBG_MR, "Invalid Buffer Size\n");
+			ibmr = ERR_PTR(-EINVAL);
+			goto reg_phys_err;
+		}
+
+		region_length += buffer_list[i].size;
+		if ((i != 0) && (single_page)) {
+			if ((buffer_list[i-1].addr+PAGE_SIZE) != buffer_list[i].addr)
+				single_page = 0;
+		}
+		vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr);
+		vpbl.pbl_vbase[cur_pbl_index++].pa_high =
+				cpu_to_le32((u32)((((u64)buffer_list[i].addr) >> 32)));
+	}
+
+	stag = stag_index << 8;
+	stag |= driver_key;
+	stag += (u32)stag_key;
+
+	nes_debug(NES_DBG_MR, "Registering STag 0x%08X, VA = 0x%016lX,"
+			" length = 0x%016lX, index = 0x%08X\n",
+			stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index);
+
+	region_length -= (*iova_start)&PAGE_MASK;
+
+	/* Make the leaf PBL the root if only one PBL */
+	if (root_pbl_index == 1) {
+		root_vpbl.pbl_pbase = vpbl.pbl_pbase;
+	}
+
+	if (single_page) {
+		pbl_count = 0;
+	} else {
+		pbl_count = root_pbl_index;
+	}
+	ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl,
+			buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start);
+
+	if (ret == 0) {
+		nesmr->ibmr.rkey = stag;
+		nesmr->ibmr.lkey = stag;
+		nesmr->mode = IWNES_MEMREG_TYPE_MEM;
+		ibmr = &nesmr->ibmr;
+		nesmr->pbl_4k = ((pbl_count > 1) || (cur_pbl_index > 32)) ? 1 : 0;
+		nesmr->pbls_used = pbl_count;
+		if (pbl_count > 1) {
+			nesmr->pbls_used++;
+		}
+		goto free_pbls;
+	}
+	ibmr = ERR_PTR(-ENOMEM);
+
+	reg_phys_err:
+	/* the MR was not created: give back the STag index and the nes_mr */
+	nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+	kfree(nesmr);
+
+	free_pbls:
+	/* free the PBLs, whether the registration worked or not */
+	if (root_vpbl.leaf_vpbl) {
+		/* two level case: every allocated leaf is recorded in leaf_vpbl[] */
+		for (i=0; i<root_pbl_index; i++) {
+			pci_free_consistent(nesdev->pcidev, 4096, root_vpbl.leaf_vpbl[i].pbl_vbase,
+					root_vpbl.leaf_vpbl[i].pbl_pbase);
+		}
+		kfree(root_vpbl.leaf_vpbl);
+	} else if (vpbl.pbl_vbase) {
+		/* single PBL case */
+		pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
+	}
+	if (root_vpbl.pbl_vbase) {
+		pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
+				root_vpbl.pbl_pbase);
+	}
+
+	return ibmr;
+}
+
+
+/**
+ * nes_get_dma_mr - register one buffer at address 0, size 0xffffffffff
+ */
+static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc)
+{
+	u64 iova = 0;
+	struct ib_phys_buf whole_range;
+
+	nes_debug(NES_DBG_MR, "\n");
+
+	whole_range.addr = 0;
+	whole_range.size = (u64)0xffffffffffULL;
+	return nes_reg_phys_mr(pd, &whole_range, 1, acc, &iova);
+}
+
+
+/**
+ * nes_reg_user_mr - register user memory with the adapter
+ * @pd: protection domain the region belongs to
+ * @start: user virtual address passed to ib_umem_get()
+ * @length: length of the user buffer in bytes
+ * @virt: virtual address used as the start of the registered region
+ * @acc: IB_ACCESS_* flags
+ * @udata: carries a struct nes_mem_reg_req selecting the registration type
+ *
+ * IWNES_MEMREG_TYPE_MEM builds the PBLs for the pinned pages (512 entries
+ * per 4KB leaf, a root PBL once a second leaf is needed), registers an
+ * STag through nes_reg_mr() and then frees the PBL memory again.
+ *
+ * IWNES_MEMREG_TYPE_QP and IWNES_MEMREG_TYPE_CQ only build a flat page
+ * list and queue it on the ucontext's qp_reg_mem_list or cq_reg_mem_list;
+ * nothing is posted to the adapter and rkey/lkey are set to -1.
+ *
+ * Any other registration type fails with -ENOSYS.  The pinned umem is
+ * released on every failure path.
+ *
+ * Returns the new ib_mr or an ERR_PTR().
+ */
+static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+		u64 virt, int acc, struct ib_udata *udata)
+{
+	u64 iova_start;
+	__le64 *pbl;
+	u64 region_length;
+	dma_addr_t last_dma_addr = 0;
+	dma_addr_t first_dma_addr = 0;
+	struct nes_pd *nespd = to_nespd(pd);
+	struct nes_vnic *nesvnic = to_nesvnic(pd->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct ib_mr *ibmr = ERR_PTR(-EINVAL);
+	struct ib_umem_chunk *chunk;
+	struct nes_ucontext *nes_ucontext;
+	struct nes_pbl *nespbl;
+	struct nes_mr *nesmr;
+	struct ib_umem *region;
+	struct nes_mem_reg_req req;
+	struct nes_vpbl vpbl;
+	struct nes_root_vpbl root_vpbl;
+	int nmap_index, page_index;
+	int page_count = 0;
+	int err, pbl_depth = 0;
+	int chunk_pages;
+	int ret;
+	u32 stag;
+	u32 stag_index = 0;
+	u32 next_stag_index;
+	u32 driver_key;
+	u32 root_pbl_index = 0;
+	u32 cur_pbl_index = 0;
+	u32 skip_pages;
+	u16 pbl_count;
+	u8 single_page = 1;
+	u8 stag_key;
+
+	region = ib_umem_get(pd->uobject->context, start, length, acc);
+	if (IS_ERR(region)) {
+		return (struct ib_mr *)region;
+	}
+
+	nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u,"
+			" offset = %u, page size = %u.\n",
+			(unsigned long int)start, (unsigned long int)virt, (u32)length,
+			region->offset, region->page_size);
+
+	/* whole 4KB pages of region->offset, skipped in the first sg entry below */
+	skip_pages = ((u32)region->offset) >> 12;
+
+	if (ib_copy_from_udata(&req, udata, sizeof(req))) {
+		ib_umem_release(region);
+		return ERR_PTR(-EFAULT);
+	}
+	nes_debug(NES_DBG_MR, "Memory Registration type = %08X.\n", req.reg_type);
+
+	switch (req.reg_type) {
+		case IWNES_MEMREG_TYPE_MEM:
+			region_length = 0;
+			vpbl.pbl_vbase = NULL;
+			root_vpbl.pbl_vbase = NULL;
+			root_vpbl.pbl_pbase = 0;
+			root_vpbl.leaf_vpbl = NULL;
+
+			get_random_bytes(&next_stag_index, sizeof(next_stag_index));
+			stag_key = (u8)next_stag_index;
+
+			driver_key = next_stag_index & 0x70000000;
+
+			next_stag_index >>= 8;
+			next_stag_index %= nesadapter->max_mr;
+
+			err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
+					nesadapter->max_mr, &stag_index, &next_stag_index);
+			if (err) {
+				ib_umem_release(region);
+				return ERR_PTR(err);
+			}
+
+			nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
+			if (!nesmr) {
+				ib_umem_release(region);
+				nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+				return ERR_PTR(-ENOMEM);
+			}
+			nesmr->region = region;
+
+			list_for_each_entry(chunk, &region->chunk_list, list) {
+				nes_debug(NES_DBG_MR, "Chunk: nents = %u, nmap = %u .\n",
+						chunk->nents, chunk->nmap);
+				for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) {
+					if (sg_dma_address(&chunk->page_list[nmap_index]) & ~PAGE_MASK) {
+						nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n",
+								(unsigned int) sg_dma_address(&chunk->page_list[nmap_index]));
+						ibmr = ERR_PTR(-EINVAL);
+						goto reg_user_mr_err;
+					}
+
+					if (!sg_dma_len(&chunk->page_list[nmap_index])) {
+						nes_debug(NES_DBG_MR, "Invalid Buffer Size\n");
+						ibmr = ERR_PTR(-EINVAL);
+						goto reg_user_mr_err;
+					}
+
+					region_length += sg_dma_len(&chunk->page_list[nmap_index]);
+					chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12;
+					region_length -= skip_pages << 12;
+					for (page_index=skip_pages; page_index < chunk_pages; page_index++) {
+						skip_pages = 0;
+						/* stop once the pages collected so far cover region->length */
+						if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length)
+							goto enough_pages;
+						if ((page_count&0x01FF) == 0) {
+							/* leaf_vpbl[] has room for 1024 leaves of 512 entries each */
+							if (page_count >= (1024*512)) {
+								ibmr = ERR_PTR(-E2BIG);
+								goto reg_user_mr_err;
+							}
+							if (root_pbl_index == 1) {
+								root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
+										8192, &root_vpbl.pbl_pbase);
+								nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
+										root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
+								if (!root_vpbl.pbl_vbase) {
+									ibmr = ERR_PTR(-ENOMEM);
+									goto reg_user_mr_err;
+								}
+								root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024,
+										GFP_KERNEL);
+								if (!root_vpbl.leaf_vpbl) {
+									ibmr = ERR_PTR(-ENOMEM);
+									goto reg_user_mr_err;
+								}
+								root_vpbl.pbl_vbase[0].pa_low =
+										cpu_to_le32((u32)vpbl.pbl_pbase);
+								root_vpbl.pbl_vbase[0].pa_high =
+										cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
+								root_vpbl.leaf_vpbl[0] = vpbl;
+							}
+							vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+									&vpbl.pbl_pbase);
+							nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n",
+									vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase);
+							if (!vpbl.pbl_vbase) {
+								ibmr = ERR_PTR(-ENOMEM);
+								goto reg_user_mr_err;
+							}
+							if (1 <= root_pbl_index) {
+								root_vpbl.pbl_vbase[root_pbl_index].pa_low =
+										cpu_to_le32((u32)vpbl.pbl_pbase);
+								root_vpbl.pbl_vbase[root_pbl_index].pa_high =
+										cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
+								root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
+							}
+							root_pbl_index++;
+							cur_pbl_index = 0;
+						}
+						if (single_page) {
+							if (page_count != 0) {
+								if ((last_dma_addr+4096) !=
+										(sg_dma_address(&chunk->page_list[nmap_index])+
+										(page_index*4096)))
+									single_page = 0;
+								last_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+
+										(page_index*4096);
+							} else {
+								first_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+
+										(page_index*4096);
+								last_dma_addr = first_dma_addr;
+							}
+						}
+
+						vpbl.pbl_vbase[cur_pbl_index].pa_low =
+								cpu_to_le32((u32)(sg_dma_address(&chunk->page_list[nmap_index])+
+								(page_index*4096)));
+						vpbl.pbl_vbase[cur_pbl_index].pa_high =
+								cpu_to_le32((u32)((((u64)(sg_dma_address(&chunk->page_list[nmap_index])+
+								(page_index*4096))) >> 32)));
+						cur_pbl_index++;
+						page_count++;
+					}
+				}
+			}
+			enough_pages:
+			nes_debug(NES_DBG_MR, "calculating stag, stag_index=0x%08x, driver_key=0x%08x,"
+					" stag_key=0x%08x\n",
+					stag_index, driver_key, stag_key);
+			stag = stag_index << 8;
+			stag |= driver_key;
+			stag += (u32)stag_key;
+			if (stag == 0) {
+				stag = 1;
+			}
+
+			iova_start = virt;
+			/* Make the leaf PBL the root if only one PBL */
+			if (root_pbl_index == 1) {
+				root_vpbl.pbl_pbase = vpbl.pbl_pbase;
+			}
+
+			if (single_page) {
+				pbl_count = 0;
+			} else {
+				pbl_count = root_pbl_index;
+				first_dma_addr = 0;
+			}
+			nes_debug(NES_DBG_MR, "Registering STag 0x%08X, VA = 0x%08X, length = 0x%08X,"
+					" index = 0x%08X, region->length=0x%08llx, pbl_count = %u\n",
+					stag, (unsigned int)iova_start,
+					(unsigned int)region_length, stag_index,
+					(unsigned long long)region->length, pbl_count);
+			ret = nes_reg_mr( nesdev, nespd, stag, region->length, &root_vpbl,
+					first_dma_addr, pbl_count, (u16)cur_pbl_index, acc, &iova_start);
+
+			nes_debug(NES_DBG_MR, "ret=%d\n", ret);
+
+			if (ret == 0) {
+				nesmr->ibmr.rkey = stag;
+				nesmr->ibmr.lkey = stag;
+				nesmr->mode = IWNES_MEMREG_TYPE_MEM;
+				ibmr = &nesmr->ibmr;
+				nesmr->pbl_4k = ((pbl_count > 1) || (cur_pbl_index > 32)) ? 1 : 0;
+				nesmr->pbls_used = pbl_count;
+				if (pbl_count > 1) {
+					nesmr->pbls_used++;
+				}
+				goto reg_user_mr_free_pbls;
+			}
+			ibmr = ERR_PTR(-ENOMEM);
+
+			reg_user_mr_err:
+			/* registration failed: undo the pinning, the STag index and the nes_mr */
+			ib_umem_release(region);
+			nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+			kfree(nesmr);
+
+			reg_user_mr_free_pbls:
+			/* free the resources */
+			if (root_vpbl.leaf_vpbl) {
+				for (page_index=0; page_index<root_pbl_index; page_index++) {
+					pci_free_consistent(nesdev->pcidev, 4096,
+							root_vpbl.leaf_vpbl[page_index].pbl_vbase,
+							root_vpbl.leaf_vpbl[page_index].pbl_pbase);
+				}
+				kfree(root_vpbl.leaf_vpbl);
+			} else if (vpbl.pbl_vbase) {
+				pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
+						vpbl.pbl_pbase);
+			}
+			if (root_vpbl.pbl_vbase) {
+				pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
+						root_vpbl.pbl_pbase);
+			}
+
+			nes_debug(NES_DBG_MR, "Leaving, ibmr=%p", ibmr);
+
+			return ibmr;
+		case IWNES_MEMREG_TYPE_QP:
+		case IWNES_MEMREG_TYPE_CQ:
+			nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL);
+			if (!nespbl) {
+				nes_debug(NES_DBG_MR, "Unable to allocate PBL\n");
+				ib_umem_release(region);
+				return ERR_PTR(-ENOMEM);
+			}
+			nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
+			if (!nesmr) {
+				ib_umem_release(region);
+				kfree(nespbl);
+				nes_debug(NES_DBG_MR, "Unable to allocate nesmr\n");
+				return ERR_PTR(-ENOMEM);
+			}
+			nesmr->region = region;
+			nes_ucontext = to_nesucontext(pd->uobject->context);
+			/* one 64-bit PBL entry per 4KB page of region->length, rounded up */
+			pbl_depth = region->length >> 12;
+			pbl_depth += (region->length & (4096-1)) ? 1 : 0;
+			nespbl->pbl_size = pbl_depth*sizeof(u64);
+			if (req.reg_type == IWNES_MEMREG_TYPE_QP) {
+				nes_debug(NES_DBG_MR, "Attempting to allocate QP PBL memory");
+			} else {
+				nes_debug(NES_DBG_MR, "Attempting to allocate CP PBL memory");
+			}
+
+			nes_debug(NES_DBG_MR, " %u bytes, %u entries.\n",
+					nespbl->pbl_size, pbl_depth);
+			pbl = pci_alloc_consistent(nesdev->pcidev, nespbl->pbl_size,
+					&nespbl->pbl_pbase);
+			if (!pbl) {
+				ib_umem_release(region);
+				kfree(nesmr);
+				kfree(nespbl);
+				nes_debug(NES_DBG_MR, "Unable to allocate PBL memory\n");
+				return ERR_PTR(-ENOMEM);
+			}
+
+			nespbl->pbl_vbase = (u64 *)pbl;
+			nespbl->user_base = start;
+			nes_debug(NES_DBG_MR, "Allocated PBL memory, %u bytes, pbl_pbase=%p,"
+					" pbl_vbase=%p user_base=0x%lx\n",
+					nespbl->pbl_size, (void *)nespbl->pbl_pbase,
+					(void*)nespbl->pbl_vbase, nespbl->user_base);
+
+			list_for_each_entry(chunk, &region->chunk_list, list) {
+				for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) {
+					chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12;
+					chunk_pages += (sg_dma_len(&chunk->page_list[nmap_index]) & (4096-1)) ? 1 : 0;
+					nespbl->page = sg_page(&chunk->page_list[0]);
+					for (page_index=0; page_index<chunk_pages; page_index++) {
+						((__le32 *)pbl)[0] = cpu_to_le32((u32)
+								(sg_dma_address(&chunk->page_list[nmap_index])+
+								(page_index*4096)));
+						((__le32 *)pbl)[1] = cpu_to_le32(((u64)
+								(sg_dma_address(&chunk->page_list[nmap_index])+
+								(page_index*4096)))>>32);
+						nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl,
+								(unsigned long long)*pbl,
+								le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0]));
+						pbl++;
+					}
+				}
+			}
+			if (req.reg_type == IWNES_MEMREG_TYPE_QP) {
+				list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list);
+			} else {
+				list_add_tail(&nespbl->list, &nes_ucontext->cq_reg_mem_list);
+			}
+			nesmr->ibmr.rkey = -1;
+			nesmr->ibmr.lkey = -1;
+			nesmr->mode = req.reg_type;
+			return &nesmr->ibmr;
+	}
+
+	ib_umem_release(region);
+	return ERR_PTR(-ENOSYS);
+}
+
+
+/**
+ * nes_dereg_mr - release the umem of an MR and, for IWNES_MEMREG_TYPE_MEM,
+ * deallocate its STag on the adapter. Returns 0, -ENOMEM, -ETIME or -EIO.
+ */
+static int nes_dereg_mr(struct ib_mr *ib_mr)
+{
+	struct nes_mr *nesmr = to_nesmr(ib_mr);
+	struct nes_vnic *nesvnic = to_nesvnic(ib_mr->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	struct nes_cqp_request *cqp_request;
+	unsigned long flags;
+	int ret;
+	u16 major_code, minor_code;
+
+	if (nesmr->region) {
+		ib_umem_release(nesmr->region);
+		nesmr->region = NULL;	/* nesmr survives the -ENOMEM return below */
+	}
+	if (nesmr->mode != IWNES_MEMREG_TYPE_MEM) {
+		kfree(nesmr);
+		return 0;
+	}
+
+	/* Deallocate the region with the adapter */
+	cqp_request = nes_get_cqp_request(nesdev);
+	if (cqp_request == NULL) {
+		nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
+		return -ENOMEM;
+	}
+	cqp_request->waiting = 1;
+	cqp_wqe = &cqp_request->cqp_wqe;
+
+	spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+	if (nesmr->pbls_used != 0) {
+		if (nesmr->pbl_4k) {
+			nesadapter->free_4kpbl += nesmr->pbls_used;
+			if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) {
+				printk(KERN_ERR PFX "free 4KB PBLs(%u) has exceeded the max(%u)\n",
+						nesadapter->free_4kpbl, nesadapter->max_4kpbl);
+			}
+		} else {
+			nesadapter->free_256pbl += nesmr->pbls_used;
+			if (nesadapter->free_256pbl > nesadapter->max_256pbl) {
+				printk(KERN_ERR PFX "free 256B PBLs(%u) has exceeded the max(%u)\n",
+						nesadapter->free_256pbl, nesadapter->max_256pbl);
+			}
+		}
+	}
+	spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+			NES_CQP_DEALLOCATE_STAG | NES_CQP_STAG_VA_TO |
+			NES_CQP_STAG_DEALLOC_PBLS | NES_CQP_STAG_MR);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ib_mr->rkey);
+
+	atomic_set(&cqp_request->refcount, 2);
+	nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+
+	/* Wait for CQP */
+	nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X completed\n", ib_mr->rkey);
+	ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
+			NES_EVENT_TIMEOUT);
+	nes_debug(NES_DBG_MR, "Deallocate STag 0x%08X completed, wait_event_timeout ret = %u,"
+			" CQP Major:Minor codes = 0x%04X:0x%04X\n",
+			ib_mr->rkey, ret, cqp_request->major_code, cqp_request->minor_code);
+
+	nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+			(ib_mr->rkey & 0x0fffff00) >> 8);
+	major_code = cqp_request->major_code;
+	minor_code = cqp_request->minor_code;
+	if (atomic_dec_and_test(&cqp_request->refcount)) {
+		if (cqp_request->dynamic) {
+			kfree(cqp_request);
+		} else {
+			spin_lock_irqsave(&nesdev->cqp.lock, flags);
+			list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+			spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+		}
+	}
+	if (!ret) {
+		nes_debug(NES_DBG_MR, "Timeout waiting to destroy STag,"
+				" ib_mr=%p, rkey = 0x%08X\n",
+				ib_mr, ib_mr->rkey);
+		ret = -ETIME;
+	} else if (major_code) {
+		nes_debug(NES_DBG_MR, "Error (0x%04X:0x%04X) while attempting"
+				" to destroy STag, ib_mr=%p, rkey = 0x%08X\n",
+				major_code, minor_code, ib_mr, ib_mr->rkey);
+		ret = -EIO;
+	} else {
+		ret = 0;
+	}
+	/* ib_mr is embedded in nesmr: free it only after the last ib_mr->rkey use */
+	kfree(nesmr);
+	return ret;
+}
+
+
+/**
+ * show_rev - class device attribute: print the adapter hw_rev in hex
+ */
+static ssize_t show_rev(struct class_device *cdev, char *buf)
+{
+	struct nes_ib_device *ibdev_priv =
+			container_of(cdev, struct nes_ib_device, ibdev.class_dev);
+	struct nes_adapter *adapter = ibdev_priv->nesvnic->nesdev->nesadapter;
+
+	nes_debug(NES_DBG_INIT, "\n");
+	return sprintf(buf, "%x\n", adapter->hw_rev);
+}
+
+
+/**
+ * show_fw_ver - class device attribute: print fw_ver as three hex fields
+ */
+static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
+{
+	struct nes_ib_device *ibdev_priv =
+			container_of(cdev, struct nes_ib_device, ibdev.class_dev);
+	struct nes_adapter *adapter = ibdev_priv->nesvnic->nesdev->nesadapter;
+
+	nes_debug(NES_DBG_INIT, "\n");
+	return sprintf(buf, "%x.%x.%x\n",
+			(int)(adapter->fw_ver >> 32),
+			(int)(adapter->fw_ver >> 16) & 0xffff,
+			(int)(adapter->fw_ver & 0xffff));
+}
+
+
+/**
+ * show_hca - class device attribute: always writes "NES020\n" to @buf
+ */
+static ssize_t show_hca(struct class_device *cdev, char *buf)
+{
+	nes_debug(NES_DBG_INIT, "\n");
+	return sprintf(buf, "NES020\n");
+}
+
+
+/**
+ * show_board - class device attribute: writes a fixed board id string to @buf
+ */
+static ssize_t show_board(struct class_device *cdev, char *buf)
+{
+	nes_debug(NES_DBG_INIT, "\n");
+	return sprintf(buf, "%.*s\n", 32, "NES020 Board ID");	/* at most 32 chars */
+}
+
+
+static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);	/* show handler only, store is NULL */
+static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+/* table of the four attributes defined above */
+static struct class_device_attribute *nes_class_attributes[] = {
+	&class_device_attr_hw_rev,
+	&class_device_attr_fw_ver,
+	&class_device_attr_hca_type,
+	&class_device_attr_board_id
+};
+
+
+/**
+ * nes_query_qp - report the capabilities and creation attributes of a QP
+ */
+static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		int attr_mask, struct ib_qp_init_attr *init_attr)
+{
+	struct nes_qp *nesqp = to_nesqp(ibqp);
+
+	nes_debug(NES_DBG_QP, "\n");
+
+	attr->qp_access_flags = 0;
+	attr->cap.max_send_wr = nesqp->hwqp.sq_size;
+	attr->cap.max_recv_wr = nesqp->hwqp.rq_size;
+	attr->cap.max_recv_sge = 1;
+	/* set in attr->cap: init_attr->cap is overwritten by the copy below */
+	if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA)
+		attr->cap.max_inline_data = 0;
+	else
+		attr->cap.max_inline_data = 64;
+
+	init_attr->event_handler = nesqp->ibqp.event_handler;
+	init_attr->qp_context = nesqp->ibqp.qp_context;
+	init_attr->send_cq = nesqp->ibqp.send_cq;
+	init_attr->recv_cq = nesqp->ibqp.recv_cq;
+	init_attr->srq = nesqp->ibqp.srq;
+	init_attr->cap = attr->cap;
+
+	return 0;
+}
+
+
+/**
+ * nes_hw_modify_qp - post a Modify QP request for an iWARP QP on the CQP
+ * @nesdev: device whose CQP is used
+ * @nesqp: QP to modify
+ * @next_iwarp_state: state bits OR'ed into the CQP opcode word
+ * @wait_completion: non-zero to wait for the CQP to complete the request
+ *
+ * Returns 0 if the request was posted without waiting or completed without
+ * error, -ENOMEM if no CQP request was available, -ETIME if the wait timed
+ * out and -EIO if the CQP reported a major error code.
+ */
+int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
+		u32 next_iwarp_state, u32 wait_completion)
+{
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	struct nes_cqp_request *cqp_request;
+	unsigned long flags;
+	int ret;
+	u16 major_code;
+
+	nes_debug(NES_DBG_MOD_QP, "QP%u, refcount=%d\n",
+			nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount));
+
+	cqp_request = nes_get_cqp_request(nesdev);
+	if (cqp_request == NULL) {
+		nes_debug(NES_DBG_MOD_QP, "Failed to get a cqp_request.\n");
+		return -ENOMEM;
+	}
+	cqp_request->waiting = wait_completion ? 1 : 0;
+	cqp_wqe = &cqp_request->cqp_wqe;
+
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+			NES_CQP_MODIFY_QP | NES_CQP_QP_TYPE_IWARP | next_iwarp_state);
+	nes_debug(NES_DBG_MOD_QP, "using next_iwarp_state=%08x, wqe_words=%08x\n",
+			next_iwarp_state, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]));
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
+	set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase);
+
+	atomic_set(&cqp_request->refcount, 2);
+	nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+
+	/* no wait requested: this function does not touch the request again */
+	if (!wait_completion)
+		return 0;
+
+	/* Wait for CQP */
+	ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
+			NES_EVENT_TIMEOUT);
+	nes_debug(NES_DBG_MOD_QP, "Modify iwarp QP%u completed, wait_event_timeout ret=%u, "
+			"CQP Major:Minor codes = 0x%04X:0x%04X.\n",
+			nesqp->hwqp.qp_id, ret, cqp_request->major_code, cqp_request->minor_code);
+	major_code = cqp_request->major_code;
+	if (major_code) {
+		nes_debug(NES_DBG_MOD_QP, "Modify iwarp QP%u failed, "
+				"CQP Major:Minor codes = 0x%04X:0x%04X, intended next state = 0x%08X.\n",
+				nesqp->hwqp.qp_id, cqp_request->major_code,
+				cqp_request->minor_code, next_iwarp_state);
+	}
+	/* drop one reference; if it was the last, free or recycle the request */
+	if (atomic_dec_and_test(&cqp_request->refcount)) {
+		if (cqp_request->dynamic) {
+			kfree(cqp_request);
+		} else {
+			spin_lock_irqsave(&nesdev->cqp.lock, flags);
+			list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+			spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+		}
+	}
+	if (!ret)
+		return -ETIME;
+	if (major_code)
+		return -EIO;
+	return 0;
+}
+
+
+/**
+ * nes_modify_qp - apply IB_QP_STATE and IB_QP_ACCESS_FLAGS changes to a QP
+ *
+ * The requested IB state is mapped to an iWARP state under nesqp->lock; the
+ * CQP modify is issued after the lock is dropped.  Returns -EINVAL for a
+ * rejected transition, otherwise 0 (a nes_hw_modify_qp() failure is only logged).
+ */
+int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		int attr_mask, struct ib_udata *udata)
+{
+	struct nes_qp *nesqp = to_nesqp(ibqp);
+	struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	u32 next_iwarp_state = 0;
+	int err;
+	unsigned long qplockflags;
+	int ret;
+	u16 original_last_aeq;
+	u8 issue_modify_qp = 0;
+	u8 issue_disconnect = 0;
+	u8 dont_wait = 0;
+
+	nes_debug(NES_DBG_MOD_QP, "QP%u: QP State=%u, cur QP State=%u,"
+			" iwarp_state=0x%X, refcount=%d\n",
+			nesqp->hwqp.qp_id, attr->qp_state, nesqp->ibqp_state,
+			nesqp->iwarp_state, atomic_read(&nesqp->refcount));
+
+	nes_add_ref(&nesqp->ibqp);
+	spin_lock_irqsave(&nesqp->lock, qplockflags);
+
+	nes_debug(NES_DBG_MOD_QP, "QP%u: hw_iwarp_state=0x%X, hw_tcp_state=0x%X,"
+			" QP Access Flags=0x%X, attr_mask = 0x%0x\n",
+			nesqp->hwqp.qp_id, nesqp->hw_iwarp_state,
+			nesqp->hw_tcp_state, attr->qp_access_flags, attr_mask);
+
+	if (attr_mask & IB_QP_STATE) {
+		switch (attr->qp_state) {
+			case IB_QPS_INIT:
+				nes_debug(NES_DBG_MOD_QP, "QP%u: new state = init\n",
+						nesqp->hwqp.qp_id);
+				if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_IDLE) {
+					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+					nes_rem_ref(&nesqp->ibqp);
+					return -EINVAL;
+				}
+				next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE;
+				issue_modify_qp = 1;
+				break;
+			case IB_QPS_RTR:
+				nes_debug(NES_DBG_MOD_QP, "QP%u: new state = rtr\n",
+						nesqp->hwqp.qp_id);
+				if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_IDLE) {
+					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+					nes_rem_ref(&nesqp->ibqp);
+					return -EINVAL;
+				}
+				next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE;
+				issue_modify_qp = 1;
+				break;
+			case IB_QPS_RTS:
+				nes_debug(NES_DBG_MOD_QP, "QP%u: new state = rts\n",
+						nesqp->hwqp.qp_id);
+				if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_RTS) {
+					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+					nes_rem_ref(&nesqp->ibqp);
+					return -EINVAL;
+				}
+				if (nesqp->cm_id == NULL) {
+					nes_debug(NES_DBG_MOD_QP, "QP%u: Failing attempt to move QP to RTS without a CM_ID. \n",
+							nesqp->hwqp.qp_id );
+					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+					nes_rem_ref(&nesqp->ibqp);
+					return -EINVAL;
+				}
+				next_iwarp_state = NES_CQP_QP_IWARP_STATE_RTS;
+				if (nesqp->iwarp_state != NES_CQP_QP_IWARP_STATE_RTS)
+					next_iwarp_state |= NES_CQP_QP_CONTEXT_VALID |
+							NES_CQP_QP_ARP_VALID | NES_CQP_QP_ORD_VALID;
+				issue_modify_qp = 1;
+				nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_ESTABLISHED;
+				nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_RTS;
+				nesqp->hte_added = 1;
+				break;
+			case IB_QPS_SQD:
+				issue_modify_qp = 1;
+				nes_debug(NES_DBG_MOD_QP, "QP%u: new state=closing. SQ head=%u, SQ tail=%u\n",
+						nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail);
+				if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_CLOSING) {
+					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+					nes_rem_ref(&nesqp->ibqp);
+					return 0;
+				} else {
+					if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_CLOSING) {
+						nes_debug(NES_DBG_MOD_QP, "QP%u: State change to closing"
+								" ignored due to current iWARP state\n",
+								nesqp->hwqp.qp_id);
+						spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+						nes_rem_ref(&nesqp->ibqp);
+						return -EINVAL;
+					}
+					if (nesqp->hw_iwarp_state != NES_AEQE_IWARP_STATE_RTS) {
+						nes_debug(NES_DBG_MOD_QP, "QP%u: State change to closing"
+								" already done based on hw state.\n",
+								nesqp->hwqp.qp_id);
+						issue_modify_qp = 0;
+						nesqp->in_disconnect = 0;
+					}
+					switch (nesqp->hw_iwarp_state) {
+						case NES_AEQE_IWARP_STATE_CLOSING:
+							next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
+							break;
+						case NES_AEQE_IWARP_STATE_TERMINATE:
+							next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
+							break;
+						case NES_AEQE_IWARP_STATE_ERROR:
+							next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
+							break;
+						default:
+							next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
+							nesqp->in_disconnect = 1;
+							nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
+							break;
+					}
+				}
+				break;
+			case IB_QPS_SQE:
+				nes_debug(NES_DBG_MOD_QP, "QP%u: new state = terminate\n",
+						nesqp->hwqp.qp_id);
+				if (nesqp->iwarp_state>=(u32)NES_CQP_QP_IWARP_STATE_TERMINATE) {
+					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+					nes_rem_ref(&nesqp->ibqp);
+					return -EINVAL;
+				}
+				/* Request TERMINATE, mirror it in hw_iwarp_state and set in_disconnect. */
+				next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
+				nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE;
+				issue_modify_qp = 1;
+				nesqp->in_disconnect = 1;
+				break;
+			case IB_QPS_ERR:
+			case IB_QPS_RESET:
+				if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_ERROR) {
+					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+					nes_rem_ref(&nesqp->ibqp);
+					return -EINVAL;
+				}
+				nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n",
+						nesqp->hwqp.qp_id);
+				next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
+				/* Request HTE deletion only if the RTS transition recorded one (hte_added). */
+				if (nesqp->hte_added) {
+					nes_debug(NES_DBG_MOD_QP, "set CQP_QP_DEL_HTE\n");
+					next_iwarp_state |= NES_CQP_QP_DEL_HTE;
+					nesqp->hte_added = 0;
+				}
+				if ((nesqp->hw_tcp_state > NES_AEQE_TCP_STATE_CLOSED) &&
+						(nesqp->hw_tcp_state != NES_AEQE_TCP_STATE_TIME_WAIT)) {
+					next_iwarp_state |= NES_CQP_QP_RESET;
+					nesqp->in_disconnect = 1;
+				} else {
+					nes_debug(NES_DBG_MOD_QP, "QP%u NOT setting NES_CQP_QP_RESET since TCP state = %u\n",
+							nesqp->hwqp.qp_id, nesqp->hw_tcp_state);
+					dont_wait = 1;
+				}
+				issue_modify_qp = 1;
+				nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR;
+				break;
+			default:
+				spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+				nes_rem_ref(&nesqp->ibqp);
+				return -EINVAL;
+		}
+
+		nesqp->ibqp_state = attr->qp_state;
+		if (((nesqp->iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) ==
+				(u32)NES_CQP_QP_IWARP_STATE_RTS) &&
+				((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) >
+				(u32)NES_CQP_QP_IWARP_STATE_RTS)) {
+			nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK;
+			nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n",
+					nesqp->iwarp_state);
+			issue_disconnect = 1;
+		} else {
+			nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK;
+			nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n",
+					nesqp->iwarp_state);
+		}
+	}
+
+	if (attr_mask & IB_QP_ACCESS_FLAGS) {
+		if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE) {
+			nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN |
+					NES_QPCONTEXT_MISC_RDMA_READ_EN);
+			issue_modify_qp = 1;
+		}
+		if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) {
+			nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN);
+			issue_modify_qp = 1;
+		}
+		if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) {
+			nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_READ_EN);
+			issue_modify_qp = 1;
+		}
+		if (attr->qp_access_flags & IB_ACCESS_MW_BIND) {
+			nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WBIND_EN);
+			issue_modify_qp = 1;
+		}
+
+		if (nesqp->user_mode) {
+			nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN |
+					NES_QPCONTEXT_MISC_RDMA_READ_EN);
+			issue_modify_qp = 1;
+		}
+	}
+
+	original_last_aeq = nesqp->last_aeq;
+	spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+
+	nes_debug(NES_DBG_MOD_QP, "issue_modify_qp=%u\n", issue_modify_qp);
+
+	ret = 0;
+
+	if (issue_modify_qp) {
+		nes_debug(NES_DBG_MOD_QP, "call nes_hw_modify_qp\n");
+		ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 1);
+		if (ret)
+			nes_debug(NES_DBG_MOD_QP, "nes_hw_modify_qp (next_iwarp_state = 0x%08X)"
+					" failed for QP%u.\n",
+					next_iwarp_state, nesqp->hwqp.qp_id);
+	}
+
+	if ((issue_modify_qp) && (nesqp->ibqp_state > IB_QPS_RTS)) {
+		nes_debug(NES_DBG_MOD_QP, "QP%u Issued ModifyQP refcount (%d),"
+				" original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
+				nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
+				original_last_aeq, nesqp->last_aeq);
+		if ((!ret) ||
+				((original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) &&
+				(ret))) {
+			if (dont_wait) {
+				if (nesqp->cm_id && nesqp->hw_tcp_state != 0) {
+					nes_debug(NES_DBG_MOD_QP, "QP%u Queuing fake disconnect for QP refcount (%d),"
+							" original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
+							nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
+							original_last_aeq, nesqp->last_aeq);
+					/* this one is for the cm_disconnect thread */
+					nes_add_ref(&nesqp->ibqp);
+					spin_lock_irqsave(&nesqp->lock, qplockflags);
+					nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+					nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
+					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+					nes_cm_disconn(nesqp);
+				} else {
+					nes_debug(NES_DBG_MOD_QP, "QP%u No fake disconnect, QP refcount=%d\n",
+							nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount));
+					nes_rem_ref(&nesqp->ibqp);
+				}
+			} else {
+				spin_lock_irqsave(&nesqp->lock, qplockflags);
+				if (nesqp->cm_id) {
+					/* These two are for the timer thread */
+					if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
+						nes_add_ref(&nesqp->ibqp);
+						nesqp->cm_id->add_ref(nesqp->cm_id);
+						nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d),"
+								" need ae to finish up, original_last_aeq = 0x%04X."
+								" last_aeq = 0x%04X, scheduling timer.\n",
+								nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
+								original_last_aeq, nesqp->last_aeq);
+						schedule_nes_timer(nesqp->cm_node, (struct sk_buff *) nesqp, NES_TIMER_TYPE_CLOSE, 1, 0);
+					}
+					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+				} else {
+					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+					nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d),"
+							" need ae to finish up, original_last_aeq = 0x%04X."
+							" last_aeq = 0x%04X.\n",
+							nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
+							original_last_aeq, nesqp->last_aeq);
+				}
+			}
+		} else {
+			nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up,"
+					" original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
+					nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
+					original_last_aeq, nesqp->last_aeq);
+			nes_rem_ref(&nesqp->ibqp);
+		}
+	} else {
+		nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up,"
+				" original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
+				nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
+				original_last_aeq, nesqp->last_aeq);
+		nes_rem_ref(&nesqp->ibqp);
+	}
+
+	err = 0;
+
+	nes_debug(NES_DBG_MOD_QP, "QP%u Leaving, refcount=%d\n",
+			nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount));
+
+	return err;
+}
+
+
+/**
+ * nes_multicast_attach - stub; emits an empty debug line and returns -ENOSYS
+ */
+static int nes_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	nes_debug(NES_DBG_INIT, "\n");
+	return -ENOSYS;
+}
+
+
+/**
+ * nes_multicast_detach - stub; emits an empty debug line and returns -ENOSYS
+ */
+static int nes_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	nes_debug(NES_DBG_INIT, "\n");
+	return -ENOSYS;
+}
+
+
+/**
+ * nes_process_mad - stub; ignores its arguments, logs an empty debug line, returns -ENOSYS
+ */
+static int nes_process_mad(struct ib_device *ibdev, int mad_flags,
+		u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh,
+		struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+	nes_debug(NES_DBG_INIT, "\n");
+	return -ENOSYS;
+}
+
+/* Load ib_wr's SGEs into the WQE fragment slots (STAG 0 unless uselkey) and record the byte total. */
+static inline void
+fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, struct ib_send_wr *ib_wr, u32 uselkey)
+{
+	int payload_bytes = 0;
+	int i;
+
+	for (i = 0; i < ib_wr->num_sge; i++) {
+		struct ib_sge *sge = &ib_wr->sg_list[i];
+		int slot = i * 4;	/* per-SGE word indices are spaced 4 apart */
+
+		set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX + slot,
+				sge->addr);
+		set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_LENGTH0_IDX + slot,
+				sge->length);
+		set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX + slot,
+				uselkey ? sge->lkey : 0);
+		payload_bytes += sge->length;
+	}
+	nes_debug(NES_DBG_IW_TX, "UC UC UC, sending total_payload_length=%u \n", payload_bytes);
+	set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
+			payload_bytes);
+}
+
+/**
+ * nes_post_send - build SQ WQEs for a chain of send work requests
+ *
+ * Handles IB_WR_SEND, IB_WR_RDMA_WRITE and IB_WR_RDMA_READ.  Processing stops
+ * at the first work request that cannot be queued; that request is returned
+ * through *bad_wr with -EINVAL, and WQEs built before it are still posted.
+ */
+static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
+		struct ib_send_wr **bad_wr)
+{
+	u64 u64temp;
+	unsigned long flags = 0;
+	struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_qp *nesqp = to_nesqp(ibqp);
+	struct nes_hw_qp_wqe *wqe;
+	int err;
+	u32 qsize = nesqp->hwqp.sq_size;
+	u32 head;
+	u32 wqe_misc;
+	u32 wqe_count;
+	u32 counter;
+
+	err = 0;
+	wqe_misc = 0;
+	wqe_count = 0;
+
+	if (nesqp->ibqp_state > IB_QPS_RTS) {
+		*bad_wr = ib_wr;
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&nesqp->lock, flags);
+
+	head = nesqp->hwqp.sq_head;
+
+	while (ib_wr) {
+		/* Check for SQ overflow */
+		if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
+			err = -EINVAL;
+			break;
+		}
+
+		wqe = &nesqp->hwqp.sq_vbase[head];
+		nes_fill_init_qp_wqe(wqe, nesqp, head);
+		u64temp = (u64)(ib_wr->wr_id);
+		set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX,
+					u64temp);
+		switch (ib_wr->opcode) {
+			case IB_WR_SEND:
+				if (ib_wr->send_flags & IB_SEND_SOLICITED)
+					wqe_misc = NES_IWARP_SQ_OP_SENDSE;
+				else
+					wqe_misc = NES_IWARP_SQ_OP_SEND;
+				if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
+					err = -EINVAL;
+					break;
+				}
+				if (ib_wr->send_flags & IB_SEND_FENCE)
+					wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
+				if ((ib_wr->send_flags & IB_SEND_INLINE) &&
+						((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
+						(ib_wr->sg_list[0].length <= 64)) {
+					memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
+						       (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
+					set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
+							ib_wr->sg_list[0].length);
+					wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
+				} else {
+					fill_wqe_sg_send(wqe, ib_wr, 1);
+				}
+
+				break;
+			case IB_WR_RDMA_WRITE:
+				wqe_misc = NES_IWARP_SQ_OP_RDMAW;
+				if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
+					nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n",
+							ib_wr->num_sge,
+							nesdev->nesadapter->max_sge);
+					err = -EINVAL;
+					break;
+				}
+				if (ib_wr->send_flags & IB_SEND_FENCE)
+					wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
+
+				set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
+						ib_wr->wr.rdma.rkey);
+				set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
+						ib_wr->wr.rdma.remote_addr);
+
+				if ((ib_wr->send_flags & IB_SEND_INLINE) &&
+						((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
+						(ib_wr->sg_list[0].length <= 64)) {
+					memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
+						       (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
+					set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
+							ib_wr->sg_list[0].length);
+					wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
+				} else {
+					fill_wqe_sg_send(wqe, ib_wr, 1);
+				}
+				wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] =
+						wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX];
+				break;
+			case IB_WR_RDMA_READ:
+				/* iWARP only supports 1 sge for RDMA reads */
+				if (ib_wr->num_sge > 1) {
+					nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n",
+							ib_wr->num_sge);
+					err = -EINVAL;
+					break;
+				}
+				wqe_misc = NES_IWARP_SQ_OP_RDMAR;
+				set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
+						ib_wr->wr.rdma.remote_addr);
+				set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
+						ib_wr->wr.rdma.rkey);
+				set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
+						ib_wr->sg_list->length);
+				set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
+						ib_wr->sg_list->addr);
+				set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX,
+						ib_wr->sg_list->lkey);
+				break;
+			default:
+				/* error */
+				err = -EINVAL;
+				break;
+		}
+		/* Stop at the first rejected WR so it is neither posted nor skipped. */
+		if (err)
+			break;
+
+		if (ib_wr->send_flags & IB_SEND_SIGNALED)
+			wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
+		wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(wqe_misc);
+
+		ib_wr = ib_wr->next;
+		head++;
+		wqe_count++;
+		if (head >= qsize)
+			head = 0;
+	}
+
+	nesqp->hwqp.sq_head = head;
+	barrier();
+	while (wqe_count) {
+		counter = min(wqe_count, ((u32)255));
+		wqe_count -= counter;
+		nes_write32(nesdev->regs + NES_WQE_ALLOC,
+				(counter << 24) | 0x00800000 | nesqp->hwqp.qp_id);
+	}
+
+	spin_unlock_irqrestore(&nesqp->lock, flags);
+
+	if (err)
+		*bad_wr = ib_wr;
+	return err;
+}
+
+
+/**
+ * nes_post_recv - build RQ WQEs for a chain of receive work requests
+ *
+ * Stops at the first work request that cannot be queued; that request is
+ * returned through *bad_wr with -EINVAL.  WQEs built before the failure are
+ * still handed to hardware.
+ */
+static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
+		struct ib_recv_wr **bad_wr)
+{
+	struct nes_qp *nesqp = to_nesqp(ibqp);
+	struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_hw_qp_wqe *rq_wqe;
+	unsigned long irq_flags = 0;
+	u32 ring_size = nesqp->hwqp.rq_size;
+	u32 slot;
+	u32 pending = 0;
+	u32 batch;
+	u32 payload_bytes;
+	int status = 0;
+	int i;
+
+	if (nesqp->ibqp_state > IB_QPS_RTS) {
+		*bad_wr = ib_wr;
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&nesqp->lock, irq_flags);
+	slot = nesqp->hwqp.rq_head;
+
+	for (; ib_wr; ib_wr = ib_wr->next) {
+		/* Reject an oversized SGE list first, then check for RQ overflow. */
+		if ((ib_wr->num_sge > nesdev->nesadapter->max_sge) ||
+				(((slot + (2 * ring_size) - nesqp->hwqp.rq_tail) % ring_size) ==
+				(ring_size - 1))) {
+			status = -EINVAL;
+			break;
+		}
+
+		nes_debug(NES_DBG_IW_RX, "ibwr sge count = %u.\n", ib_wr->num_sge);
+		rq_wqe = &nesqp->hwqp.rq_vbase[slot];
+		nes_fill_init_qp_wqe(rq_wqe, nesqp, slot);
+		/* wr_id is stored in the WQE scratch words. */
+		set_wqe_64bit_value(rq_wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX,
+				(u64)(ib_wr->wr_id));
+
+		payload_bytes = 0;
+		for (i = 0; i < ib_wr->num_sge; i++) {
+			struct ib_sge *sge = &ib_wr->sg_list[i];
+			int word = i * 4;
+
+			set_wqe_64bit_value(rq_wqe->wqe_words,
+					NES_IWARP_RQ_WQE_FRAG0_LOW_IDX + word, sge->addr);
+			set_wqe_32bit_value(rq_wqe->wqe_words,
+					NES_IWARP_RQ_WQE_LENGTH0_IDX + word, sge->length);
+			set_wqe_32bit_value(rq_wqe->wqe_words,
+					NES_IWARP_RQ_WQE_STAG0_IDX + word, sge->lkey);
+			payload_bytes += sge->length;
+		}
+		set_wqe_32bit_value(rq_wqe->wqe_words, NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX,
+				payload_bytes);
+
+		if (++slot >= ring_size)
+			slot = 0;
+		pending++;
+	}
+
+	nesqp->hwqp.rq_head = slot;
+	barrier();
+	/* Report the new WQEs through NES_WQE_ALLOC, at most 255 per register write. */
+	for (; pending; pending -= batch) {
+		batch = min(pending, ((u32)255));
+		nes_write32(nesdev->regs+NES_WQE_ALLOC, (batch<<24) | nesqp->hwqp.qp_id);
+	}
+
+	spin_unlock_irqrestore(&nesqp->lock, irq_flags);
+
+	if (status)
+		*bad_wr = ib_wr;
+	return status;
+}
+
+
+/**
+ * nes_poll_cq - copy up to num_entries valid CQEs from a CQ into entry[]
+ *
+ * Returns the number of ib_wc entries written.
+ */
+static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
+{
+	u64 u64temp;
+	u64 wrid;
+	unsigned long flags = 0;
+	struct nes_vnic *nesvnic = to_nesvnic(ibcq->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_cq *nescq = to_nescq(ibcq);
+	struct nes_qp *nesqp;
+	struct nes_hw_cqe cqe;
+	u32 head;
+	u32 wq_tail;
+	u32 cq_size;
+	u32 cqe_count = 0;
+	u32 wqe_index;
+	u32 u32temp;
+
+	nes_debug(NES_DBG_CQ, "\n");
+
+	spin_lock_irqsave(&nescq->lock, flags);
+
+	head = nescq->hw_cq.cq_head;
+	cq_size = nescq->hw_cq.cq_size;
+
+	while (cqe_count < num_entries) {
+		if (le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) &
+				NES_CQE_VALID) {
+			cqe = nescq->hw_cq.cq_vbase[head];
+			nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
+			u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
+			wqe_index = u32temp &
+					(nesdev->nesadapter->max_qp_wr - 1);
+			u32temp &= ~(NES_SW_CONTEXT_ALIGN-1);
+			/* CQE context words hold the nes_qp pointer; its low bits carried the WQE index */
+			u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
+					((u64)u32temp);
+			nesqp = (struct nes_qp *)(unsigned long)u64temp;
+			memset(entry, 0, sizeof *entry);
+			if (cqe.cqe_words[NES_CQE_ERROR_CODE_IDX] == 0) {
+				entry->status = IB_WC_SUCCESS;
+			} else {
+				entry->status = IB_WC_WR_FLUSH_ERR;
+			}
+
+			entry->qp = &nesqp->ibqp;
+			entry->src_qp = nesqp->hwqp.qp_id;
+
+			if (le32_to_cpu(cqe.cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_SQ) {
+				if (nesqp->skip_lsmm) {
+					nesqp->skip_lsmm = 0;
+					wq_tail = nesqp->hwqp.sq_tail++;
+				}
+
+				/* Working on a SQ Completion*/
+				wq_tail = wqe_index;
+				nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
+				wrid = (((u64)(le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+						wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) |
+						((u64)(le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+						wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX])));
+				entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+						wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]);
+
+				switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+						wqe_words[NES_IWARP_SQ_WQE_MISC_IDX]) & 0x3f) {
+					case NES_IWARP_SQ_OP_RDMAW:
+						nes_debug(NES_DBG_CQ, "Operation = RDMA WRITE.\n");
+						entry->opcode = IB_WC_RDMA_WRITE;
+						break;
+					case NES_IWARP_SQ_OP_RDMAR:
+						nes_debug(NES_DBG_CQ, "Operation = RDMA READ.\n");
+						entry->opcode = IB_WC_RDMA_READ;
+						entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+								wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX]);
+						break;
+					case NES_IWARP_SQ_OP_SENDINV:
+					case NES_IWARP_SQ_OP_SENDSEINV:
+					case NES_IWARP_SQ_OP_SEND:
+					case NES_IWARP_SQ_OP_SENDSE:
+						nes_debug(NES_DBG_CQ, "Operation = Send.\n");
+						entry->opcode = IB_WC_SEND;
+						break;
+				}
+			} else {
+				/* Working on a RQ Completion*/
+				wq_tail = wqe_index;
+				nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1);
+				entry->byte_len = le32_to_cpu(cqe.cqe_words[NES_CQE_PAYLOAD_LENGTH_IDX]);
+				wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) |
+					((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32);
+				entry->opcode = IB_WC_RECV;
+			}
+			entry->wr_id = wrid;
+
+			if (++head >= cq_size)
+				head = 0;
+			cqe_count++;
+			nescq->polled_completions++;
+			if ((nescq->polled_completions > (cq_size / 2)) ||
+					(nescq->polled_completions == 255)) {
+				nes_debug(NES_DBG_CQ, "CQ%u Issuing CQE Allocate since more than half of cqes"
+						" are pending %u of %u.\n",
+						nescq->hw_cq.cq_number, nescq->polled_completions, cq_size);
+				nes_write32(nesdev->regs+NES_CQE_ALLOC,
+						nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
+				nescq->polled_completions = 0;
+			}
+			entry++;
+		} else
+			break;
+	}
+
+	if (nescq->polled_completions) {
+		nes_write32(nesdev->regs+NES_CQE_ALLOC,
+				nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
+		nescq->polled_completions = 0;
+	}
+
+	nescq->hw_cq.cq_head = head;
+	nes_debug(NES_DBG_CQ, "Reporting %u completions for CQ%u.\n",
+			cqe_count, nescq->hw_cq.cq_number);
+
+	spin_unlock_irqrestore(&nescq->lock, flags);
+
+	return cqe_count;
+}
+
+
+/**
+ * nes_req_notify_cq - arm a CQ for next-completion or solicited notification; -EINVAL otherwise
+ */
+static int nes_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
+{
+	struct nes_cq *nescq = to_nescq(ibcq);
+	struct nes_device *nesdev = to_nesvnic(ibcq->device)->nesdev;
+	u32 arm_request = nescq->hw_cq.cq_number;
+
+	nes_debug(NES_DBG_CQ, "Requesting notification for CQ%u.\n",
+			nescq->hw_cq.cq_number);
+	switch (notify_flags & IB_CQ_SOLICITED_MASK) {
+		case IB_CQ_NEXT_COMP:
+			arm_request |= NES_CQE_ALLOC_NOTIFY_NEXT;
+			break;
+		case IB_CQ_SOLICITED:
+			arm_request |= NES_CQE_ALLOC_NOTIFY_SE;
+			break;
+		default:
+			return -EINVAL;
+	}
+
+	nes_write32(nesdev->regs+NES_CQE_ALLOC, arm_request);
+	nes_read32(nesdev->regs+NES_CQE_ALLOC);	/* read-back; presumably flushes the write - confirm */
+	return 0;
+}
+
+
+/**
+ * nes_init_ofa_device - allocate an unregistered ib_device for netdev; NULL if an allocation fails
+ */
+struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
+{
+	struct nes_vnic *nesvnic = netdev_priv(netdev);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_ib_device *nesibdev;
+	struct ib_device *ibdev;
+
+	nesibdev = (struct nes_ib_device *)ib_alloc_device(sizeof(struct nes_ib_device));
+	if (!nesibdev)
+		return NULL;
+	ibdev = &nesibdev->ibdev;
+	strlcpy(ibdev->name, "nes%d", IB_DEVICE_NAME_MAX);
+	ibdev->owner = THIS_MODULE;
+	ibdev->node_type = RDMA_NODE_RNIC;
+	memset(&ibdev->node_guid, 0, sizeof(ibdev->node_guid));
+	memcpy(&ibdev->node_guid, netdev->dev_addr, 6);
+
+	ibdev->uverbs_cmd_mask =
+			(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+			(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+			(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+			(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+			(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+			(1ull << IB_USER_VERBS_CMD_REG_MR) |
+			(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+			(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+			(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+			(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+			(1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+			(1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
+			(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+			(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+			(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+			(1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+			(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+			(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
+			(1ull << IB_USER_VERBS_CMD_BIND_MW) |
+			(1ull << IB_USER_VERBS_CMD_DEALLOC_MW) |
+			(1ull << IB_USER_VERBS_CMD_POST_RECV) |
+			(1ull << IB_USER_VERBS_CMD_POST_SEND);
+
+	ibdev->phys_port_cnt = 1;
+	ibdev->num_comp_vectors = 1;
+	ibdev->dma_device = &nesdev->pcidev->dev;
+	ibdev->class_dev.dev = &nesdev->pcidev->dev;
+	ibdev->query_device = nes_query_device;
+	ibdev->query_port = nes_query_port;
+	ibdev->modify_port = nes_modify_port;
+	ibdev->query_pkey = nes_query_pkey;
+	ibdev->query_gid = nes_query_gid;
+	ibdev->alloc_ucontext = nes_alloc_ucontext;
+	ibdev->dealloc_ucontext = nes_dealloc_ucontext;
+	ibdev->mmap = nes_mmap;
+	ibdev->alloc_pd = nes_alloc_pd;
+	ibdev->dealloc_pd = nes_dealloc_pd;
+	ibdev->create_ah = nes_create_ah;
+	ibdev->destroy_ah = nes_destroy_ah;
+	ibdev->create_qp = nes_create_qp;
+	ibdev->modify_qp = nes_modify_qp;
+	ibdev->query_qp = nes_query_qp;
+	ibdev->destroy_qp = nes_destroy_qp;
+	ibdev->create_cq = nes_create_cq;
+	ibdev->destroy_cq = nes_destroy_cq;
+	ibdev->poll_cq = nes_poll_cq;
+	ibdev->get_dma_mr = nes_get_dma_mr;
+	ibdev->reg_phys_mr = nes_reg_phys_mr;
+	ibdev->reg_user_mr = nes_reg_user_mr;
+	ibdev->dereg_mr = nes_dereg_mr;
+	ibdev->alloc_mw = nes_alloc_mw;
+	ibdev->dealloc_mw = nes_dealloc_mw;
+	ibdev->bind_mw = nes_bind_mw;
+
+	ibdev->alloc_fmr = nes_alloc_fmr;
+	ibdev->unmap_fmr = nes_unmap_fmr;
+	ibdev->dealloc_fmr = nes_dealloc_fmr;
+	ibdev->map_phys_fmr = nes_map_phys_fmr;
+
+	ibdev->attach_mcast = nes_multicast_attach;
+	ibdev->detach_mcast = nes_multicast_detach;
+	ibdev->process_mad = nes_process_mad;
+
+	ibdev->req_notify_cq = nes_req_notify_cq;
+	ibdev->post_send = nes_post_send;
+	ibdev->post_recv = nes_post_recv;
+
+	ibdev->iwcm = kzalloc(sizeof(*ibdev->iwcm), GFP_KERNEL);
+	if (ibdev->iwcm == NULL) {
+		ib_dealloc_device(ibdev);
+		return NULL;
+	}
+	ibdev->iwcm->add_ref = nes_add_ref;
+	ibdev->iwcm->rem_ref = nes_rem_ref;
+	ibdev->iwcm->get_qp = nes_get_qp;
+	ibdev->iwcm->connect = nes_connect;
+	ibdev->iwcm->accept = nes_accept;
+	ibdev->iwcm->reject = nes_reject;
+	ibdev->iwcm->create_listen = nes_create_listen;
+	ibdev->iwcm->destroy_listen = nes_destroy_listen;
+
+	return nesibdev;
+}
+
+
+/**
+ * nes_destroy_ofa_device - unregister and free an OFA device; NULL is ignored
+ */
+void nes_destroy_ofa_device(struct nes_ib_device *nesibdev)
+{
+	if (nesibdev != NULL) {
+		struct ib_device *ibdev = &nesibdev->ibdev;
+
+		nes_unregister_ofa_device(nesibdev);
+		kfree(ibdev->iwcm);
+		ib_dealloc_device(ibdev);
+	}
+}
+
+
+/**
+ * nes_register_ofa_device - register the ib_device and create its class attributes
+ *
+ * If an attribute file cannot be created, the files created so far are removed
+ * and the device is unregistered again.  Returns 0 or the failing call's error.
+ */
+int nes_register_ofa_device(struct nes_ib_device *nesibdev)
+{
+	struct nes_vnic *nesvnic = nesibdev->nesvnic;
+	struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
+	int status;
+	int n;
+
+	status = ib_register_device(&nesvnic->nesibdev->ibdev);
+	if (status)
+		return status;
+
+	/* Derive this device's share of adapter resources by dividing by the port count */
+	nesibdev->max_cq = (nesadapter->max_cq-NES_FIRST_QPN) / nesadapter->port_count;
+	nesibdev->max_mr = nesadapter->max_mr / nesadapter->port_count;
+	nesibdev->max_qp = (nesadapter->max_qp-NES_FIRST_QPN) / nesadapter->port_count;
+	nesibdev->max_pd = nesadapter->max_pd / nesadapter->port_count;
+
+	for (n = 0; n < ARRAY_SIZE(nes_class_attributes); ++n) {
+		status = class_device_create_file(&nesibdev->ibdev.class_dev, nes_class_attributes[n]);
+		if (!status)
+			continue;
+		/* Undo the files created so far, newest first, then unregister. */
+		while (--n >= 0)
+			class_device_remove_file(&nesibdev->ibdev.class_dev,
+					nes_class_attributes[n]);
+		ib_unregister_device(&nesibdev->ibdev);
+		return status;
+	}
+
+	nesvnic->of_device_registered = 1;
+	return 0;
+}
+
+
+/**
+ * nes_unregister_ofa_device - remove class attribute files and unregister the device
+ * A NULL nesibdev is ignored; ib_unregister_device() runs only if of_device_registered.
+ */
+void nes_unregister_ofa_device(struct nes_ib_device *nesibdev)
+{
+	struct nes_vnic *nesvnic;
+	int i;
+
+	if (nesibdev == NULL)
+		return;
+	nesvnic = nesibdev->nesvnic;
+
+	for (i = 0; i < ARRAY_SIZE(nes_class_attributes); ++i)
+		class_device_remove_file(&nesibdev->ibdev.class_dev, nes_class_attributes[i]);
+
+	if (nesvnic->of_device_registered)
+		ib_unregister_device(&nesibdev->ibdev);
+
+	nesvnic->of_device_registered = 0;
+}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
new file mode 100644
index 0000000..6c6b4da
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef NES_VERBS_H
+#define NES_VERBS_H
+
+struct nes_device;
+
+#define NES_MAX_USER_DB_REGIONS  4096
+#define NES_MAX_USER_WQ_REGIONS  4096
+
+struct nes_ucontext {	/* driver-side user context; embeds the core ib_ucontext */
+	struct ib_ucontext ibucontext;
+	struct nes_device  *nesdev;
+	unsigned long      mmap_wq_offset;
+	unsigned long      mmap_cq_offset; /* to be removed */
+	int                index;		/* rnic index (minor) */
+	unsigned long      allocated_doorbells[BITS_TO_LONGS(NES_MAX_USER_DB_REGIONS)];
+	u16                mmap_db_index[NES_MAX_USER_DB_REGIONS]; /* one entry per DB region */
+	u16                first_free_db;
+	unsigned long      allocated_wqs[BITS_TO_LONGS(NES_MAX_USER_WQ_REGIONS)];
+	struct nes_qp      *mmap_nesqp[NES_MAX_USER_WQ_REGIONS]; /* one entry per WQ region */
+	u16                first_free_wq;
+	struct list_head   cq_reg_mem_list;
+	struct list_head   qp_reg_mem_list;
+	u32                mcrqf;
+	atomic_t	   usecnt;
+};
+
+struct nes_pd {	/* driver-side protection domain; embeds the core ib_pd */
+	struct ib_pd ibpd;
+	u16          pd_id;
+	atomic_t     sqp_count;
+	u16          mmap_db_index;
+};
+
+struct nes_mr {	/* driver-side memory registration state */
+	union {	/* the same storage is viewed as an ib_mr, ib_mw or ib_fmr */
+		struct ib_mr  ibmr;
+		struct ib_mw  ibmw;
+		struct ib_fmr ibfmr;
+	};
+	struct ib_umem    *region;
+	u16               pbls_used;
+	u8                mode;
+	u8                pbl_4k;
+};
+
+struct nes_hw_pb {	/* two little-endian 32-bit halves; presumably a physical address - confirm */
+	__le32 pa_low;
+	__le32 pa_high;
+};
+
+struct nes_vpbl {	/* NOTE(review): pbase/vbase presumably name the same buffer - confirm */
+	dma_addr_t       pbl_pbase;
+	struct nes_hw_pb *pbl_vbase;
+};
+
+struct nes_root_vpbl {	/* same two fields as nes_vpbl, plus a pointer to leaf nes_vpbl */
+	dma_addr_t       pbl_pbase;
+	struct nes_hw_pb *pbl_vbase;
+	struct nes_vpbl  *leaf_vpbl;
+};
+
+struct nes_fmr {	/* FMR state; embeds nes_mr as its first member */
+	struct nes_mr        nesmr;
+	u32                  leaf_pbl_cnt;
+	struct nes_root_vpbl root_vpbl;
+	struct ib_qp         *ib_qp;
+	int                  access_rights;
+	struct ib_fmr_attr   attr;
+};
+
+struct nes_av;
+
+struct nes_cq {	/* driver-side CQ; embeds the core ib_cq and a nes_hw_cq */
+	struct ib_cq     ibcq;
+	struct nes_hw_cq hw_cq;
+	u32              polled_completions;
+	u32              cq_mem_size;
+	spinlock_t       lock;
+	u8               virtual_cq;
+	u8               pad[3];	/* explicit padding after the single u8 above */
+};
+
+struct nes_wq {	/* holds nothing but a spinlock */
+	spinlock_t lock;
+};
+
+struct iw_cm_id;
+struct ietf_mpa_frame;
+
+struct nes_qp {	/* driver-side QP; embeds the core ib_qp as its first member */
+	struct ib_qp          ibqp;
+	void                  *allocated_buffer;
+	struct iw_cm_id       *cm_id;
+	struct workqueue_struct *wq;
+	struct work_struct    disconn_work;
+	struct nes_cq         *nesscq;	/* presumably the send CQ - confirm */
+	struct nes_cq         *nesrcq;	/* presumably the receive CQ - confirm */
+	struct nes_pd         *nespd;
+	void *cm_node; /* handle of the node this QP is associated with */
+	struct ietf_mpa_frame *ietf_frame;
+	dma_addr_t            ietf_frame_pbase;
+	wait_queue_head_t     state_waitq;
+	unsigned long         socket;
+	struct nes_hw_qp      hwqp;
+	struct work_struct    work;
+	struct work_struct    ae_work;
+	enum ib_qp_state      ibqp_state;
+	u32                   iwarp_state;
+	u32                   hte_index;
+	u32                   last_aeq;
+	u32                   qp_mem_size;
+	atomic_t              refcount;
+	atomic_t              close_timer_started;
+	u32                   mmap_sq_db_index;
+	u32                   mmap_rq_db_index;
+	spinlock_t            lock;
+	struct nes_qp_context *nesqp_context;
+	dma_addr_t            nesqp_context_pbase;
+	void	              *pbl_vbase;
+	dma_addr_t            pbl_pbase;
+	struct page           *page;
+	wait_queue_head_t     kick_waitq;
+	u16                   in_disconnect;
+	u16                   private_data_len;
+	u8                    active_conn;
+	u8                    skip_lsmm;
+	u8                    user_mode;
+	u8                    hte_added;
+	u8                    hw_iwarp_state;
+	u8                    flush_issued;
+	u8                    hw_tcp_state;
+	u8                    disconn_pending;
+	u8                    destroyed;
+};
+#endif			/* NES_VERBS_H */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index a082466..09f5371 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -680,12 +680,7 @@
 
 		neigh = *to_ipoib_neigh(skb->dst->neighbour);
 
-		if (ipoib_cm_get(neigh)) {
-			if (ipoib_cm_up(neigh)) {
-				ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
-				goto out;
-			}
-		} else if (neigh->ah) {
+		if (neigh->ah)
 			if (unlikely((memcmp(&neigh->dgid.raw,
 					    skb->dst->neighbour->ha + 4,
 					    sizeof(union ib_gid))) ||
@@ -706,6 +701,12 @@
 				goto out;
 			}
 
+		if (ipoib_cm_get(neigh)) {
+			if (ipoib_cm_up(neigh)) {
+				ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
+				goto out;
+			}
+		} else if (neigh->ah) {
 			ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
 			goto out;
 		}
@@ -813,11 +814,9 @@
 	struct ipoib_ah *ah = NULL;
 
 	neigh = *to_ipoib_neigh(n);
-	if (neigh) {
+	if (neigh)
 		priv = netdev_priv(neigh->dev);
-		ipoib_dbg(priv, "neigh_destructor for bonding device: %s\n",
-			  n->dev->name);
-	} else
+	else
 		return;
 	ipoib_dbg(priv,
 		  "neigh_cleanup for %06x " IPOIB_GID_FMT "\n",
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 195ce7c..fd4a49f 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -204,6 +204,22 @@
 	return ret;
 }
 
+static int srp_new_cm_id(struct srp_target_port *target)
+{
+	struct ib_cm_id *fresh;
+
+	/* Create the replacement first: on failure the old CM ID is kept. */
+	fresh = ib_create_cm_id(target->srp_host->dev->dev,
+				srp_cm_handler, target);
+	if (IS_ERR(fresh))
+		return PTR_ERR(fresh);
+
+	if (target->cm_id != NULL)	/* skip the destroy when no CM ID is set */
+		ib_destroy_cm_id(target->cm_id);
+	target->cm_id = fresh;
+	return 0;
+}
+
 static int srp_create_target_ib(struct srp_target_port *target)
 {
 	struct ib_qp_init_attr *init_attr;
@@ -436,6 +452,7 @@
 
 static int srp_connect_target(struct srp_target_port *target)
 {
+	int retries = 3;
 	int ret;
 
 	ret = srp_lookup_path(target);
@@ -468,6 +485,21 @@
 		case SRP_DLID_REDIRECT:
 			break;
 
+		case SRP_STALE_CONN:
+			/* Our current CM id was stale, and is now in timewait.
+			 * Try to reconnect with a new one.
+			 */
+			if (!retries-- || srp_new_cm_id(target)) {
+				shost_printk(KERN_ERR, target->scsi_host, PFX
+					     "giving up on stale connection\n");
+				target->status = -ECONNRESET;
+				return target->status;
+			}
+
+			shost_printk(KERN_ERR, target->scsi_host, PFX
+				     "retrying stale connection\n");
+			break;
+
 		default:
 			return target->status;
 		}
@@ -507,7 +539,6 @@
 
 static int srp_reconnect_target(struct srp_target_port *target)
 {
-	struct ib_cm_id *new_cm_id;
 	struct ib_qp_attr qp_attr;
 	struct srp_request *req, *tmp;
 	struct ib_wc wc;
@@ -526,14 +557,9 @@
 	 * Now get a new local CM ID so that we avoid confusing the
 	 * target in case things are really fouled up.
 	 */
-	new_cm_id = ib_create_cm_id(target->srp_host->dev->dev,
-				    srp_cm_handler, target);
-	if (IS_ERR(new_cm_id)) {
-		ret = PTR_ERR(new_cm_id);
+	ret = srp_new_cm_id(target);
+	if (ret)
 		goto err;
-	}
-	ib_destroy_cm_id(target->cm_id);
-	target->cm_id = new_cm_id;
 
 	qp_attr.qp_state = IB_QPS_RESET;
 	ret = ib_modify_qp(target->qp, &qp_attr, IB_QP_STATE);
@@ -1171,6 +1197,11 @@
 		target->status = -ECONNRESET;
 		break;
 
+	case IB_CM_REJ_STALE_CONN:
+		shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
+		target->status = SRP_STALE_CONN;
+		break;
+
 	default:
 		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
 			     event->param.rej_rcvd.reason);
@@ -1862,11 +1893,9 @@
 	if (ret)
 		goto err;
 
-	target->cm_id = ib_create_cm_id(host->dev->dev, srp_cm_handler, target);
-	if (IS_ERR(target->cm_id)) {
-		ret = PTR_ERR(target->cm_id);
+	ret = srp_new_cm_id(target);
+	if (ret)
 		goto err_free;
-	}
 
 	target->qp_in_error = 0;
 	ret = srp_connect_target(target);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 4a3c1f3..cb6eb81 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -54,6 +54,7 @@
 
 	SRP_PORT_REDIRECT	= 1,
 	SRP_DLID_REDIRECT	= 2,
+	SRP_STALE_CONN		= 3,
 
 	SRP_MAX_LUN		= 512,
 	SRP_DEF_SG_TABLESIZE	= 12,
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index 64c66b3..4a93878 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -137,12 +137,14 @@
 EXPORT_SYMBOL_GPL(led_classdev_register);
 
 /**
- * led_classdev_unregister - unregisters a object of led_properties class.
+ * __led_classdev_unregister - unregisters an object of led_properties class.
  * @led_cdev: the led device to unregister
+ * @suspended: indicates whether system-wide suspend or resume is in progress
  *
  * Unregisters a previously registered via led_classdev_register object.
  */
-void led_classdev_unregister(struct led_classdev *led_cdev)
+void __led_classdev_unregister(struct led_classdev *led_cdev,
+				      bool suspended)
 {
 	device_remove_file(led_cdev->dev, &dev_attr_brightness);
 #ifdef CONFIG_LEDS_TRIGGERS
@@ -153,13 +155,16 @@
 	up_write(&led_cdev->trigger_lock);
 #endif
 
-	device_unregister(led_cdev->dev);
+	if (suspended)
+		device_pm_schedule_removal(led_cdev->dev);
+	else
+		device_unregister(led_cdev->dev);
 
 	down_write(&leds_list_lock);
 	list_del(&led_cdev->node);
 	up_write(&leds_list_lock);
 }
-EXPORT_SYMBOL_GPL(led_classdev_unregister);
+EXPORT_SYMBOL_GPL(__led_classdev_unregister);
 
 static int __init leds_init(void)
 {
diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c
index 01b8eca..6e6dd17 100644
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -111,7 +111,7 @@
 static struct adb_request *current_req; /* first request struct in the queue */
 static struct adb_request *last_req;     /* last request struct in the queue */
 static unsigned char reply_buf[16];        /* storage for autopolled replies */
-static unsigned char *reply_ptr;      /* next byte in req->data or reply_buf */
+static unsigned char *reply_ptr;     /* next byte in reply_buf or req->reply */
 static int reading_reply;        /* store reply in reply_buf else req->reply */
 static int data_index;      /* index of the next byte to send from req->data */
 static int reply_len; /* number of bytes received in reply_buf or req->reply */
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index f234ba3..7d170cd 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -920,8 +920,7 @@
 	---help---
 	  Support for the Microchip EN28J60 ethernet chip.
 
-	  To compile this driver as a module, choose M here and read
-	  <file:Documentation/networking/net-modules.txt>.  The module will be
+	  To compile this driver as a module, choose M here. The module will be
 	  called enc28j60.
 
 config ENC28J60_WRITEVERIFY
@@ -2041,8 +2040,7 @@
          More specific information on configuring the driver is in
          <file:Documentation/networking/e1000.txt>.
 
-         To compile this driver as a module, choose M here and read
-         <file:Documentation/networking/net-modules.txt>.  The module
+         To compile this driver as a module, choose M here. The module
          will be called igb.
 
 source "drivers/net/ixp2000/Kconfig"
diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c
index d48c396..901c824 100644
--- a/drivers/net/cxgb3/cxgb3_offload.c
+++ b/drivers/net/cxgb3/cxgb3_offload.c
@@ -1070,9 +1070,7 @@
  */
 void cxgb_free_mem(void *addr)
 {
-	unsigned long p = (unsigned long)addr;
-
-	if (p >= VMALLOC_START && p < VMALLOC_END)
+	if (is_vmalloc_addr(addr))
 		vfree(addr);
 	else
 		kfree(addr);
diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c
index 11b83da..e04bf99 100644
--- a/drivers/net/hamradio/dmascc.c
+++ b/drivers/net/hamradio/dmascc.c
@@ -262,8 +262,8 @@
 
 static int io[MAX_NUM_DEVS] __initdata = { 0, };
 
-/* Beware! hw[] is also used in cleanup_module(). */
-static struct scc_hardware hw[NUM_TYPES] __initdata_or_module = HARDWARE;
+/* Beware! hw[] is also used in dmascc_exit(). */
+static struct scc_hardware hw[NUM_TYPES] = HARDWARE;
 
 
 /* Global variables */
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 535a446..61dc495 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -617,9 +617,6 @@
 	int err;
 
 #define QUERY_ADAPTER_OUT_SIZE             0x100
-#define QUERY_ADAPTER_VENDOR_ID_OFFSET     0x00
-#define QUERY_ADAPTER_DEVICE_ID_OFFSET     0x04
-#define QUERY_ADAPTER_REVISION_ID_OFFSET   0x08
 #define QUERY_ADAPTER_INTA_PIN_OFFSET      0x10
 #define QUERY_ADAPTER_VSD_OFFSET           0x20
 
@@ -633,9 +630,6 @@
 	if (err)
 		goto out;
 
-	MLX4_GET(adapter->vendor_id, outbox,   QUERY_ADAPTER_VENDOR_ID_OFFSET);
-	MLX4_GET(adapter->device_id, outbox,   QUERY_ADAPTER_DEVICE_ID_OFFSET);
-	MLX4_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET);
 	MLX4_GET(adapter->inta_pin, outbox,    QUERY_ADAPTER_INTA_PIN_OFFSET);
 
 	get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4,
diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h
index 7e1dd9e..e16dec8 100644
--- a/drivers/net/mlx4/fw.h
+++ b/drivers/net/mlx4/fw.h
@@ -99,9 +99,6 @@
 };
 
 struct mlx4_adapter {
-	u32  vendor_id;
-	u32  device_id;
-	u32  revision_id;
 	char board_id[MLX4_BOARD_ID_LEN];
 	u8   inta_pin;
 };
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 89b3f0b..08bfc13 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -71,7 +71,7 @@
 
 #endif /* CONFIG_PCI_MSI */
 
-static const char mlx4_version[] __devinitdata =
+static char mlx4_version[] __devinitdata =
 	DRV_NAME ": Mellanox ConnectX core driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
 
@@ -163,7 +163,7 @@
 	return 0;
 }
 
-static int __devinit mlx4_load_fw(struct mlx4_dev *dev)
+static int mlx4_load_fw(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int err;
@@ -197,8 +197,8 @@
 	return err;
 }
 
-static int __devinit mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
-					  int cmpt_entry_sz)
+static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
+				int cmpt_entry_sz)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int err;
@@ -534,7 +534,6 @@
 	}
 
 	priv->eq_table.inta_pin = adapter.inta_pin;
-	dev->rev_id		= adapter.revision_id;
 	memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
 
 	return 0;
@@ -688,7 +687,7 @@
 	return err;
 }
 
-static void __devinit mlx4_enable_msi_x(struct mlx4_dev *dev)
+static void mlx4_enable_msi_x(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct msix_entry entries[MLX4_NUM_EQ];
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index 0c05a10..9c9e308 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -122,7 +122,7 @@
 	spin_unlock(&buddy->lock);
 }
 
-static int __devinit mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
+static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
 {
 	int i, s;
 
diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c
index 36a7ba3..3b78a38 100644
--- a/drivers/net/pcmcia/3c574_cs.c
+++ b/drivers/net/pcmcia/3c574_cs.c
@@ -230,10 +230,11 @@
 static int tc574_config(struct pcmcia_device *link);
 static void tc574_release(struct pcmcia_device *link);
 
-static void mdio_sync(kio_addr_t ioaddr, int bits);
-static int mdio_read(kio_addr_t ioaddr, int phy_id, int location);
-static void mdio_write(kio_addr_t ioaddr, int phy_id, int location, int value);
-static unsigned short read_eeprom(kio_addr_t ioaddr, int index);
+static void mdio_sync(unsigned int ioaddr, int bits);
+static int mdio_read(unsigned int ioaddr, int phy_id, int location);
+static void mdio_write(unsigned int ioaddr, int phy_id, int location,
+		       int value);
+static unsigned short read_eeprom(unsigned int ioaddr, int index);
 static void tc574_wait_for_completion(struct net_device *dev, int cmd);
 
 static void tc574_reset(struct net_device *dev);
@@ -341,7 +342,7 @@
 	tuple_t tuple;
 	__le16 buf[32];
 	int last_fn, last_ret, i, j;
-	kio_addr_t ioaddr;
+	unsigned int ioaddr;
 	__be16 *phys_addr;
 	char *cardname;
 	__u32 config;
@@ -515,7 +516,7 @@
 
 static void dump_status(struct net_device *dev)
 {
-	kio_addr_t ioaddr = dev->base_addr;
+	unsigned int ioaddr = dev->base_addr;
 	EL3WINDOW(1);
 	printk(KERN_INFO "  irq status %04x, rx status %04x, tx status "
 		   "%02x, tx free %04x\n", inw(ioaddr+EL3_STATUS),
@@ -544,7 +545,7 @@
 /* Read a word from the EEPROM using the regular EEPROM access register.
    Assume that we are in register window zero.
  */
-static unsigned short read_eeprom(kio_addr_t ioaddr, int index)
+static unsigned short read_eeprom(unsigned int ioaddr, int index)
 {
 	int timer;
 	outw(EEPROM_Read + index, ioaddr + Wn0EepromCmd);
@@ -572,9 +573,9 @@
 
 /* Generate the preamble required for initial synchronization and
    a few older transceivers. */
-static void mdio_sync(kio_addr_t ioaddr, int bits)
+static void mdio_sync(unsigned int ioaddr, int bits)
 {
-	kio_addr_t mdio_addr = ioaddr + Wn4_PhysicalMgmt;
+	unsigned int mdio_addr = ioaddr + Wn4_PhysicalMgmt;
 
 	/* Establish sync by sending at least 32 logic ones. */
 	while (-- bits >= 0) {
@@ -583,12 +584,12 @@
 	}
 }
 
-static int mdio_read(kio_addr_t ioaddr, int phy_id, int location)
+static int mdio_read(unsigned int ioaddr, int phy_id, int location)
 {
 	int i;
 	int read_cmd = (0xf6 << 10) | (phy_id << 5) | location;
 	unsigned int retval = 0;
-	kio_addr_t mdio_addr = ioaddr + Wn4_PhysicalMgmt;
+	unsigned int mdio_addr = ioaddr + Wn4_PhysicalMgmt;
 
 	if (mii_preamble_required)
 		mdio_sync(ioaddr, 32);
@@ -608,10 +609,10 @@
 	return (retval>>1) & 0xffff;
 }
 
-static void mdio_write(kio_addr_t ioaddr, int phy_id, int location, int value)
+static void mdio_write(unsigned int ioaddr, int phy_id, int location, int value)
 {
 	int write_cmd = 0x50020000 | (phy_id << 23) | (location << 18) | value;
-	kio_addr_t mdio_addr = ioaddr + Wn4_PhysicalMgmt;
+	unsigned int mdio_addr = ioaddr + Wn4_PhysicalMgmt;
 	int i;
 
 	if (mii_preamble_required)
@@ -637,7 +638,7 @@
 {
 	struct el3_private *lp = netdev_priv(dev);
 	int i;
-	kio_addr_t ioaddr = dev->base_addr;
+	unsigned int ioaddr = dev->base_addr;
 	unsigned long flags;
 
 	tc574_wait_for_completion(dev, TotalReset|0x10);
@@ -695,7 +696,7 @@
 	mdio_write(ioaddr, lp->phys, 4, lp->advertising);
 	if (!auto_polarity) {
 		/* works for TDK 78Q2120 series MII's */
-		int i = mdio_read(ioaddr, lp->phys, 16) | 0x20;
+		i = mdio_read(ioaddr, lp->phys, 16) | 0x20;
 		mdio_write(ioaddr, lp->phys, 16, i);
 	}
 
@@ -741,7 +742,7 @@
 static void el3_tx_timeout(struct net_device *dev)
 {
 	struct el3_private *lp = netdev_priv(dev);
-	kio_addr_t ioaddr = dev->base_addr;
+	unsigned int ioaddr = dev->base_addr;
 	
 	printk(KERN_NOTICE "%s: Transmit timed out!\n", dev->name);
 	dump_status(dev);
@@ -756,7 +757,7 @@
 static void pop_tx_status(struct net_device *dev)
 {
 	struct el3_private *lp = netdev_priv(dev);
-	kio_addr_t ioaddr = dev->base_addr;
+	unsigned int ioaddr = dev->base_addr;
 	int i;
     
 	/* Clear the Tx status stack. */
@@ -779,7 +780,7 @@
 
 static int el3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	kio_addr_t ioaddr = dev->base_addr;
+	unsigned int ioaddr = dev->base_addr;
 	struct el3_private *lp = netdev_priv(dev);
 	unsigned long flags;
 
@@ -813,7 +814,7 @@
 {
 	struct net_device *dev = (struct net_device *) dev_id;
 	struct el3_private *lp = netdev_priv(dev);
-	kio_addr_t ioaddr;
+	unsigned int ioaddr;
 	unsigned status;
 	int work_budget = max_interrupt_work;
 	int handled = 0;
@@ -907,7 +908,7 @@
 {
 	struct net_device *dev = (struct net_device *) arg;
 	struct el3_private *lp = netdev_priv(dev);
-	kio_addr_t ioaddr = dev->base_addr;
+	unsigned int ioaddr = dev->base_addr;
 	unsigned long flags;
 	unsigned short /* cable, */ media, partner;
 
@@ -996,7 +997,7 @@
 static void update_stats(struct net_device *dev)
 {
 	struct el3_private *lp = netdev_priv(dev);
-	kio_addr_t ioaddr = dev->base_addr;
+	unsigned int ioaddr = dev->base_addr;
 	u8 rx, tx, up;
 
 	DEBUG(2, "%s: updating the statistics.\n", dev->name);
@@ -1033,7 +1034,7 @@
 static int el3_rx(struct net_device *dev, int worklimit)
 {
 	struct el3_private *lp = netdev_priv(dev);
-	kio_addr_t ioaddr = dev->base_addr;
+	unsigned int ioaddr = dev->base_addr;
 	short rx_status;
 	
 	DEBUG(3, "%s: in rx_packet(), status %4.4x, rx_status %4.4x.\n",
@@ -1094,7 +1095,7 @@
 static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
 	struct el3_private *lp = netdev_priv(dev);
-	kio_addr_t ioaddr = dev->base_addr;
+	unsigned int ioaddr = dev->base_addr;
 	u16 *data = (u16 *)&rq->ifr_ifru;
 	int phy = lp->phys & 0x1f;
 
@@ -1148,7 +1149,7 @@
 
 static void set_rx_mode(struct net_device *dev)
 {
-	kio_addr_t ioaddr = dev->base_addr;
+	unsigned int ioaddr = dev->base_addr;
 
 	if (dev->flags & IFF_PROMISC)
 		outw(SetRxFilter | RxStation | RxMulticast | RxBroadcast | RxProm,
@@ -1161,7 +1162,7 @@
 
 static int el3_close(struct net_device *dev)
 {
-	kio_addr_t ioaddr = dev->base_addr;
+	unsigned int ioaddr = dev->base_addr;
 	struct el3_private *lp = netdev_priv(dev);
 	struct pcmcia_device *link = lp->p_dev;
 
diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c
index e862d14..1b1abb1 100644
--- a/drivers/net/pcmcia/3c589_cs.c
+++ b/drivers/net/pcmcia/3c589_cs.c
@@ -145,7 +145,7 @@
 static int tc589_config(struct pcmcia_device *link);
 static void tc589_release(struct pcmcia_device *link);
 
-static u16 read_eeprom(kio_addr_t ioaddr, int index);
+static u16 read_eeprom(unsigned int ioaddr, int index);
 static void tc589_reset(struct net_device *dev);
 static void media_check(unsigned long arg);
 static int el3_config(struct net_device *dev, struct ifmap *map);
@@ -254,7 +254,7 @@
     __le16 buf[32];
     __be16 *phys_addr;
     int last_fn, last_ret, i, j, multi = 0, fifo;
-    kio_addr_t ioaddr;
+    unsigned int ioaddr;
     char *ram_split[] = {"5:3", "3:1", "1:1", "3:5"};
     DECLARE_MAC_BUF(mac);
     
@@ -403,7 +403,7 @@
   Read a word from the EEPROM using the regular EEPROM access register.
   Assume that we are in register window zero.
 */
-static u16 read_eeprom(kio_addr_t ioaddr, int index)
+static u16 read_eeprom(unsigned int ioaddr, int index)
 {
     int i;
     outw(EEPROM_READ + index, ioaddr + 10);
@@ -421,7 +421,7 @@
 static void tc589_set_xcvr(struct net_device *dev, int if_port)
 {
     struct el3_private *lp = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     
     EL3WINDOW(0);
     switch (if_port) {
@@ -443,7 +443,7 @@
 
 static void dump_status(struct net_device *dev)
 {
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     EL3WINDOW(1);
     printk(KERN_INFO "  irq status %04x, rx status %04x, tx status "
 	   "%02x  tx free %04x\n", inw(ioaddr+EL3_STATUS),
@@ -459,7 +459,7 @@
 /* Reset and restore all of the 3c589 registers. */
 static void tc589_reset(struct net_device *dev)
 {
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     int i;
     
     EL3WINDOW(0);
@@ -567,7 +567,7 @@
 static void el3_tx_timeout(struct net_device *dev)
 {
     struct el3_private *lp = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     
     printk(KERN_WARNING "%s: Transmit timed out!\n", dev->name);
     dump_status(dev);
@@ -582,7 +582,7 @@
 static void pop_tx_status(struct net_device *dev)
 {
     struct el3_private *lp = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     int i;
     
     /* Clear the Tx status stack. */
@@ -604,7 +604,7 @@
 
 static int el3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     struct el3_private *priv = netdev_priv(dev);
     unsigned long flags;
 
@@ -641,7 +641,7 @@
 {
     struct net_device *dev = (struct net_device *) dev_id;
     struct el3_private *lp = netdev_priv(dev);
-    kio_addr_t ioaddr;
+    unsigned int ioaddr;
     __u16 status;
     int i = 0, handled = 1;
     
@@ -727,7 +727,7 @@
 {
     struct net_device *dev = (struct net_device *)(arg);
     struct el3_private *lp = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     u16 media, errs;
     unsigned long flags;
 
@@ -828,7 +828,7 @@
 static void update_stats(struct net_device *dev)
 {
     struct el3_private *lp = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
 
     DEBUG(2, "%s: updating the statistics.\n", dev->name);
     /* Turn off statistics updates while reading. */
@@ -855,7 +855,7 @@
 static int el3_rx(struct net_device *dev)
 {
     struct el3_private *lp = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     int worklimit = 32;
     short rx_status;
     
@@ -909,7 +909,7 @@
 {
     struct el3_private *lp = netdev_priv(dev);
     struct pcmcia_device *link = lp->p_dev;
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     u16 opts = SetRxFilter | RxStation | RxBroadcast;
 
     if (!pcmcia_dev_present(link)) return;
@@ -924,7 +924,7 @@
 {
     struct el3_private *lp = netdev_priv(dev);
     struct pcmcia_device *link = lp->p_dev;
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     
     DEBUG(1, "%s: shutting down ethercard.\n", dev->name);
 
diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c
index 6d342f6..e8a63e4 100644
--- a/drivers/net/pcmcia/axnet_cs.c
+++ b/drivers/net/pcmcia/axnet_cs.c
@@ -96,8 +96,8 @@
 static void ei_watchdog(u_long arg);
 static void axnet_reset_8390(struct net_device *dev);
 
-static int mdio_read(kio_addr_t addr, int phy_id, int loc);
-static void mdio_write(kio_addr_t addr, int phy_id, int loc, int value);
+static int mdio_read(unsigned int addr, int phy_id, int loc);
+static void mdio_write(unsigned int addr, int phy_id, int loc, int value);
 
 static void get_8390_hdr(struct net_device *,
 			 struct e8390_pkt_hdr *, int);
@@ -203,7 +203,7 @@
 static int get_prom(struct pcmcia_device *link)
 {
     struct net_device *dev = link->priv;
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     int i, j;
 
     /* This is based on drivers/net/ne.c */
@@ -473,7 +473,7 @@
 #define MDIO_MASK		0x0f
 #define MDIO_ENB_IN		0x02
 
-static void mdio_sync(kio_addr_t addr)
+static void mdio_sync(unsigned int addr)
 {
     int bits;
     for (bits = 0; bits < 32; bits++) {
@@ -482,7 +482,7 @@
     }
 }
 
-static int mdio_read(kio_addr_t addr, int phy_id, int loc)
+static int mdio_read(unsigned int addr, int phy_id, int loc)
 {
     u_int cmd = (0xf6<<10)|(phy_id<<5)|loc;
     int i, retval = 0;
@@ -501,7 +501,7 @@
     return (retval>>1) & 0xffff;
 }
 
-static void mdio_write(kio_addr_t addr, int phy_id, int loc, int value)
+static void mdio_write(unsigned int addr, int phy_id, int loc, int value)
 {
     u_int cmd = (0x05<<28)|(phy_id<<23)|(loc<<18)|(1<<17)|value;
     int i;
@@ -575,7 +575,7 @@
 
 static void axnet_reset_8390(struct net_device *dev)
 {
-    kio_addr_t nic_base = dev->base_addr;
+    unsigned int nic_base = dev->base_addr;
     int i;
 
     ei_status.txing = ei_status.dmaing = 0;
@@ -610,8 +610,8 @@
 {
     struct net_device *dev = (struct net_device *)(arg);
     axnet_dev_t *info = PRIV(dev);
-    kio_addr_t nic_base = dev->base_addr;
-    kio_addr_t mii_addr = nic_base + AXNET_MII_EEP;
+    unsigned int nic_base = dev->base_addr;
+    unsigned int mii_addr = nic_base + AXNET_MII_EEP;
     u_short link;
 
     if (!netif_device_present(dev)) goto reschedule;
@@ -681,7 +681,7 @@
 {
     axnet_dev_t *info = PRIV(dev);
     u16 *data = (u16 *)&rq->ifr_ifru;
-    kio_addr_t mii_addr = dev->base_addr + AXNET_MII_EEP;
+    unsigned int mii_addr = dev->base_addr + AXNET_MII_EEP;
     switch (cmd) {
     case SIOCGMIIPHY:
 	data[0] = info->phy_id;
@@ -703,7 +703,7 @@
 			 struct e8390_pkt_hdr *hdr,
 			 int ring_page)
 {
-    kio_addr_t nic_base = dev->base_addr;
+    unsigned int nic_base = dev->base_addr;
 
     outb_p(0, nic_base + EN0_RSARLO);		/* On page boundary */
     outb_p(ring_page, nic_base + EN0_RSARHI);
@@ -721,7 +721,7 @@
 static void block_input(struct net_device *dev, int count,
 			struct sk_buff *skb, int ring_offset)
 {
-    kio_addr_t nic_base = dev->base_addr;
+    unsigned int nic_base = dev->base_addr;
     int xfer_count = count;
     char *buf = skb->data;
 
@@ -744,7 +744,7 @@
 static void block_output(struct net_device *dev, int count,
 			 const u_char *buf, const int start_page)
 {
-    kio_addr_t nic_base = dev->base_addr;
+    unsigned int nic_base = dev->base_addr;
 
 #ifdef PCMCIA_DEBUG
     if (ei_debug > 4)
@@ -991,7 +991,7 @@
  *
  * Opposite of ax_open(). Only used when "ifconfig <devname> down" is done.
  */
-int ax_close(struct net_device *dev)
+static int ax_close(struct net_device *dev)
 {
 	unsigned long flags;
 
@@ -1014,7 +1014,7 @@
  * completed (or failed) - i.e. never posted a Tx related interrupt.
  */
 
-void ei_tx_timeout(struct net_device *dev)
+static void ei_tx_timeout(struct net_device *dev)
 {
 	long e8390_base = dev->base_addr;
 	struct ei_device *ei_local = (struct ei_device *) netdev_priv(dev);
@@ -1087,8 +1087,8 @@
 	
 	ei_local->irqlock = 1;
 
-	send_length = ETH_ZLEN < length ? length : ETH_ZLEN;
-	
+	send_length = max(length, ETH_ZLEN);
+
 	/*
 	 * We have two Tx slots available for use. Find the first free
 	 * slot, and then perform some sanity checks. With two Tx bufs,
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index 949c6df..8f328a0 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -298,7 +298,8 @@
 static int mfc_try_io_port(struct pcmcia_device *link)
 {
     int i, ret;
-    static const kio_addr_t serial_base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 };
+    static const unsigned int serial_base[5] =
+	{ 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 };
 
     for (i = 0; i < 5; i++) {
 	link->io.BasePort2 = serial_base[i];
@@ -316,7 +317,7 @@
 static int ungermann_try_io_port(struct pcmcia_device *link)
 {
     int ret;
-    kio_addr_t ioaddr;
+    unsigned int ioaddr;
     /*
 	Ungermann-Bass Access/CARD accepts 0x300,0x320,0x340,0x360
 	0x380,0x3c0 only for ioport.
@@ -342,7 +343,7 @@
     cisparse_t parse;
     u_short buf[32];
     int i, last_fn = 0, last_ret = 0, ret;
-    kio_addr_t ioaddr;
+    unsigned int ioaddr;
     cardtype_t cardtype;
     char *card_name = "unknown";
     u_char *node_id;
@@ -610,7 +611,7 @@
     u_char __iomem *base;
     int i, j;
     struct net_device *dev = link->priv;
-    kio_addr_t ioaddr;
+    unsigned int ioaddr;
 
     /* Allocate a small memory window */
     req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE;
@@ -735,7 +736,7 @@
 {
     struct net_device *dev = dev_id;
     local_info_t *lp = netdev_priv(dev);
-    kio_addr_t ioaddr;
+    unsigned int ioaddr;
     unsigned short tx_stat, rx_stat;
 
     ioaddr = dev->base_addr;
@@ -789,7 +790,7 @@
 static void fjn_tx_timeout(struct net_device *dev)
 {
     struct local_info_t *lp = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
 
     printk(KERN_NOTICE "%s: transmit timed out with status %04x, %s?\n",
 	   dev->name, htons(inw(ioaddr + TX_STATUS)),
@@ -819,7 +820,7 @@
 static int fjn_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
     struct local_info_t *lp = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     short length = skb->len;
     
     if (length < ETH_ZLEN)
@@ -892,7 +893,7 @@
 static void fjn_reset(struct net_device *dev)
 {
     struct local_info_t *lp = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     int i;
 
     DEBUG(4, "fjn_reset(%s) called.\n",dev->name);
@@ -971,7 +972,7 @@
 static void fjn_rx(struct net_device *dev)
 {
     struct local_info_t *lp = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     int boguscount = 10;	/* 5 -> 10: by agy 19940922 */
 
     DEBUG(4, "%s: in rx_packet(), rx_status %02x.\n",
@@ -1125,7 +1126,7 @@
 {
     struct local_info_t *lp = netdev_priv(dev);
     struct pcmcia_device *link = lp->p_dev;
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
 
     DEBUG(4, "fjn_close('%s').\n", dev->name);
 
@@ -1168,7 +1169,7 @@
 
 static void set_rx_mode(struct net_device *dev)
 {
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     u_char mc_filter[8];		 /* Multicast hash filter */
     u_long flags;
     int i;
@@ -1197,8 +1198,7 @@
 	outb(1, ioaddr + RX_MODE);	/* Ignore almost all multicasts. */
     } else {
 	struct dev_mc_list *mclist;
-	int i;
-	
+
 	memset(mc_filter, 0, sizeof(mc_filter));
 	for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count;
 	     i++, mclist = mclist->next) {
diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c
index a355a93..cfcbea9 100644
--- a/drivers/net/pcmcia/nmclan_cs.c
+++ b/drivers/net/pcmcia/nmclan_cs.c
@@ -518,7 +518,7 @@
 	assuming that during normal operation, the MACE is always in
 	bank 0.
 ---------------------------------------------------------------------------- */
-static int mace_read(mace_private *lp, kio_addr_t ioaddr, int reg)
+static int mace_read(mace_private *lp, unsigned int ioaddr, int reg)
 {
   int data = 0xFF;
   unsigned long flags;
@@ -545,7 +545,8 @@
 	are assuming that during normal operation, the MACE is always in
 	bank 0.
 ---------------------------------------------------------------------------- */
-static void mace_write(mace_private *lp, kio_addr_t ioaddr, int reg, int data)
+static void mace_write(mace_private *lp, unsigned int ioaddr, int reg,
+		       int data)
 {
   unsigned long flags;
 
@@ -567,7 +568,7 @@
 mace_init
 	Resets the MACE chip.
 ---------------------------------------------------------------------------- */
-static int mace_init(mace_private *lp, kio_addr_t ioaddr, char *enet_addr)
+static int mace_init(mace_private *lp, unsigned int ioaddr, char *enet_addr)
 {
   int i;
   int ct = 0;
@@ -657,7 +658,7 @@
   tuple_t tuple;
   u_char buf[64];
   int i, last_ret, last_fn;
-  kio_addr_t ioaddr;
+  unsigned int ioaddr;
   DECLARE_MAC_BUF(mac);
 
   DEBUG(0, "nmclan_config(0x%p)\n", link);
@@ -839,7 +840,7 @@
 ---------------------------------------------------------------------------- */
 static int mace_open(struct net_device *dev)
 {
-  kio_addr_t ioaddr = dev->base_addr;
+  unsigned int ioaddr = dev->base_addr;
   mace_private *lp = netdev_priv(dev);
   struct pcmcia_device *link = lp->p_dev;
 
@@ -862,7 +863,7 @@
 ---------------------------------------------------------------------------- */
 static int mace_close(struct net_device *dev)
 {
-  kio_addr_t ioaddr = dev->base_addr;
+  unsigned int ioaddr = dev->base_addr;
   mace_private *lp = netdev_priv(dev);
   struct pcmcia_device *link = lp->p_dev;
 
@@ -935,7 +936,7 @@
 static int mace_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
   mace_private *lp = netdev_priv(dev);
-  kio_addr_t ioaddr = dev->base_addr;
+  unsigned int ioaddr = dev->base_addr;
 
   netif_stop_queue(dev);
 
@@ -996,7 +997,7 @@
 {
   struct net_device *dev = (struct net_device *) dev_id;
   mace_private *lp = netdev_priv(dev);
-  kio_addr_t ioaddr;
+  unsigned int ioaddr;
   int status;
   int IntrCnt = MACE_MAX_IR_ITERATIONS;
 
@@ -1140,7 +1141,7 @@
 static int mace_rx(struct net_device *dev, unsigned char RxCnt)
 {
   mace_private *lp = netdev_priv(dev);
-  kio_addr_t ioaddr = dev->base_addr;
+  unsigned int ioaddr = dev->base_addr;
   unsigned char rx_framecnt;
   unsigned short rx_status;
 
@@ -1302,7 +1303,7 @@
 	card's SRAM fast enough.  If this happens, something is
 	seriously wrong with the hardware.
 ---------------------------------------------------------------------------- */
-static void update_stats(kio_addr_t ioaddr, struct net_device *dev)
+static void update_stats(unsigned int ioaddr, struct net_device *dev)
 {
   mace_private *lp = netdev_priv(dev);
 
@@ -1448,7 +1449,7 @@
   mace_private *lp = netdev_priv(dev);
   int num_addrs = lp->multicast_num_addrs;
   int *ladrf = lp->multicast_ladrf;
-  kio_addr_t ioaddr = dev->base_addr;
+  unsigned int ioaddr = dev->base_addr;
   int i;
 
   DEBUG(2, "%s: restoring Rx mode to %d addresses.\n",
@@ -1540,7 +1541,7 @@
 
 static void restore_multicast_list(struct net_device *dev)
 {
-  kio_addr_t ioaddr = dev->base_addr;
+  unsigned int ioaddr = dev->base_addr;
   mace_private *lp = netdev_priv(dev);
 
   DEBUG(2, "%s: restoring Rx mode to %d addresses.\n", dev->name,
diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c
index 9ba56aa..6323988 100644
--- a/drivers/net/pcmcia/pcnet_cs.c
+++ b/drivers/net/pcmcia/pcnet_cs.c
@@ -349,7 +349,7 @@
 static hw_info_t *get_prom(struct pcmcia_device *link)
 {
     struct net_device *dev = link->priv;
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     u_char prom[32];
     int i, j;
 
@@ -425,7 +425,7 @@
 static hw_info_t *get_ax88190(struct pcmcia_device *link)
 {
     struct net_device *dev = link->priv;
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     int i, j;
 
     /* Not much of a test, but the alternatives are messy */
@@ -521,7 +521,7 @@
     int i, last_ret, last_fn, start_pg, stop_pg, cm_offset;
     int has_shmem = 0;
     u_short buf[64];
-    hw_info_t *hw_info;
+    hw_info_t *local_hw_info;
     DECLARE_MAC_BUF(mac);
 
     DEBUG(0, "pcnet_config(0x%p)\n", link);
@@ -590,23 +590,23 @@
 	dev->if_port = 0;
     }
 
-    hw_info = get_hwinfo(link);
-    if (hw_info == NULL)
-	hw_info = get_prom(link);
-    if (hw_info == NULL)
-	hw_info = get_dl10019(link);
-    if (hw_info == NULL)
-	hw_info = get_ax88190(link);
-    if (hw_info == NULL)
-	hw_info = get_hwired(link);
+    local_hw_info = get_hwinfo(link);
+    if (local_hw_info == NULL)
+	local_hw_info = get_prom(link);
+    if (local_hw_info == NULL)
+	local_hw_info = get_dl10019(link);
+    if (local_hw_info == NULL)
+	local_hw_info = get_ax88190(link);
+    if (local_hw_info == NULL)
+	local_hw_info = get_hwired(link);
 
-    if (hw_info == NULL) {
+    if (local_hw_info == NULL) {
 	printk(KERN_NOTICE "pcnet_cs: unable to read hardware net"
 	       " address for io base %#3lx\n", dev->base_addr);
 	goto failed;
     }
 
-    info->flags = hw_info->flags;
+    info->flags = local_hw_info->flags;
     /* Check for user overrides */
     info->flags |= (delay_output) ? DELAY_OUTPUT : 0;
     if ((link->manf_id == MANFID_SOCKET) &&
@@ -756,7 +756,7 @@
 #define MDIO_DATA_READ		0x10
 #define MDIO_MASK		0x0f
 
-static void mdio_sync(kio_addr_t addr)
+static void mdio_sync(unsigned int addr)
 {
     int bits, mask = inb(addr) & MDIO_MASK;
     for (bits = 0; bits < 32; bits++) {
@@ -765,7 +765,7 @@
     }
 }
 
-static int mdio_read(kio_addr_t addr, int phy_id, int loc)
+static int mdio_read(unsigned int addr, int phy_id, int loc)
 {
     u_int cmd = (0x06<<10)|(phy_id<<5)|loc;
     int i, retval = 0, mask = inb(addr) & MDIO_MASK;
@@ -784,7 +784,7 @@
     return (retval>>1) & 0xffff;
 }
 
-static void mdio_write(kio_addr_t addr, int phy_id, int loc, int value)
+static void mdio_write(unsigned int addr, int phy_id, int loc, int value)
 {
     u_int cmd = (0x05<<28)|(phy_id<<23)|(loc<<18)|(1<<17)|value;
     int i, mask = inb(addr) & MDIO_MASK;
@@ -818,10 +818,10 @@
 
 #define DL19FDUPLX	0x0400	/* DL10019 Full duplex mode */
 
-static int read_eeprom(kio_addr_t ioaddr, int location)
+static int read_eeprom(unsigned int ioaddr, int location)
 {
     int i, retval = 0;
-    kio_addr_t ee_addr = ioaddr + DLINK_EEPROM;
+    unsigned int ee_addr = ioaddr + DLINK_EEPROM;
     int read_cmd = location | (EE_READ_CMD << 8);
 
     outb(0, ee_addr);
@@ -852,10 +852,10 @@
     In ASIC mode, EE_ADOT is used to output the data to the ASIC.
 */
 
-static void write_asic(kio_addr_t ioaddr, int location, short asic_data)
+static void write_asic(unsigned int ioaddr, int location, short asic_data)
 {
 	int i;
-	kio_addr_t ee_addr = ioaddr + DLINK_EEPROM;
+	unsigned int ee_addr = ioaddr + DLINK_EEPROM;
 	short dataval;
 	int read_cmd = location | (EE_READ_CMD << 8);
 
@@ -897,7 +897,7 @@
 
 static void set_misc_reg(struct net_device *dev)
 {
-    kio_addr_t nic_base = dev->base_addr;
+    unsigned int nic_base = dev->base_addr;
     pcnet_dev_t *info = PRIV(dev);
     u_char tmp;
 
@@ -936,7 +936,7 @@
 static void mii_phy_probe(struct net_device *dev)
 {
     pcnet_dev_t *info = PRIV(dev);
-    kio_addr_t mii_addr = dev->base_addr + DLINK_GPIO;
+    unsigned int mii_addr = dev->base_addr + DLINK_GPIO;
     int i;
     u_int tmp, phyid;
 
@@ -1014,7 +1014,7 @@
 
 static void pcnet_reset_8390(struct net_device *dev)
 {
-    kio_addr_t nic_base = dev->base_addr;
+    unsigned int nic_base = dev->base_addr;
     int i;
 
     ei_status.txing = ei_status.dmaing = 0;
@@ -1074,8 +1074,8 @@
 {
     struct net_device *dev = (struct net_device *)arg;
     pcnet_dev_t *info = PRIV(dev);
-    kio_addr_t nic_base = dev->base_addr;
-    kio_addr_t mii_addr = nic_base + DLINK_GPIO;
+    unsigned int nic_base = dev->base_addr;
+    unsigned int mii_addr = nic_base + DLINK_GPIO;
     u_short link;
 
     if (!netif_device_present(dev)) goto reschedule;
@@ -1177,7 +1177,7 @@
 {
     pcnet_dev_t *info = PRIV(dev);
     u16 *data = (u16 *)&rq->ifr_ifru;
-    kio_addr_t mii_addr = dev->base_addr + DLINK_GPIO;
+    unsigned int mii_addr = dev->base_addr + DLINK_GPIO;
     switch (cmd) {
     case SIOCGMIIPHY:
 	data[0] = info->phy_id;
@@ -1199,7 +1199,7 @@
 			     struct e8390_pkt_hdr *hdr,
 			     int ring_page)
 {
-    kio_addr_t nic_base = dev->base_addr;
+    unsigned int nic_base = dev->base_addr;
 
     if (ei_status.dmaing) {
 	printk(KERN_NOTICE "%s: DMAing conflict in dma_block_input."
@@ -1230,7 +1230,7 @@
 static void dma_block_input(struct net_device *dev, int count,
 			    struct sk_buff *skb, int ring_offset)
 {
-    kio_addr_t nic_base = dev->base_addr;
+    unsigned int nic_base = dev->base_addr;
     int xfer_count = count;
     char *buf = skb->data;
 
@@ -1285,7 +1285,7 @@
 static void dma_block_output(struct net_device *dev, int count,
 			     const u_char *buf, const int start_page)
 {
-    kio_addr_t nic_base = dev->base_addr;
+    unsigned int nic_base = dev->base_addr;
     pcnet_dev_t *info = PRIV(dev);
 #ifdef PCMCIA_DEBUG
     int retries = 0;
diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c
index c9868e9..f18eca9 100644
--- a/drivers/net/pcmcia/smc91c92_cs.c
+++ b/drivers/net/pcmcia/smc91c92_cs.c
@@ -295,7 +295,7 @@
 static void smc_set_xcvr(struct net_device *dev, int if_port);
 static void smc_reset(struct net_device *dev);
 static void media_check(u_long arg);
-static void mdio_sync(kio_addr_t addr);
+static void mdio_sync(unsigned int addr);
 static int mdio_read(struct net_device *dev, int phy_id, int loc);
 static void mdio_write(struct net_device *dev, int phy_id, int loc, int value);
 static int smc_link_ok(struct net_device *dev);
@@ -601,8 +601,8 @@
 {
     struct net_device *dev = link->priv;
     struct smc_private *smc = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
-    kio_addr_t iouart = link->io.BasePort2;
+    unsigned int ioaddr = dev->base_addr;
+    unsigned int iouart = link->io.BasePort2;
 
     /* Set UART base address and force map with COR bit 1 */
     writeb(iouart & 0xff,        smc->base + MOT_UART + CISREG_IOBASE_0);
@@ -621,7 +621,7 @@
 static int mot_setup(struct pcmcia_device *link)
 {
     struct net_device *dev = link->priv;
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     int i, wait, loop;
     u_int addr;
 
@@ -754,7 +754,7 @@
 static int osi_config(struct pcmcia_device *link)
 {
     struct net_device *dev = link->priv;
-    static const kio_addr_t com[4] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8 };
+    static const unsigned int com[4] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8 };
     int i, j;
 
     link->conf.Attributes |= CONF_ENABLE_SPKR;
@@ -900,7 +900,7 @@
 static int check_sig(struct pcmcia_device *link)
 {
     struct net_device *dev = link->priv;
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     int width;
     u_short s;
 
@@ -960,7 +960,7 @@
     struct smc_private *smc = netdev_priv(dev);
     char *name;
     int i, j, rev;
-    kio_addr_t ioaddr;
+    unsigned int ioaddr;
     u_long mir;
     DECLARE_MAC_BUF(mac);
 
@@ -1136,7 +1136,7 @@
 #define MDIO_DATA_WRITE1	(MDIO_DIR_WRITE | MDIO_DATA_OUT)
 #define MDIO_DATA_READ		0x02
 
-static void mdio_sync(kio_addr_t addr)
+static void mdio_sync(unsigned int addr)
 {
     int bits;
     for (bits = 0; bits < 32; bits++) {
@@ -1147,7 +1147,7 @@
 
 static int mdio_read(struct net_device *dev, int phy_id, int loc)
 {
-    kio_addr_t addr = dev->base_addr + MGMT;
+    unsigned int addr = dev->base_addr + MGMT;
     u_int cmd = (0x06<<10)|(phy_id<<5)|loc;
     int i, retval = 0;
 
@@ -1167,7 +1167,7 @@
 
 static void mdio_write(struct net_device *dev, int phy_id, int loc, int value)
 {
-    kio_addr_t addr = dev->base_addr + MGMT;
+    unsigned int addr = dev->base_addr + MGMT;
     u_int cmd = (0x05<<28)|(phy_id<<23)|(loc<<18)|(1<<17)|value;
     int i;
 
@@ -1193,7 +1193,7 @@
 #ifdef PCMCIA_DEBUG
 static void smc_dump(struct net_device *dev)
 {
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     u_short i, w, save;
     save = inw(ioaddr + BANK_SELECT);
     for (w = 0; w < 4; w++) {
@@ -1248,7 +1248,7 @@
 {
     struct smc_private *smc = netdev_priv(dev);
     struct pcmcia_device *link = smc->p_dev;
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
 
     DEBUG(0, "%s: smc_close(), status %4.4x.\n",
 	  dev->name, inw(ioaddr + BANK_SELECT));
@@ -1285,7 +1285,7 @@
 {
     struct smc_private *smc = netdev_priv(dev);
     struct sk_buff *skb = smc->saved_skb;
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     u_char packet_no;
 
     if (!skb) {
@@ -1349,7 +1349,7 @@
 static void smc_tx_timeout(struct net_device *dev)
 {
     struct smc_private *smc = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
 
     printk(KERN_NOTICE "%s: SMC91c92 transmit timed out, "
 	   "Tx_status %2.2x status %4.4x.\n",
@@ -1364,7 +1364,7 @@
 static int smc_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
     struct smc_private *smc = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     u_short num_pages;
     short time_out, ir;
     unsigned long flags;
@@ -1434,7 +1434,7 @@
 static void smc_tx_err(struct net_device * dev)
 {
     struct smc_private *smc = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     int saved_packet = inw(ioaddr + PNR_ARR) & 0xff;
     int packet_no = inw(ioaddr + FIFO_PORTS) & 0x7f;
     int tx_status;
@@ -1478,7 +1478,7 @@
 static void smc_eph_irq(struct net_device *dev)
 {
     struct smc_private *smc = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     u_short card_stats, ephs;
 
     SMC_SELECT_BANK(0);
@@ -1513,7 +1513,7 @@
 {
     struct net_device *dev = dev_id;
     struct smc_private *smc = netdev_priv(dev);
-    kio_addr_t ioaddr;
+    unsigned int ioaddr;
     u_short saved_bank, saved_pointer, mask, status;
     unsigned int handled = 1;
     char bogus_cnt = INTR_WORK;		/* Work we are willing to do. */
@@ -1633,7 +1633,7 @@
 static void smc_rx(struct net_device *dev)
 {
     struct smc_private *smc = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     int rx_status;
     int packet_length;	/* Caution: not frame length, rather words
 			   to transfer from the chip. */
@@ -1738,7 +1738,7 @@
 
 static void set_rx_mode(struct net_device *dev)
 {
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     struct smc_private *smc = netdev_priv(dev);
     u_int multicast_table[ 2 ] = { 0, };
     unsigned long flags;
@@ -1804,7 +1804,7 @@
 static void smc_set_xcvr(struct net_device *dev, int if_port)
 {
     struct smc_private *smc = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     u_short saved_bank;
 
     saved_bank = inw(ioaddr + BANK_SELECT);
@@ -1827,7 +1827,7 @@
 
 static void smc_reset(struct net_device *dev)
 {
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     struct smc_private *smc = netdev_priv(dev);
     int i;
 
@@ -1904,7 +1904,7 @@
 {
     struct net_device *dev = (struct net_device *) arg;
     struct smc_private *smc = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     u_short i, media, saved_bank;
     u_short link;
     unsigned long flags;
@@ -2021,7 +2021,7 @@
 
 static int smc_link_ok(struct net_device *dev)
 {
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     struct smc_private *smc = netdev_priv(dev);
 
     if (smc->cfg & CFG_MII_SELECT) {
@@ -2035,7 +2035,7 @@
 static int smc_netdev_get_ecmd(struct net_device *dev, struct ethtool_cmd *ecmd)
 {
     u16 tmp;
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
 
     ecmd->supported = (SUPPORTED_TP | SUPPORTED_AUI |
 	SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full);
@@ -2057,7 +2057,7 @@
 static int smc_netdev_set_ecmd(struct net_device *dev, struct ethtool_cmd *ecmd)
 {
     u16 tmp;
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
 
     if (ecmd->speed != SPEED_10)
     	return -EINVAL;
@@ -2100,7 +2100,7 @@
 static int smc_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 {
 	struct smc_private *smc = netdev_priv(dev);
-	kio_addr_t ioaddr = dev->base_addr;
+	unsigned int ioaddr = dev->base_addr;
 	u16 saved_bank = inw(ioaddr + BANK_SELECT);
 	int ret;
 
@@ -2118,7 +2118,7 @@
 static int smc_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 {
 	struct smc_private *smc = netdev_priv(dev);
-	kio_addr_t ioaddr = dev->base_addr;
+	unsigned int ioaddr = dev->base_addr;
 	u16 saved_bank = inw(ioaddr + BANK_SELECT);
 	int ret;
 
@@ -2136,7 +2136,7 @@
 static u32 smc_get_link(struct net_device *dev)
 {
 	struct smc_private *smc = netdev_priv(dev);
-	kio_addr_t ioaddr = dev->base_addr;
+	unsigned int ioaddr = dev->base_addr;
 	u16 saved_bank = inw(ioaddr + BANK_SELECT);
 	u32 ret;
 
@@ -2164,7 +2164,7 @@
 {
 	struct smc_private *smc = netdev_priv(dev);
 	if (smc->cfg & CFG_MII_SELECT) {
-		kio_addr_t ioaddr = dev->base_addr;
+		unsigned int ioaddr = dev->base_addr;
 		u16 saved_bank = inw(ioaddr + BANK_SELECT);
 		int res;
 
@@ -2196,7 +2196,7 @@
 	struct mii_ioctl_data *mii = if_mii(rq);
 	int rc = 0;
 	u16 saved_bank;
-	kio_addr_t ioaddr = dev->base_addr;
+	unsigned int ioaddr = dev->base_addr;
 
 	if (!netif_running(dev))
 		return -EINVAL;
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index 1f09bea..d041f83 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -273,12 +273,12 @@
 static unsigned maxrx_bytes = 22000;
 
 /* MII management prototypes */
-static void mii_idle(kio_addr_t ioaddr);
-static void mii_putbit(kio_addr_t ioaddr, unsigned data);
-static int  mii_getbit(kio_addr_t ioaddr);
-static void mii_wbits(kio_addr_t ioaddr, unsigned data, int len);
-static unsigned mii_rd(kio_addr_t ioaddr, u_char phyaddr, u_char phyreg);
-static void mii_wr(kio_addr_t ioaddr, u_char phyaddr, u_char phyreg,
+static void mii_idle(unsigned int ioaddr);
+static void mii_putbit(unsigned int ioaddr, unsigned data);
+static int  mii_getbit(unsigned int ioaddr);
+static void mii_wbits(unsigned int ioaddr, unsigned data, int len);
+static unsigned mii_rd(unsigned int ioaddr, u_char phyaddr, u_char phyreg);
+static void mii_wr(unsigned int ioaddr, u_char phyaddr, u_char phyreg,
 		   unsigned data, int len);
 
 /*
@@ -403,7 +403,7 @@
 static void
 PrintRegisters(struct net_device *dev)
 {
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
 
     if (pc_debug > 1) {
 	int i, page;
@@ -439,7 +439,7 @@
  * Turn around for read
  */
 static void
-mii_idle(kio_addr_t ioaddr)
+mii_idle(unsigned int ioaddr)
 {
     PutByte(XIRCREG2_GPR2, 0x04|0); /* drive MDCK low */
     udelay(1);
@@ -451,7 +451,7 @@
  * Write a bit to MDI/O
  */
 static void
-mii_putbit(kio_addr_t ioaddr, unsigned data)
+mii_putbit(unsigned int ioaddr, unsigned data)
 {
   #if 1
     if (data) {
@@ -484,7 +484,7 @@
  * Get a bit from MDI/O
  */
 static int
-mii_getbit(kio_addr_t ioaddr)
+mii_getbit(unsigned int ioaddr)
 {
     unsigned d;
 
@@ -497,7 +497,7 @@
 }
 
 static void
-mii_wbits(kio_addr_t ioaddr, unsigned data, int len)
+mii_wbits(unsigned int ioaddr, unsigned data, int len)
 {
     unsigned m = 1 << (len-1);
     for (; m; m >>= 1)
@@ -505,7 +505,7 @@
 }
 
 static unsigned
-mii_rd(kio_addr_t ioaddr,	u_char phyaddr, u_char phyreg)
+mii_rd(unsigned int ioaddr,	u_char phyaddr, u_char phyreg)
 {
     int i;
     unsigned data=0, m;
@@ -527,7 +527,8 @@
 }
 
 static void
-mii_wr(kio_addr_t ioaddr, u_char phyaddr, u_char phyreg, unsigned data, int len)
+mii_wr(unsigned int ioaddr, u_char phyaddr, u_char phyreg, unsigned data,
+       int len)
 {
     int i;
 
@@ -726,7 +727,7 @@
     local_info_t *local = netdev_priv(dev);
     tuple_t tuple;
     cisparse_t parse;
-    kio_addr_t ioaddr;
+    unsigned int ioaddr;
     int err, i;
     u_char buf[64];
     cistpl_lan_node_id_t *node_id = (cistpl_lan_node_id_t*)parse.funce.data;
@@ -1104,7 +1105,7 @@
 {
     struct net_device *dev = (struct net_device *)dev_id;
     local_info_t *lp = netdev_priv(dev);
-    kio_addr_t ioaddr;
+    unsigned int ioaddr;
     u_char saved_page;
     unsigned bytes_rcvd;
     unsigned int_status, eth_status, rx_status, tx_status;
@@ -1209,7 +1210,7 @@
 		    unsigned i;
 		    u_long *p = skb_put(skb, pktlen);
 		    register u_long a;
-		    kio_addr_t edpreg = ioaddr+XIRCREG_EDP-2;
+		    unsigned int edpreg = ioaddr+XIRCREG_EDP-2;
 		    for (i=0; i < len ; i += 4, p++) {
 			a = inl(edpreg);
 			__asm__("rorl $16,%0\n\t"
@@ -1346,7 +1347,7 @@
 do_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
     local_info_t *lp = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     int okay;
     unsigned freespace;
     unsigned pktlen = skb->len;
@@ -1415,7 +1416,7 @@
 static void
 set_addresses(struct net_device *dev)
 {
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     local_info_t *lp = netdev_priv(dev);
     struct dev_mc_list *dmi = dev->mc_list;
     unsigned char *addr;
@@ -1459,7 +1460,7 @@
 static void
 set_multicast_list(struct net_device *dev)
 {
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
 
     SelectPage(0x42);
     if (dev->flags & IFF_PROMISC) { /* snoop */
@@ -1543,7 +1544,7 @@
 do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
     local_info_t *local = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     u16 *data = (u16 *)&rq->ifr_ifru;
 
     DEBUG(1, "%s: ioctl(%-.6s, %#04x) %04x %04x %04x %04x\n",
@@ -1575,7 +1576,7 @@
 hardreset(struct net_device *dev)
 {
     local_info_t *local = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
 
     SelectPage(4);
     udelay(1);
@@ -1592,7 +1593,7 @@
 do_reset(struct net_device *dev, int full)
 {
     local_info_t *local = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     unsigned value;
 
     DEBUG(0, "%s: do_reset(%p,%d)\n", dev? dev->name:"eth?", dev, full);
@@ -1753,7 +1754,7 @@
 init_mii(struct net_device *dev)
 {
     local_info_t *local = netdev_priv(dev);
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     unsigned control, status, linkpartner;
     int i;
 
@@ -1826,7 +1827,7 @@
 do_powerdown(struct net_device *dev)
 {
 
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
 
     DEBUG(0, "do_powerdown(%p)\n", dev);
 
@@ -1838,7 +1839,7 @@
 static int
 do_stop(struct net_device *dev)
 {
-    kio_addr_t ioaddr = dev->base_addr;
+    unsigned int ioaddr = dev->base_addr;
     local_info_t *lp = netdev_priv(dev);
     struct pcmcia_device *link = lp->p_dev;
 
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 46339f6..038c1ef 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -529,9 +529,13 @@
 
 	if (ifr->ifr_flags & IFF_NO_PI)
 		tun->flags |= TUN_NO_PI;
+	else
+		tun->flags &= ~TUN_NO_PI;
 
 	if (ifr->ifr_flags & IFF_ONE_QUEUE)
 		tun->flags |= TUN_ONE_QUEUE;
+	else
+		tun->flags &= ~TUN_ONE_QUEUE;
 
 	file->private_data = tun;
 	tun->attached = 1;
diff --git a/drivers/net/wireless/b43/b43.h b/drivers/net/wireless/b43/b43.h
index 32a24f5..08a011f 100644
--- a/drivers/net/wireless/b43/b43.h
+++ b/drivers/net/wireless/b43/b43.h
@@ -724,6 +724,7 @@
 	bool short_preamble;	/* TRUE, if short preamble is enabled. */
 	bool short_slot;	/* TRUE, if short slot timing is enabled. */
 	bool radio_hw_enable;	/* saved state of radio hardware enabled state */
+	bool suspend_in_progress;	/* TRUE, if we are in a suspend/resume cycle */
 
 	/* PHY/Radio device. */
 	struct b43_phy phy;
diff --git a/drivers/net/wireless/b43/leds.c b/drivers/net/wireless/b43/leds.c
index 4b590d8..0908335 100644
--- a/drivers/net/wireless/b43/leds.c
+++ b/drivers/net/wireless/b43/leds.c
@@ -116,7 +116,10 @@
 {
 	if (!led->dev)
 		return;
-	led_classdev_unregister(&led->led_dev);
+	if (led->dev->suspend_in_progress)
+		led_classdev_unregister_suspended(&led->led_dev);
+	else
+		led_classdev_unregister(&led->led_dev);
 	b43_led_turn_off(led->dev, led->index, led->activelow);
 	led->dev = NULL;
 }
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 64c154d..ef65c41 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -38,6 +38,7 @@
 #include <linux/wireless.h>
 #include <linux/workqueue.h>
 #include <linux/skbuff.h>
+#include <linux/io.h>
 #include <linux/dma-mapping.h>
 #include <asm/unaligned.h>
 
@@ -2554,10 +2555,10 @@
 	return (sizeof(u16));
 }
 
-static void b43_rng_exit(struct b43_wl *wl)
+static void b43_rng_exit(struct b43_wl *wl, bool suspended)
 {
 	if (wl->rng_initialized)
-		hwrng_unregister(&wl->rng);
+		__hwrng_unregister(&wl->rng, suspended);
 }
 
 static int b43_rng_init(struct b43_wl *wl)
@@ -3417,8 +3418,10 @@
 	macctl |= B43_MACCTL_PSM_JMP0;
 	b43_write32(dev, B43_MMIO_MACCTL, macctl);
 
-	b43_leds_exit(dev);
-	b43_rng_exit(dev->wl);
+	if (!dev->suspend_in_progress) {
+		b43_leds_exit(dev);
+		b43_rng_exit(dev->wl, false);
+	}
 	b43_dma_free(dev);
 	b43_chip_exit(dev);
 	b43_radio_turn_off(dev, 1);
@@ -3534,11 +3537,13 @@
 	ssb_bus_powerup(bus, 1);	/* Enable dynamic PCTL */
 	b43_upload_card_macaddress(dev);
 	b43_security_init(dev);
-	b43_rng_init(wl);
+	if (!dev->suspend_in_progress)
+		b43_rng_init(wl);
 
 	b43_set_status(dev, B43_STAT_INITIALIZED);
 
-	b43_leds_init(dev);
+	if (!dev->suspend_in_progress)
+		b43_leds_init(dev);
 out:
 	return err;
 
@@ -4135,6 +4140,7 @@
 	b43dbg(wl, "Suspending...\n");
 
 	mutex_lock(&wl->mutex);
+	wldev->suspend_in_progress = true;
 	wldev->suspend_init_status = b43_status(wldev);
 	if (wldev->suspend_init_status >= B43_STAT_STARTED)
 		b43_wireless_core_stop(wldev);
@@ -4166,15 +4172,17 @@
 	if (wldev->suspend_init_status >= B43_STAT_STARTED) {
 		err = b43_wireless_core_start(wldev);
 		if (err) {
+			b43_leds_exit(wldev);
+			b43_rng_exit(wldev->wl, true);
 			b43_wireless_core_exit(wldev);
 			b43err(wl, "Resume failed at core start\n");
 			goto out;
 		}
 	}
-	mutex_unlock(&wl->mutex);
-
 	b43dbg(wl, "Device resumed.\n");
-      out:
+ out:
+	wldev->suspend_in_progress = false;
+	mutex_unlock(&wl->mutex);
 	return err;
 }
 
diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c
index 2ab107f..5bf9e00 100644
--- a/drivers/net/wireless/ipw2100.c
+++ b/drivers/net/wireless/ipw2100.c
@@ -162,7 +162,7 @@
 #include <linux/firmware.h>
 #include <linux/acpi.h>
 #include <linux/ctype.h>
-#include <linux/latency.h>
+#include <linux/pm_qos_params.h>
 
 #include "ipw2100.h"
 
@@ -1701,7 +1701,7 @@
 	/* the ipw2100 hardware really doesn't want power management delays
 	 * longer than 175usec
 	 */
-	modify_acceptable_latency("ipw2100", 175);
+	pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100", 175);
 
 	/* If the interrupt is enabled, turn it off... */
 	spin_lock_irqsave(&priv->low_lock, flags);
@@ -1856,7 +1856,8 @@
 	ipw2100_disable_interrupts(priv);
 	spin_unlock_irqrestore(&priv->low_lock, flags);
 
-	modify_acceptable_latency("ipw2100", INFINITE_LATENCY);
+	pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100",
+			PM_QOS_DEFAULT_VALUE);
 
 	/* We have to signal any supplicant if we are disassociating */
 	if (associated)
@@ -6554,7 +6555,8 @@
 	if (ret)
 		goto out;
 
-	set_acceptable_latency("ipw2100", INFINITE_LATENCY);
+	pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100",
+			PM_QOS_DEFAULT_VALUE);
 #ifdef CONFIG_IPW2100_DEBUG
 	ipw2100_debug_level = debug;
 	ret = driver_create_file(&ipw2100_pci_driver.driver,
@@ -6576,7 +6578,7 @@
 			   &driver_attr_debug_level);
 #endif
 	pci_unregister_driver(&ipw2100_pci_driver);
-	remove_acceptable_latency("ipw2100");
+	pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100");
 }
 
 module_init(ipw2100_init);
diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c
index d2fa079..f479c1a 100644
--- a/drivers/net/wireless/netwave_cs.c
+++ b/drivers/net/wireless/netwave_cs.c
@@ -195,7 +195,7 @@
 static void netwave_detach(struct pcmcia_device *p_dev);    /* Destroy instance */
 
 /* Hardware configuration */
-static void netwave_doreset(kio_addr_t iobase, u_char __iomem *ramBase);
+static void netwave_doreset(unsigned int iobase, u_char __iomem *ramBase);
 static void netwave_reset(struct net_device *dev);
 
 /* Misc device stuff */
@@ -309,7 +309,7 @@
 }
 
 static void netwave_snapshot(netwave_private *priv, u_char __iomem *ramBase, 
-			     kio_addr_t iobase) {
+			     unsigned int iobase) {
     u_short resultBuffer;
 
     /* if time since last snapshot is > 1 sec. (100 jiffies?)  then take 
@@ -340,7 +340,7 @@
 static struct iw_statistics *netwave_get_wireless_stats(struct net_device *dev)
 {	
     unsigned long flags;
-    kio_addr_t iobase = dev->base_addr;
+    unsigned int iobase = dev->base_addr;
     netwave_private *priv = netdev_priv(dev);
     u_char __iomem *ramBase = priv->ramBase;
     struct iw_statistics* wstats;
@@ -471,7 +471,7 @@
 			    char *extra)
 {
 	unsigned long flags;
-	kio_addr_t iobase = dev->base_addr;
+	unsigned int iobase = dev->base_addr;
 	netwave_private *priv = netdev_priv(dev);
 	u_char __iomem *ramBase = priv->ramBase;
 
@@ -518,7 +518,7 @@
 				char *key)
 {
 	unsigned long flags;
-	kio_addr_t iobase = dev->base_addr;
+	unsigned int iobase = dev->base_addr;
 	netwave_private *priv = netdev_priv(dev);
 	u_char __iomem *ramBase = priv->ramBase;
 
@@ -621,7 +621,7 @@
 			    char *extra)
 {
 	unsigned long flags;
-	kio_addr_t iobase = dev->base_addr;
+	unsigned int iobase = dev->base_addr;
 	netwave_private *priv = netdev_priv(dev);
 	u_char __iomem *ramBase = priv->ramBase;
 
@@ -874,7 +874,7 @@
  *
  *    Proper hardware reset of the card.
  */
-static void netwave_doreset(kio_addr_t ioBase, u_char __iomem *ramBase)
+static void netwave_doreset(unsigned int ioBase, u_char __iomem *ramBase)
 {
     /* Reset card */
     wait_WOC(ioBase);
@@ -892,7 +892,7 @@
     /* u_char state; */
     netwave_private *priv = netdev_priv(dev);
     u_char __iomem *ramBase = priv->ramBase;
-    kio_addr_t iobase = dev->base_addr;
+    unsigned int iobase = dev->base_addr;
 
     DEBUG(0, "netwave_reset: Done with hardware reset\n");
 
@@ -973,7 +973,7 @@
 	
     netwave_private *priv = netdev_priv(dev);
     u_char __iomem * ramBase = priv->ramBase;
-    kio_addr_t iobase = dev->base_addr;
+    unsigned int iobase = dev->base_addr;
 
     /* Disable interrupts & save flags */
     spin_lock_irqsave(&priv->spinlock, flags);
@@ -1065,7 +1065,7 @@
  */
 static irqreturn_t netwave_interrupt(int irq, void* dev_id)
 {
-    kio_addr_t iobase;
+    unsigned int iobase;
     u_char __iomem *ramBase;
     struct net_device *dev = (struct net_device *)dev_id;
     struct netwave_private *priv = netdev_priv(dev);
@@ -1235,7 +1235,7 @@
 {
     netwave_private *priv = netdev_priv(dev);
     u_char __iomem *ramBase = priv->ramBase;
-    kio_addr_t iobase = dev->base_addr;
+    unsigned int iobase = dev->base_addr;
     u_char rxStatus;
     struct sk_buff *skb = NULL;
     unsigned int curBuffer,
@@ -1388,7 +1388,7 @@
  */
 static void set_multicast_list(struct net_device *dev)
 {
-    kio_addr_t iobase = dev->base_addr;
+    unsigned int iobase = dev->base_addr;
     netwave_private *priv = netdev_priv(dev);
     u_char __iomem * ramBase = priv->ramBase;
     u_char  rcvMode = 0;
diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c
index c2037b2..06eea6a 100644
--- a/drivers/net/wireless/wavelan_cs.c
+++ b/drivers/net/wireless/wavelan_cs.c
@@ -149,7 +149,7 @@
   net_local *lp = netdev_priv(dev);
   u_char __iomem *ptr = lp->mem + PSA_ADDR + (o << 1);
   int		count = 0;
-  kio_addr_t	base = dev->base_addr;
+  unsigned int	base = dev->base_addr;
   /* As there seem to have no flag PSA_BUSY as in the ISA model, we are
    * oblige to verify this address to know when the PSA is ready... */
   volatile u_char __iomem *verify = lp->mem + PSA_ADDR +
@@ -708,7 +708,7 @@
 /* Perform a handover to a new WavePoint */
 static void wv_roam_handover(wavepoint_history *wavepoint, net_local *lp)
 {
-  kio_addr_t		base = lp->dev->base_addr;
+  unsigned int		base = lp->dev->base_addr;
   mm_t                  m;
   unsigned long         flags;
 
@@ -821,7 +821,7 @@
 	     int	cmd,
 	     int	result)
 {
-  kio_addr_t	base = dev->base_addr;
+  unsigned int	base = dev->base_addr;
   int		status;
   int		wait_completed;
   long		spin;
@@ -945,7 +945,7 @@
 	     char *	buf,
 	     int	len)
 {
-  kio_addr_t	base = dev->base_addr;
+  unsigned int	base = dev->base_addr;
   int		ring_ptr = addr;
   int		chunk_len;
   char *	buf_ptr = buf;
@@ -1096,7 +1096,7 @@
 static void
 wv_mmc_show(struct net_device *	dev)
 {
-  kio_addr_t	base = dev->base_addr;
+  unsigned int	base = dev->base_addr;
   net_local *	lp = netdev_priv(dev);
   mmr_t		m;
 
@@ -1275,7 +1275,7 @@
 static inline void
 wv_init_info(struct net_device *	dev)
 {
-  kio_addr_t	base = dev->base_addr;
+  unsigned int	base = dev->base_addr;
   psa_t		psa;
   DECLARE_MAC_BUF(mac);
 
@@ -1294,7 +1294,7 @@
 
 #ifdef DEBUG_BASIC_SHOW
   /* Now, let's go for the basic stuff */
-  printk(KERN_NOTICE "%s: WaveLAN: port %#lx, irq %d, "
+  printk(KERN_NOTICE "%s: WaveLAN: port %#x, irq %d, "
 	 "hw_addr %s",
 	 dev->name, base, dev->irq,
 	 print_mac(mac, dev->dev_addr));
@@ -1828,7 +1828,7 @@
 			    union iwreq_data *wrqu,
 			    char *extra)
 {
-	kio_addr_t base = dev->base_addr;
+	unsigned int base = dev->base_addr;
 	net_local *lp = netdev_priv(dev);
 	psa_t psa;
 	mm_t m;
@@ -1918,7 +1918,7 @@
 			    union iwreq_data *wrqu,
 			    char *extra)
 {
-	kio_addr_t base = dev->base_addr;
+	unsigned int base = dev->base_addr;
 	net_local *lp = netdev_priv(dev);
 	unsigned long flags;
 	int ret;
@@ -1948,7 +1948,7 @@
 			    union iwreq_data *wrqu,
 			    char *extra)
 {
-	kio_addr_t base = dev->base_addr;
+	unsigned int base = dev->base_addr;
 	net_local *lp = netdev_priv(dev);
 	psa_t psa;
 	unsigned long flags;
@@ -1994,7 +1994,7 @@
 			    union iwreq_data *wrqu,
 			    char *extra)
 {
-	kio_addr_t base = dev->base_addr;
+	unsigned int base = dev->base_addr;
 	net_local *lp = netdev_priv(dev);
 	psa_t psa;
 	unsigned long flags;
@@ -2060,7 +2060,7 @@
 			      union iwreq_data *wrqu,
 			      char *extra)
 {
-	kio_addr_t base = dev->base_addr;
+	unsigned int base = dev->base_addr;
 	net_local *lp = netdev_priv(dev);
 	unsigned long flags;
 	psa_t psa;
@@ -2130,7 +2130,7 @@
 			      union iwreq_data *wrqu,
 			      char *extra)
 {
-	kio_addr_t base = dev->base_addr;
+	unsigned int base = dev->base_addr;
 	net_local *lp = netdev_priv(dev);
 	psa_t psa;
 	unsigned long flags;
@@ -2349,7 +2349,7 @@
 			     union iwreq_data *wrqu,
 			     char *extra)
 {
-	kio_addr_t base = dev->base_addr;
+	unsigned int base = dev->base_addr;
 	net_local *lp = netdev_priv(dev);
 	struct iw_range *range = (struct iw_range *) extra;
 	unsigned long flags;
@@ -2425,7 +2425,7 @@
 			    union iwreq_data *wrqu,
 			    char *extra)
 {
-	kio_addr_t base = dev->base_addr;
+	unsigned int base = dev->base_addr;
 	net_local *lp = netdev_priv(dev);
 	psa_t psa;
 	unsigned long flags;
@@ -2701,7 +2701,7 @@
 static iw_stats *
 wavelan_get_wireless_stats(struct net_device *	dev)
 {
-  kio_addr_t		base = dev->base_addr;
+  unsigned int		base = dev->base_addr;
   net_local *		lp = netdev_priv(dev);
   mmr_t			m;
   iw_stats *		wstats;
@@ -2764,7 +2764,7 @@
 		  int		rfp,	/* end of frame */
 		  int		wrap)	/* start of buffer */
 {
-  kio_addr_t	base = dev->base_addr;
+  unsigned int	base = dev->base_addr;
   int		rp;
   int		len;
 
@@ -2925,7 +2925,7 @@
 static inline void
 wv_packet_rcv(struct net_device *	dev)
 {
-  kio_addr_t	base = dev->base_addr;
+  unsigned int	base = dev->base_addr;
   net_local *	lp = netdev_priv(dev);
   int		newrfp;
   int		rp;
@@ -3062,7 +3062,7 @@
 		short		length)
 {
   net_local *		lp = netdev_priv(dev);
-  kio_addr_t		base = dev->base_addr;
+  unsigned int		base = dev->base_addr;
   unsigned long		flags;
   int			clen = length;
   register u_short	xmtdata_base = TX_BASE;
@@ -3183,7 +3183,7 @@
 static inline int
 wv_mmc_init(struct net_device *	dev)
 {
-  kio_addr_t	base = dev->base_addr;
+  unsigned int	base = dev->base_addr;
   psa_t		psa;
   mmw_t		m;
   int		configured;
@@ -3377,7 +3377,7 @@
 static int
 wv_ru_stop(struct net_device *	dev)
 {
-  kio_addr_t	base = dev->base_addr;
+  unsigned int	base = dev->base_addr;
   net_local *	lp = netdev_priv(dev);
   unsigned long	flags;
   int		status;
@@ -3440,7 +3440,7 @@
 static int
 wv_ru_start(struct net_device *	dev)
 {
-  kio_addr_t	base = dev->base_addr;
+  unsigned int	base = dev->base_addr;
   net_local *	lp = netdev_priv(dev);
   unsigned long	flags;
 
@@ -3528,7 +3528,7 @@
 static int
 wv_82593_config(struct net_device *	dev)
 {
-  kio_addr_t			base = dev->base_addr;
+  unsigned int			base = dev->base_addr;
   net_local *			lp = netdev_priv(dev);
   struct i82593_conf_block	cfblk;
   int				ret = TRUE;
@@ -3765,7 +3765,7 @@
 wv_hw_config(struct net_device *	dev)
 {
   net_local *		lp = netdev_priv(dev);
-  kio_addr_t		base = dev->base_addr;
+  unsigned int		base = dev->base_addr;
   unsigned long		flags;
   int			ret = FALSE;
 
@@ -4047,7 +4047,7 @@
 {
   struct net_device *	dev = dev_id;
   net_local *	lp;
-  kio_addr_t	base;
+  unsigned int	base;
   int		status0;
   u_int		tx_status;
 
@@ -4306,7 +4306,7 @@
 wavelan_watchdog(struct net_device *	dev)
 {
   net_local *		lp = netdev_priv(dev);
-  kio_addr_t		base = dev->base_addr;
+  unsigned int		base = dev->base_addr;
   unsigned long		flags;
   int			aborted = FALSE;
 
@@ -4382,7 +4382,7 @@
 {
   net_local *	lp = netdev_priv(dev);
   struct pcmcia_device *	link = lp->link;
-  kio_addr_t	base = dev->base_addr;
+  unsigned int	base = dev->base_addr;
 
 #ifdef DEBUG_CALLBACK_TRACE
   printk(KERN_DEBUG "%s: ->wavelan_open(dev=0x%x)\n", dev->name,
@@ -4436,7 +4436,7 @@
 wavelan_close(struct net_device *	dev)
 {
   struct pcmcia_device *	link = ((net_local *)netdev_priv(dev))->link;
-  kio_addr_t	base = dev->base_addr;
+  unsigned int	base = dev->base_addr;
 
 #ifdef DEBUG_CALLBACK_TRACE
   printk(KERN_DEBUG "%s: ->wavelan_close(dev=0x%x)\n", dev->name,
diff --git a/drivers/nubus/Makefile b/drivers/nubus/Makefile
index f5ef03c..21bda20 100644
--- a/drivers/nubus/Makefile
+++ b/drivers/nubus/Makefile
@@ -4,5 +4,4 @@
 
 obj-y   := nubus.o
 
-obj-$(CONFIG_MODULES) += nubus_syms.o 
 obj-$(CONFIG_PROC_FS) += proc.o
diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c
index f4076ae..2f047e5 100644
--- a/drivers/nubus/nubus.c
+++ b/drivers/nubus/nubus.c
@@ -14,6 +14,7 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/delay.h>
+#include <linux/module.h>
 #include <asm/setup.h>
 #include <asm/system.h>
 #include <asm/page.h>
@@ -186,6 +187,7 @@
 		len--;
 	}
 }
+EXPORT_SYMBOL(nubus_get_rsrc_mem);
 
 void nubus_get_rsrc_str(void *dest, const struct nubus_dirent* dirent,
 			int len)
@@ -200,6 +202,7 @@
 		len--;
 	}
 }
+EXPORT_SYMBOL(nubus_get_rsrc_str);
 
 int nubus_get_root_dir(const struct nubus_board* board,
 		       struct nubus_dir* dir)
@@ -209,6 +212,7 @@
 	dir->mask = board->lanes;
 	return 0;
 }
+EXPORT_SYMBOL(nubus_get_root_dir);
 
 /* This is a slyly renamed version of the above */
 int nubus_get_func_dir(const struct nubus_dev* dev,
@@ -219,6 +223,7 @@
 	dir->mask = dev->board->lanes;
 	return 0;
 }
+EXPORT_SYMBOL(nubus_get_func_dir);
 
 int nubus_get_board_dir(const struct nubus_board* board,
 			struct nubus_dir* dir)
@@ -237,6 +242,7 @@
 		return -1;
 	return 0;
 }
+EXPORT_SYMBOL(nubus_get_board_dir);
 
 int nubus_get_subdir(const struct nubus_dirent *ent,
 		     struct nubus_dir *dir)
@@ -246,6 +252,7 @@
 	dir->mask = ent->mask;
 	return 0;
 }
+EXPORT_SYMBOL(nubus_get_subdir);
 
 int nubus_readdir(struct nubus_dir *nd, struct nubus_dirent *ent)
 {
@@ -274,12 +281,14 @@
 	ent->mask  = nd->mask;
 	return 0;
 }
+EXPORT_SYMBOL(nubus_readdir);
 
 int nubus_rewinddir(struct nubus_dir* dir)
 {
 	dir->ptr = dir->base;
 	return 0;
 }
+EXPORT_SYMBOL(nubus_rewinddir);
 
 /* Driver interface functions, more or less like in pci.c */
 
@@ -303,6 +312,7 @@
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(nubus_find_device);
 
 struct nubus_dev*
 nubus_find_type(unsigned short category,
@@ -320,6 +330,7 @@
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(nubus_find_type);
 
 struct nubus_dev*
 nubus_find_slot(unsigned int slot,
@@ -335,6 +346,7 @@
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(nubus_find_slot);
 
 int
 nubus_find_rsrc(struct nubus_dir* dir, unsigned char rsrc_type,
@@ -346,6 +358,7 @@
 	}	
 	return -1;
 }
+EXPORT_SYMBOL(nubus_find_rsrc);
 
 /* Initialization functions - decide which slots contain stuff worth
    looking at, and print out lots and lots of information from the
diff --git a/drivers/nubus/nubus_syms.c b/drivers/nubus/nubus_syms.c
deleted file mode 100644
index 9204f04..0000000
--- a/drivers/nubus/nubus_syms.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/* Exported symbols for NuBus services
-
-   (c) 1999 David Huggins-Daines <dhd@debian.org> */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/nubus.h>
-
-#ifdef CONFIG_PROC_FS
-EXPORT_SYMBOL(nubus_proc_attach_device);
-EXPORT_SYMBOL(nubus_proc_detach_device);
-#endif
-
-MODULE_LICENSE("GPL");
-
-EXPORT_SYMBOL(nubus_find_device);
-EXPORT_SYMBOL(nubus_find_type);
-EXPORT_SYMBOL(nubus_find_slot);
-EXPORT_SYMBOL(nubus_get_root_dir);
-EXPORT_SYMBOL(nubus_get_board_dir);
-EXPORT_SYMBOL(nubus_get_func_dir);
-EXPORT_SYMBOL(nubus_readdir);
-EXPORT_SYMBOL(nubus_find_rsrc);
-EXPORT_SYMBOL(nubus_rewinddir);
-EXPORT_SYMBOL(nubus_get_subdir);
-EXPORT_SYMBOL(nubus_get_rsrc_mem);
-EXPORT_SYMBOL(nubus_get_rsrc_str);
-
diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c
index 5271a4a..e07492b 100644
--- a/drivers/nubus/proc.c
+++ b/drivers/nubus/proc.c
@@ -22,6 +22,8 @@
 #include <linux/nubus.h>
 #include <linux/proc_fs.h>
 #include <linux/init.h>
+#include <linux/module.h>
+
 #include <asm/uaccess.h>
 #include <asm/byteorder.h>
 
@@ -140,6 +142,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(nubus_proc_attach_device);
 
 /* FIXME: this is certainly broken! */
 int nubus_proc_detach_device(struct nubus_dev *dev)
@@ -154,6 +157,7 @@
 	}
 	return 0;
 }
+EXPORT_SYMBOL(nubus_proc_detach_device);
 
 void __init proc_bus_nubus_add_devices(void)
 {
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index ca52307..d08b284 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -941,7 +941,7 @@
 	** w/o this association, we wouldn't have coherent DMA!
 	** Access to the virtual address is what forces a two pass algorithm.
 	*/
-	coalesced = iommu_coalesce_chunks(ioc, sglist, nents, ccio_alloc_range);
+	coalesced = iommu_coalesce_chunks(ioc, dev, sglist, nents, ccio_alloc_range);
 
 	/*
 	** Program the I/O Pdir
diff --git a/drivers/parisc/iommu-helpers.h b/drivers/parisc/iommu-helpers.h
index 0a1f99a..97ba828 100644
--- a/drivers/parisc/iommu-helpers.h
+++ b/drivers/parisc/iommu-helpers.h
@@ -95,12 +95,14 @@
 */
 
 static inline unsigned int
-iommu_coalesce_chunks(struct ioc *ioc, struct scatterlist *startsg, int nents,
+iommu_coalesce_chunks(struct ioc *ioc, struct device *dev,
+		      struct scatterlist *startsg, int nents,
 		      int (*iommu_alloc_range)(struct ioc *, size_t))
 {
 	struct scatterlist *contig_sg;	   /* contig chunk head */
 	unsigned long dma_offset, dma_len; /* start/len of DMA stream */
 	unsigned int n_mappings = 0;
+	unsigned int max_seg_size = dma_get_max_seg_size(dev);
 
 	while (nents > 0) {
 
@@ -142,6 +144,9 @@
 					    IOVP_SIZE) > DMA_CHUNK_SIZE))
 				break;
 
+			if (startsg->length + dma_len > max_seg_size)
+				break;
+
 			/*
 			** Next see if we can append the next chunk (i.e.
 			** it must end on one page and begin on another
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index e527a0e..d06627c 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -946,7 +946,7 @@
 	** w/o this association, we wouldn't have coherent DMA!
 	** Access to the virtual address is what forces a two pass algorithm.
 	*/
-	coalesced = iommu_coalesce_chunks(ioc, sglist, nents, sba_alloc_range);
+	coalesced = iommu_coalesce_chunks(ioc, dev, sglist, nents, sba_alloc_range);
 
 	/*
 	** Program the I/O Pdir
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 04aac77..ae3df46 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1451,6 +1451,22 @@
 }
 #endif
 
+#ifndef HAVE_ARCH_PCI_SET_DMA_MAX_SEGMENT_SIZE
+int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size)
+{
+	return dma_set_max_seg_size(&dev->dev, size);
+}
+EXPORT_SYMBOL(pci_set_dma_max_seg_size);
+#endif
+
+#ifndef HAVE_ARCH_PCI_SET_DMA_SEGMENT_BOUNDARY
+int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask)
+{
+	return dma_set_seg_boundary(&dev->dev, mask);
+}
+EXPORT_SYMBOL(pci_set_dma_seg_boundary);
+#endif
+
 /**
  * pcix_get_max_mmrbc - get PCI-X maximum designed memory read byte count
  * @dev: PCI device to query
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 7f5dab3..4d23b9f 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -933,8 +933,12 @@
 
 	set_dev_node(&dev->dev, pcibus_to_node(bus));
 	dev->dev.dma_mask = &dev->dma_mask;
+	dev->dev.dma_parms = &dev->dma_parms;
 	dev->dev.coherent_dma_mask = 0xffffffffull;
 
+	pci_set_dma_max_seg_size(dev, 65536);
+	pci_set_dma_seg_boundary(dev, 0xffffffff);
+
 	/* Fix up broken headers */
 	pci_fixup_device(pci_fixup_header, dev);
 
diff --git a/drivers/pcmcia/at91_cf.c b/drivers/pcmcia/at91_cf.c
index eb6abd3..385e145 100644
--- a/drivers/pcmcia/at91_cf.c
+++ b/drivers/pcmcia/at91_cf.c
@@ -21,9 +21,9 @@
 #include <asm/hardware.h>
 #include <asm/io.h>
 #include <asm/sizes.h>
+#include <asm/gpio.h>
 
 #include <asm/arch/board.h>
-#include <asm/arch/gpio.h>
 #include <asm/arch/at91rm9200_mc.h>
 
 
@@ -56,7 +56,7 @@
 
 static inline int at91_cf_present(struct at91_cf_socket *cf)
 {
-	return !at91_get_gpio_value(cf->board->det_pin);
+	return !gpio_get_value(cf->board->det_pin);
 }
 
 /*--------------------------------------------------------------------------*/
@@ -100,9 +100,9 @@
 		int vcc	= cf->board->vcc_pin;
 
 		*sp = SS_DETECT | SS_3VCARD;
-		if (!rdy || at91_get_gpio_value(rdy))
+		if (!rdy || gpio_get_value(rdy))
 			*sp |= SS_READY;
-		if (!vcc || at91_get_gpio_value(vcc))
+		if (!vcc || gpio_get_value(vcc))
 			*sp |= SS_POWERON;
 	} else
 		*sp = 0;
@@ -121,10 +121,10 @@
 	if (cf->board->vcc_pin) {
 		switch (s->Vcc) {
 			case 0:
-				at91_set_gpio_value(cf->board->vcc_pin, 0);
+				gpio_set_value(cf->board->vcc_pin, 0);
 				break;
 			case 33:
-				at91_set_gpio_value(cf->board->vcc_pin, 1);
+				gpio_set_value(cf->board->vcc_pin, 1);
 				break;
 			default:
 				return -EINVAL;
@@ -132,7 +132,7 @@
 	}
 
 	/* toggle reset if needed */
-	at91_set_gpio_value(cf->board->rst_pin, s->flags & SS_RESET);
+	gpio_set_value(cf->board->rst_pin, s->flags & SS_RESET);
 
 	pr_debug("%s: Vcc %d, io_irq %d, flags %04x csc %04x\n",
 		driver_name, s->Vcc, s->io_irq, s->flags, s->csc_mask);
@@ -239,11 +239,24 @@
 	platform_set_drvdata(pdev, cf);
 
 	/* must be a GPIO; ergo must trigger on both edges */
-	status = request_irq(board->det_pin, at91_cf_irq, 0, driver_name, cf);
+	status = gpio_request(board->det_pin, "cf_det");
 	if (status < 0)
 		goto fail0;
+	status = request_irq(board->det_pin, at91_cf_irq, 0, driver_name, cf);
+	if (status < 0)
+		goto fail00;
 	device_init_wakeup(&pdev->dev, 1);
 
+	status = gpio_request(board->rst_pin, "cf_rst");
+	if (status < 0)
+		goto fail0a;
+
+	if (board->vcc_pin) {
+		status = gpio_request(board->vcc_pin, "cf_vcc");
+		if (status < 0)
+			goto fail0b;
+	}
+
 	/*
 	 * The card driver will request this irq later as needed.
 	 * but it causes lots of "irqNN: nobody cared" messages
@@ -251,16 +264,20 @@
 	 * (Note:  DK board doesn't wire the IRQ pin...)
 	 */
 	if (board->irq_pin) {
+		status = gpio_request(board->irq_pin, "cf_irq");
+		if (status < 0)
+			goto fail0c;
 		status = request_irq(board->irq_pin, at91_cf_irq,
 				IRQF_SHARED, driver_name, cf);
 		if (status < 0)
-			goto fail0a;
+			goto fail0d;
 		cf->socket.pci_irq = board->irq_pin;
 	} else
 		cf->socket.pci_irq = NR_IRQS + 1;
 
 	/* pcmcia layer only remaps "real" memory not iospace */
-	cf->socket.io_offset = (unsigned long) ioremap(cf->phys_baseaddr + CF_IO_PHYS, SZ_2K);
+	cf->socket.io_offset = (unsigned long)
+			ioremap(cf->phys_baseaddr + CF_IO_PHYS, SZ_2K);
 	if (!cf->socket.io_offset) {
 		status = -ENXIO;
 		goto fail1;
@@ -296,11 +313,21 @@
 fail1:
 	if (cf->socket.io_offset)
 		iounmap((void __iomem *) cf->socket.io_offset);
-	if (board->irq_pin)
+	if (board->irq_pin) {
 		free_irq(board->irq_pin, cf);
+fail0d:
+		gpio_free(board->irq_pin);
+	}
+fail0c:
+	if (board->vcc_pin)
+		gpio_free(board->vcc_pin);
+fail0b:
+	gpio_free(board->rst_pin);
 fail0a:
 	device_init_wakeup(&pdev->dev, 0);
 	free_irq(board->det_pin, cf);
+fail00:
+	gpio_free(board->det_pin);
 fail0:
 	kfree(cf);
 	return status;
@@ -313,13 +340,18 @@
 	struct resource		*io = cf->socket.io[0].res;
 
 	pcmcia_unregister_socket(&cf->socket);
-	if (board->irq_pin)
+	release_mem_region(io->start, io->end + 1 - io->start);
+	iounmap((void __iomem *) cf->socket.io_offset);
+	if (board->irq_pin) {
 		free_irq(board->irq_pin, cf);
+		gpio_free(board->irq_pin);
+	}
+	if (board->vcc_pin)
+		gpio_free(board->vcc_pin);
+	gpio_free(board->rst_pin);
 	device_init_wakeup(&pdev->dev, 0);
 	free_irq(board->det_pin, cf);
-	iounmap((void __iomem *) cf->socket.io_offset);
-	release_mem_region(io->start, io->end + 1 - io->start);
-
+	gpio_free(board->det_pin);
 	kfree(cf);
 	return 0;
 }
diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c
index a1bd763..714baae 100644
--- a/drivers/pcmcia/cardbus.c
+++ b/drivers/pcmcia/cardbus.c
@@ -143,7 +143,7 @@
 	/* Config space? */
 	if (space == 0) {
 		if (addr + len > 0x100)
-			goto fail;
+			goto failput;
 		for (; len; addr++, ptr++, len--)
 			pci_read_config_byte(dev, addr, ptr);
 		return 0;
@@ -171,6 +171,8 @@
 	memcpy_fromio(ptr, s->cb_cis_virt + addr, len);
 	return 0;
 
+failput:
+	pci_dev_put(dev);
 fail:
 	memset(ptr, 0xff, len);
 	return -1;
diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index 15c18f5..5a85871 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -865,11 +865,12 @@
 	ds_dbg(1, "trying to load CIS file %s\n", filename);
 
 	if (strlen(filename) > 14) {
-		printk(KERN_WARNING "pcmcia: CIS filename is too long\n");
+		printk(KERN_WARNING "pcmcia: CIS filename is too long [%s]\n",
+			filename);
 		return -EINVAL;
 	}
 
-	snprintf(path, 20, "%s", filename);
+	snprintf(path, sizeof(path), "%s", filename);
 
 	if (request_firmware(&fw, path, &dev->dev) == 0) {
 		if (fw->size >= CISTPL_MAX_CIS_SIZE) {
@@ -1130,8 +1131,6 @@
 	down(&dev->sem);
 	rc = pcmcia_dev_suspend(dev, PMSG_SUSPEND);
 	up(&dev->sem);
-	if (!rc)
-		dev->power.power_state.event = PM_EVENT_SUSPEND;
 	return rc;
 }
 
@@ -1142,8 +1141,6 @@
 	down(&dev->sem);
 	rc = pcmcia_dev_resume(dev);
 	up(&dev->sem);
-	if (!rc)
-		dev->power.power_state.event = PM_EVENT_ON;
 }
 
 /************************ per-device sysfs output ***************************/
@@ -1265,6 +1262,9 @@
 	struct pcmcia_driver *p_drv = NULL;
 	int ret = 0;
 
+	if (p_dev->suspended)
+		return 0;
+
 	ds_dbg(2, "suspending %s\n", dev->bus_id);
 
 	if (dev->driver)
@@ -1301,6 +1301,9 @@
         struct pcmcia_driver *p_drv = NULL;
 	int ret = 0;
 
+	if (!p_dev->suspended)
+		return 0;
+
 	ds_dbg(2, "resuming %s\n", dev->bus_id);
 
 	if (dev->driver)
diff --git a/drivers/pcmcia/i82092.c b/drivers/pcmcia/i82092.c
index df21e2d..7495155 100644
--- a/drivers/pcmcia/i82092.c
+++ b/drivers/pcmcia/i82092.c
@@ -82,7 +82,7 @@
 				    1 = empty socket, 
 				    2 = card but not initialized,
 				    3 = operational card */
-	kio_addr_t io_base; 	/* base io address of the socket */
+	unsigned int io_base; 	/* base io address of the socket */
 	
 	struct pcmcia_socket socket;
 	struct pci_dev *dev;	/* The PCI device for the socket */
diff --git a/drivers/pcmcia/i82365.c b/drivers/pcmcia/i82365.c
index 839bb1c..32a2ab1 100644
--- a/drivers/pcmcia/i82365.c
+++ b/drivers/pcmcia/i82365.c
@@ -164,7 +164,7 @@
     u_short		type, flags;
     struct pcmcia_socket	socket;
     unsigned int	number;
-    kio_addr_t		ioaddr;
+    unsigned int	ioaddr;
     u_short		psock;
     u_char		cs_irq, intr;
     union {
@@ -238,7 +238,7 @@
     unsigned long flags;
     spin_lock_irqsave(&bus_lock,flags);
     {
-	kio_addr_t port = socket[sock].ioaddr;
+	unsigned int port = socket[sock].ioaddr;
 	u_char val;
 	reg = I365_REG(socket[sock].psock, reg);
 	outb(reg, port); val = inb(port+1);
@@ -252,7 +252,7 @@
     unsigned long flags;
     spin_lock_irqsave(&bus_lock,flags);
     {
-	kio_addr_t port = socket[sock].ioaddr;
+	unsigned int port = socket[sock].ioaddr;
 	u_char val = I365_REG(socket[sock].psock, reg);
 	outb(val, port); outb(data, port+1);
 	spin_unlock_irqrestore(&bus_lock,flags);
@@ -588,7 +588,7 @@
 
 /*====================================================================*/
 
-static int __init identify(kio_addr_t port, u_short sock)
+static int __init identify(unsigned int port, u_short sock)
 {
     u_char val;
     int type = -1;
@@ -659,7 +659,7 @@
 static int __init is_alive(u_short sock)
 {
     u_char stat;
-    kio_addr_t start, stop;
+    unsigned int start, stop;
     
     stat = i365_get(sock, I365_STATUS);
     start = i365_get_pair(sock, I365_IO(0)+I365_W_START);
@@ -678,7 +678,7 @@
 
 /*====================================================================*/
 
-static void __init add_socket(kio_addr_t port, int psock, int type)
+static void __init add_socket(unsigned int port, int psock, int type)
 {
     socket[sockets].ioaddr = port;
     socket[sockets].psock = psock;
@@ -698,7 +698,7 @@
     base = sockets-ns;
     if (base == 0) printk("\n");
     printk(KERN_INFO "  %s", pcic[type].name);
-    printk(" ISA-to-PCMCIA at port %#lx ofs 0x%02x",
+    printk(" ISA-to-PCMCIA at port %#x ofs 0x%02x",
 	       t->ioaddr, t->psock*0x40);
     printk(", %d socket%s\n", ns, ((ns > 1) ? "s" : ""));
 
@@ -772,7 +772,7 @@
 static void __init isa_probe(void)
 {
     int i, j, sock, k, ns, id;
-    kio_addr_t port;
+    unsigned int port;
 #ifdef CONFIG_PNP
     struct isapnp_device_id *devid;
     struct pnp_dev *dev;
@@ -1053,7 +1053,7 @@
     u_char map, ioctl;
     
     debug(1, "SetIOMap(%d, %d, %#2.2x, %d ns, "
-	  "%#lx-%#lx)\n", sock, io->map, io->flags,
+	  "%#x-%#x)\n", sock, io->map, io->flags,
 	  io->speed, io->start, io->stop);
     map = io->map;
     if ((map > 1) || (io->start > 0xffff) || (io->stop > 0xffff) ||
diff --git a/drivers/pcmcia/m32r_cfc.c b/drivers/pcmcia/m32r_cfc.c
index 91da15b..3616da2 100644
--- a/drivers/pcmcia/m32r_cfc.c
+++ b/drivers/pcmcia/m32r_cfc.c
@@ -58,7 +58,7 @@
 	u_short			type, flags;
 	struct pcmcia_socket	socket;
 	unsigned int		number;
- 	kio_addr_t		ioaddr;
+	unsigned int		ioaddr;
 	u_long			mapaddr;
 	u_long			base;	/* PCC register base */
 	u_char			cs_irq1, cs_irq2, intr;
@@ -298,7 +298,8 @@
 	return 0;
 }
 
-static void add_pcc_socket(ulong base, int irq, ulong mapaddr, kio_addr_t ioaddr)
+static void add_pcc_socket(ulong base, int irq, ulong mapaddr,
+			   unsigned int ioaddr)
 {
 	pcc_socket_t *t = &socket[pcc_sockets];
 
@@ -738,7 +739,7 @@
 #else	/* CONFIG_PLAT_USRV */
 	{
 		ulong base, mapaddr;
-		kio_addr_t ioaddr;
+		unsigned int ioaddr;
 
 		for (i = 0 ; i < M32R_MAX_PCC ; i++) {
 			base = (ulong)PLD_CFRSTCR;
diff --git a/drivers/pcmcia/m32r_pcc.c b/drivers/pcmcia/m32r_pcc.c
index ec4c125..2b42b71 100644
--- a/drivers/pcmcia/m32r_pcc.c
+++ b/drivers/pcmcia/m32r_pcc.c
@@ -65,7 +65,7 @@
 	u_short			type, flags;
 	struct pcmcia_socket	socket;
 	unsigned int		number;
- 	kio_addr_t		ioaddr;
+	unsigned int		ioaddr;
 	u_long			mapaddr;
 	u_long			base;	/* PCC register base */
 	u_char			cs_irq, intr;
@@ -310,7 +310,8 @@
 	return 0;
 }
 
-static void add_pcc_socket(ulong base, int irq, ulong mapaddr, kio_addr_t ioaddr)
+static void add_pcc_socket(ulong base, int irq, ulong mapaddr,
+			   unsigned int ioaddr)
 {
   	pcc_socket_t *t = &socket[pcc_sockets];
 
@@ -491,7 +492,7 @@
 	u_char map;
 
 	debug(3, "m32r-pcc: SetIOMap(%d, %d, %#2.2x, %d ns, "
-		  "%#lx-%#lx)\n", sock, io->map, io->flags,
+		  "%#x-%#x)\n", sock, io->map, io->flags,
 		  io->speed, io->start, io->stop);
 	map = io->map;
 
diff --git a/drivers/pcmcia/m8xx_pcmcia.c b/drivers/pcmcia/m8xx_pcmcia.c
index 4ea426a..ac70d2c 100644
--- a/drivers/pcmcia/m8xx_pcmcia.c
+++ b/drivers/pcmcia/m8xx_pcmcia.c
@@ -1174,8 +1174,10 @@
 
 	pcmcia_schlvl = irq_of_parse_and_map(np, 0);
 	hwirq = irq_map[pcmcia_schlvl].hwirq;
-	if (pcmcia_schlvl < 0)
+	if (pcmcia_schlvl < 0) {
+		iounmap(pcmcia);
 		return -EINVAL;
+	}
 
 	m8xx_pgcrx[0] = &pcmcia->pcmc_pgcra;
 	m8xx_pgcrx[1] = &pcmcia->pcmc_pgcrb;
@@ -1189,6 +1191,7 @@
 			driver_name, socket)) {
 		pcmcia_error("Cannot allocate IRQ %u for SCHLVL!\n",
 			     pcmcia_schlvl);
+		iounmap(pcmcia);
 		return -1;
 	}
 
@@ -1284,6 +1287,7 @@
 	}
 	for (i = 0; i < PCMCIA_SOCKETS_NO; i++)
 		pcmcia_unregister_socket(&socket[i].socket);
+	iounmap(pcmcia);
 
 	free_irq(pcmcia_schlvl, NULL);
 
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index 0ce39de..1d128fb 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -65,23 +65,23 @@
  * Special stuff for managing IO windows, because they are scarce
  */
 
-static int alloc_io_space(struct pcmcia_socket *s, u_int attr, ioaddr_t *base,
-			  ioaddr_t num, u_int lines)
+static int alloc_io_space(struct pcmcia_socket *s, u_int attr,
+			  unsigned int *base, unsigned int num, u_int lines)
 {
 	int i;
-	kio_addr_t try, align;
+	unsigned int try, align;
 
 	align = (*base) ? (lines ? 1<<lines : 0) : 1;
 	if (align && (align < num)) {
 		if (*base) {
-			ds_dbg(s, 0, "odd IO request: num %#x align %#lx\n",
+			ds_dbg(s, 0, "odd IO request: num %#x align %#x\n",
 			       num, align);
 			align = 0;
 		} else
 			while (align && (align < num)) align <<= 1;
 	}
 	if (*base & ~(align-1)) {
-		ds_dbg(s, 0, "odd IO request: base %#x align %#lx\n",
+		ds_dbg(s, 0, "odd IO request: base %#x align %#x\n",
 		       *base, align);
 		align = 0;
 	}
@@ -132,8 +132,8 @@
 } /* alloc_io_space */
 
 
-static void release_io_space(struct pcmcia_socket *s, ioaddr_t base,
-			     ioaddr_t num)
+static void release_io_space(struct pcmcia_socket *s, unsigned int base,
+			     unsigned int num)
 {
 	int i;
 
diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c
index bfcaad6..a8d1007 100644
--- a/drivers/pcmcia/rsrc_nonstatic.c
+++ b/drivers/pcmcia/rsrc_nonstatic.c
@@ -186,15 +186,16 @@
 ======================================================================*/
 
 #ifdef CONFIG_PCMCIA_PROBE
-static void do_io_probe(struct pcmcia_socket *s, kio_addr_t base, kio_addr_t num)
+static void do_io_probe(struct pcmcia_socket *s, unsigned int base,
+			unsigned int num)
 {
     struct resource *res;
     struct socket_data *s_data = s->resource_data;
-    kio_addr_t i, j, bad;
+    unsigned int i, j, bad;
     int any;
     u_char *b, hole, most;
 
-    printk(KERN_INFO "cs: IO port probe %#lx-%#lx:",
+    printk(KERN_INFO "cs: IO port probe %#x-%#x:",
 	   base, base+num-1);
 
     /* First, what does a floating port look like? */
@@ -233,7 +234,7 @@
 	} else {
 	    if (bad) {
 		sub_interval(&s_data->io_db, bad, i-bad);
-		printk(" %#lx-%#lx", bad, i-1);
+		printk(" %#x-%#x", bad, i-1);
 		bad = 0;
 	    }
 	}
@@ -244,7 +245,7 @@
 	    return;
 	} else {
 	    sub_interval(&s_data->io_db, bad, i-bad);
-	    printk(" %#lx-%#lx", bad, i-1);
+	    printk(" %#x-%#x", bad, i-1);
 	}
     }
 
diff --git a/drivers/pcmcia/sa1100_jornada720.c b/drivers/pcmcia/sa1100_jornada720.c
index af485ae..6284c35 100644
--- a/drivers/pcmcia/sa1100_jornada720.c
+++ b/drivers/pcmcia/sa1100_jornada720.c
@@ -101,7 +101,7 @@
   .socket_suspend	= sa1111_pcmcia_socket_suspend,
 };
 
-int __init pcmcia_jornada720_init(struct device *dev)
+int __devinit pcmcia_jornada720_init(struct device *dev)
 {
 	int ret = -ENODEV;
 
diff --git a/drivers/pcmcia/tcic.c b/drivers/pcmcia/tcic.c
index 749ac37..5792bd5 100644
--- a/drivers/pcmcia/tcic.c
+++ b/drivers/pcmcia/tcic.c
@@ -719,7 +719,7 @@
     u_short base, len, ioctl;
     
     debug(1, "SetIOMap(%d, %d, %#2.2x, %d ns, "
-	  "%#lx-%#lx)\n", psock, io->map, io->flags,
+	  "%#x-%#x)\n", psock, io->map, io->flags,
 	  io->speed, io->start, io->stop);
     if ((io->map > 1) || (io->start > 0xffff) || (io->stop > 0xffff) ||
 	(io->stop < io->start)) return -EINVAL;
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index d640427..d984e0f 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1057,12 +1057,11 @@
 		if (device->features & DASD_FEATURE_ERPLOG) {
 			dasd_log_sense(cqr, irb);
 		}
-		/* If we have no sense data, or we just don't want complex ERP
-		 * for this request, but if we have retries left, then just
-		 * reset this request and retry it in the fastpath
+		/*
+		 * If we don't want complex ERP for this request, then just
+		 * reset this and retry it in the fastpath
 		 */
-		if (!(cqr->irb.esw.esw0.erw.cons &&
-		      test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags)) &&
+		if (!test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags) &&
 		    cqr->retries > 0) {
 			DEV_MESSAGE(KERN_DEBUG, device,
 				    "default ERP in fastpath (%i retries left)",
@@ -1707,7 +1706,7 @@
 
 	req = (struct request *) cqr->callback_data;
 	dasd_profile_end(cqr->block, cqr, req);
-	status = cqr->memdev->discipline->free_cp(cqr, req);
+	status = cqr->block->base->discipline->free_cp(cqr, req);
 	if (status <= 0)
 		error = status ? status : -EIO;
 	dasd_end_request(req, error);
@@ -1742,12 +1741,8 @@
 
 		/*  Process requests that may be recovered */
 		if (cqr->status == DASD_CQR_NEED_ERP) {
-			if (cqr->irb.esw.esw0.erw.cons &&
-			    test_bit(DASD_CQR_FLAGS_USE_ERP,
-				     &cqr->flags)) {
-				erp_fn = base->discipline->erp_action(cqr);
-				erp_fn(cqr);
-			}
+			erp_fn = base->discipline->erp_action(cqr);
+			erp_fn(cqr);
 			goto restart;
 		}
 
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
index c361ab6..f69714a 100644
--- a/drivers/s390/block/dasd_3990_erp.c
+++ b/drivers/s390/block/dasd_3990_erp.c
@@ -164,7 +164,7 @@
 
 		/* reset status to submit the request again... */
 		erp->status = DASD_CQR_FILLED;
-		erp->retries = 1;
+		erp->retries = 10;
 	} else {
 		DEV_MESSAGE(KERN_ERR, device,
 			    "No alternate channel path left (lpum=%x / "
@@ -301,8 +301,7 @@
 		erp->function = dasd_3990_erp_action_4;
 
 	} else {
-
-		if (sense[25] == 0x1D) {	/* state change pending */
+		if (sense && (sense[25] == 0x1D)) { /* state change pending */
 
 			DEV_MESSAGE(KERN_INFO, device,
 				    "waiting for state change pending "
@@ -311,7 +310,7 @@
 
 			dasd_3990_erp_block_queue(erp, 30*HZ);
 
-                } else if (sense[25] == 0x1E) {	/* busy */
+		} else if (sense && (sense[25] == 0x1E)) {	/* busy */
 			DEV_MESSAGE(KERN_INFO, device,
 				    "busy - redriving request later, "
 				    "%d retries left",
@@ -2120,6 +2119,34 @@
  */
 
 /*
+ * DASD_3990_ERP_CONTROL_CHECK
+ *
+ * DESCRIPTION
+ *   Does a generic inspection if a control check occurred and sets up
+ *   the related error recovery procedure
+ *
+ * PARAMETER
+ *   erp		pointer to the currently created default ERP
+ *
+ * RETURN VALUES
+ *   erp_filled		pointer to the erp
+ */
+
+static struct dasd_ccw_req *
+dasd_3990_erp_control_check(struct dasd_ccw_req *erp)
+{
+	struct dasd_device *device = erp->startdev;	/* for logging only */
+
+	if (erp->refers->irb.scsw.cstat & (SCHN_STAT_INTF_CTRL_CHK
+					   | SCHN_STAT_CHN_CTRL_CHK)) {
+		DEV_MESSAGE(KERN_DEBUG, device, "%s",
+			    "channel or interface control check");
+		erp = dasd_3990_erp_action_4(erp, NULL); /* NULL: presumably "no sense" - confirm */
+	}
+	return erp;	/* unchanged when neither control check bit is set */
+}
+
+/*
  * DASD_3990_ERP_INSPECT
  *
  * DESCRIPTION
@@ -2145,8 +2172,11 @@
 	if (erp_new)
 		return erp_new;
 
+	/* check if no concurrent sense is available */
+	if (!erp->refers->irb.esw.esw0.erw.cons)
+		erp_new = dasd_3990_erp_control_check(erp);
 	/* distinguish between 24 and 32 byte sense data */
-	if (sense[27] & DASD_SENSE_BIT_0) {
+	else if (sense[27] & DASD_SENSE_BIT_0) {
 
 		/* inspect the 24 byte sense data */
 		erp_new = dasd_3990_erp_inspect_24(erp, sense);
@@ -2285,6 +2315,17 @@
 		//	return 0;	/* CCW doesn't match */
 	}
 
+	if (cqr1->irb.esw.esw0.erw.cons != cqr2->irb.esw.esw0.erw.cons)
+		return 0;
+
+	if ((cqr1->irb.esw.esw0.erw.cons == 0) &&
+	    (cqr2->irb.esw.esw0.erw.cons == 0))	{
+		if ((cqr1->irb.scsw.cstat & (SCHN_STAT_INTF_CTRL_CHK |
+					     SCHN_STAT_CHN_CTRL_CHK)) ==
+		    (cqr2->irb.scsw.cstat & (SCHN_STAT_INTF_CTRL_CHK |
+					     SCHN_STAT_CHN_CTRL_CHK)))
+			return 1; /* match with ifcc*/
+	}
 	/* check sense data; byte 0-2,25,27 */
 	if (!((memcmp (cqr1->irb.ecw, cqr2->irb.ecw, 3) == 0) &&
 	      (cqr1->irb.ecw[27] == cqr2->irb.ecw[27]) &&
@@ -2560,17 +2601,6 @@
 
 		return cqr;
 	}
-	/* check if sense data are available */
-	if (!cqr->irb.ecw) {
-		DEV_MESSAGE(KERN_DEBUG, device,
-			    "ERP called witout sense data avail ..."
-			    "request %p - NO ERP possible", cqr);
-
-		cqr->status = DASD_CQR_FAILED;
-
-		return cqr;
-
-	}
 
 	/* check if error happened before */
 	erp = dasd_3990_erp_in_erp(cqr);
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 7779bfc..3faf053 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -415,6 +415,8 @@
 	dev_info->gd->queue = dev_info->dcssblk_queue;
 	dev_info->gd->private_data = dev_info;
 	dev_info->gd->driverfs_dev = &dev_info->dev;
+	blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request);
+	blk_queue_hardsect_size(dev_info->dcssblk_queue, 4096);
 	/*
 	 * load the segment
 	 */
@@ -472,9 +474,6 @@
 	if (rc)
 		goto unregister_dev;
 
-	blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request);
-	blk_queue_hardsect_size(dev_info->dcssblk_queue, 4096);
-
 	add_disk(dev_info->gd);
 
 	switch (dev_info->segment_type) {
diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c
index e3b3d39..2e616e3 100644
--- a/drivers/s390/char/sclp_tty.c
+++ b/drivers/s390/char/sclp_tty.c
@@ -332,7 +332,7 @@
 		if (sclp_ttybuf == NULL) {
 			while (list_empty(&sclp_tty_pages)) {
 				spin_unlock_irqrestore(&sclp_tty_lock, flags);
-				if (in_interrupt())
+				if (in_atomic())
 					sclp_sync_wait();
 				else
 					wait_event(sclp_tty_waitq,
diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c
index 40cd21b..6807162 100644
--- a/drivers/s390/char/sclp_vt220.c
+++ b/drivers/s390/char/sclp_vt220.c
@@ -400,7 +400,7 @@
 			while (list_empty(&sclp_vt220_empty)) {
 				spin_unlock_irqrestore(&sclp_vt220_lock,
 						       flags);
-				if (in_interrupt())
+				if (in_atomic())
 					sclp_sync_wait();
 				else
 					wait_event(sclp_vt220_waitq,
diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index 3964056..03914fa 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c
@@ -391,12 +391,24 @@
 	return 0;
 }
 
+static void ccwgroup_shutdown(struct device *dev) /* bus .shutdown hook */
+{
+	struct ccwgroup_device *gdev;
+	struct ccwgroup_driver *gdrv;
+
+	gdev = to_ccwgroupdev(dev);
+	gdrv = to_ccwgroupdrv(dev->driver);
+	if (gdrv && gdrv->shutdown)	/* driver callback is optional */
+		gdrv->shutdown(gdev);	/* forward to the group driver */
+}
+
 static struct bus_type ccwgroup_bus_type = {
 	.name   = "ccwgroup",
 	.match  = ccwgroup_bus_match,
 	.uevent = ccwgroup_uevent,
 	.probe  = ccwgroup_probe,
 	.remove = ccwgroup_remove,
+	.shutdown = ccwgroup_shutdown,
 };
 
 /**
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index e7ba16a..007aaeb 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -26,6 +26,25 @@
 
 static void *sei_page;
 
+static int chsc_error_from_response(int response)
+{
+	switch (response) {	/* map a chsc response code to 0 or -errno */
+	case 0x0001: /* Success. */
+		return 0;
+	case 0x0002:
+	case 0x0003: /* Invalid block. */
+	case 0x0006:
+	case 0x0007: /* Invalid format. */
+	case 0x0008: /* Other invalid block. */
+	case 0x000a:
+		return -EINVAL;
+	case 0x0004: /* Command not provided in model. */
+		return -EOPNOTSUPP;
+	default: /* Any other response code. */
+		return -EIO;
+	}
+}
+
 struct chsc_ssd_area {
 	struct chsc_header request;
 	u16 :10;
@@ -75,11 +94,11 @@
 		ret = (ccode == 3) ? -ENODEV : -EBUSY;
 		goto out_free;
 	}
-	if (ssd_area->response.code != 0x0001) {
+	ret = chsc_error_from_response(ssd_area->response.code);
+	if (ret != 0) {
 		CIO_MSG_EVENT(2, "chsc: ssd failed for 0.%x.%04x (rc=%04x)\n",
 			      schid.ssid, schid.sch_no,
 			      ssd_area->response.code);
-		ret = -EIO;
 		goto out_free;
 	}
 	if (!ssd_area->sch_valid) {
@@ -717,36 +736,15 @@
 		return (ccode == 3) ? -ENODEV : -EBUSY;
 
 	switch (secm_area->response.code) {
-	case 0x0001: /* Success. */
-		ret = 0;
-		break;
-	case 0x0003: /* Invalid block. */
-	case 0x0007: /* Invalid format. */
-	case 0x0008: /* Other invalid block. */
-		CIO_CRW_EVENT(2, "Error in chsc request block!\n");
+	case 0x0102:
+	case 0x0103:
 		ret = -EINVAL;
-		break;
-	case 0x0004: /* Command not provided in model. */
-		CIO_CRW_EVENT(2, "Model does not provide secm\n");
-		ret = -EOPNOTSUPP;
-		break;
-	case 0x0102: /* cub adresses incorrect */
-		CIO_CRW_EVENT(2, "Invalid addresses in chsc request block\n");
-		ret = -EINVAL;
-		break;
-	case 0x0103: /* key error */
-		CIO_CRW_EVENT(2, "Access key error in secm\n");
-		ret = -EINVAL;
-		break;
-	case 0x0105: /* error while starting */
-		CIO_CRW_EVENT(2, "Error while starting channel measurement\n");
-		ret = -EIO;
-		break;
 	default:
-		CIO_CRW_EVENT(2, "Unknown CHSC response %d\n",
-			      secm_area->response.code);
-		ret = -EIO;
+		ret = chsc_error_from_response(secm_area->response.code);
 	}
+	if (ret != 0)
+		CIO_CRW_EVENT(2, "chsc: secm failed (rc=%04x)\n",
+			      secm_area->response.code);
 	return ret;
 }
 
@@ -827,27 +825,14 @@
 		goto out;
 	}
 
-	switch (scpd_area->response.code) {
-	case 0x0001: /* Success. */
+	ret = chsc_error_from_response(scpd_area->response.code);
+	if (ret == 0)
+		/* Success. */
 		memcpy(desc, &scpd_area->desc,
 		       sizeof(struct channel_path_desc));
-		ret = 0;
-		break;
-	case 0x0003: /* Invalid block. */
-	case 0x0007: /* Invalid format. */
-	case 0x0008: /* Other invalid block. */
-		CIO_CRW_EVENT(2, "Error in chsc request block!\n");
-		ret = -EINVAL;
-		break;
-	case 0x0004: /* Command not provided in model. */
-		CIO_CRW_EVENT(2, "Model does not provide scpd\n");
-		ret = -EOPNOTSUPP;
-		break;
-	default:
-		CIO_CRW_EVENT(2, "Unknown CHSC response %d\n",
+	else
+		CIO_CRW_EVENT(2, "chsc: scpd failed (rc=%04x)\n",
 			      scpd_area->response.code);
-		ret = -EIO;
-	}
 out:
 	free_page((unsigned long)scpd_area);
 	return ret;
@@ -923,8 +908,9 @@
 		goto out;
 	}
 
-	switch (scmc_area->response.code) {
-	case 0x0001: /* Success. */
+	ret = chsc_error_from_response(scmc_area->response.code);
+	if (ret == 0) {
+		/* Success. */
 		if (!scmc_area->not_valid) {
 			chp->cmg = scmc_area->cmg;
 			chp->shared = scmc_area->shared;
@@ -935,22 +921,9 @@
 			chp->cmg = -1;
 			chp->shared = -1;
 		}
-		ret = 0;
-		break;
-	case 0x0003: /* Invalid block. */
-	case 0x0007: /* Invalid format. */
-	case 0x0008: /* Invalid bit combination. */
-		CIO_CRW_EVENT(2, "Error in chsc request block!\n");
-		ret = -EINVAL;
-		break;
-	case 0x0004: /* Command not provided. */
-		CIO_CRW_EVENT(2, "Model does not provide scmc\n");
-		ret = -EOPNOTSUPP;
-		break;
-	default:
-		CIO_CRW_EVENT(2, "Unknown CHSC response %d\n",
+	} else {
+		CIO_CRW_EVENT(2, "chsc: scmc failed (rc=%04x)\n",
 			      scmc_area->response.code);
-		ret = -EIO;
 	}
 out:
 	free_page((unsigned long)scmc_area);
@@ -1002,21 +975,17 @@
 		ret = (ret == 3) ? -ENODEV : -EBUSY;
 		goto out;
 	}
+
 	switch (sda_area->response.code) {
-	case 0x0001: /* everything ok */
-		ret = 0;
-		break;
-	case 0x0003: /* invalid request block */
-	case 0x0007:
-		ret = -EINVAL;
-		break;
-	case 0x0004: /* command not provided */
-	case 0x0101: /* facility not provided */
+	case 0x0101:
 		ret = -EOPNOTSUPP;
 		break;
-	default: /* something went wrong */
-		ret = -EIO;
+	default:
+		ret = chsc_error_from_response(sda_area->response.code);
 	}
+	if (ret != 0)
+		CIO_CRW_EVENT(2, "chsc: sda (oc=%x) failed (rc=%04x)\n",
+			      operation_code, sda_area->response.code);
  out:
 	free_page((unsigned long)sda_area);
 	return ret;
@@ -1041,33 +1010,27 @@
 	} __attribute__ ((packed)) *scsc_area;
 
 	scsc_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
-	if (!scsc_area) {
-		CIO_MSG_EVENT(0, "Was not able to determine available "
-			      "CHSCs due to no memory.\n");
+	if (!scsc_area)
 		return -ENOMEM;
-	}
 
 	scsc_area->request.length = 0x0010;
 	scsc_area->request.code = 0x0010;
 
 	result = chsc(scsc_area);
 	if (result) {
-		CIO_MSG_EVENT(0, "Was not able to determine available CHSCs, "
-			      "cc=%i.\n", result);
-		result = -EIO;
+		result = (result == 3) ? -ENODEV : -EBUSY;
 		goto exit;
 	}
 
-	if (scsc_area->response.code != 1) {
-		CIO_MSG_EVENT(0, "Was not able to determine "
-			      "available CHSCs.\n");
-		result = -EIO;
-		goto exit;
-	}
-	memcpy(&css_general_characteristics, scsc_area->general_char,
-	       sizeof(css_general_characteristics));
-	memcpy(&css_chsc_characteristics, scsc_area->chsc_char,
-	       sizeof(css_chsc_characteristics));
+	result = chsc_error_from_response(scsc_area->response.code);
+	if (result == 0) {
+		memcpy(&css_general_characteristics, scsc_area->general_char,
+		       sizeof(css_general_characteristics));
+		memcpy(&css_chsc_characteristics, scsc_area->chsc_char,
+		       sizeof(css_chsc_characteristics));
+	} else
+		CIO_CRW_EVENT(2, "chsc: scsc failed (rc=%04x)\n",
+			      scsc_area->response.code);
 exit:
 	free_page ((unsigned long) scsc_area);
 	return result;
diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c
index 918b8b8..dc4d87f 100644
--- a/drivers/s390/cio/device_id.c
+++ b/drivers/s390/cio/device_id.c
@@ -26,17 +26,18 @@
 #include "ioasm.h"
 #include "io_sch.h"
 
-/*
- * Input :
- *   devno - device number
- *   ps	   - pointer to sense ID data area
- * Output : none
+/**
+ * vm_vdev_to_cu_type - Convert vm virtual device into control unit type
+ *			for certain devices.
+ * @class: virtual device class
+ * @type: virtual device type
+ *
+ * Returns control unit type if a match was made or %0xffff otherwise.
  */
-static void
-VM_virtual_device_info (__u16 devno, struct senseid *ps)
+static int vm_vdev_to_cu_type(int class, int type)
 {
 	static struct {
-		int vrdcvcla, vrdcvtyp, cu_type;
+		int class, type, cu_type;
 	} vm_devices[] = {
 		{ 0x08, 0x01, 0x3480 },
 		{ 0x08, 0x02, 0x3430 },
@@ -68,8 +69,26 @@
 		{ 0x40, 0xc0, 0x5080 },
 		{ 0x80, 0x00, 0x3215 },
 	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(vm_devices); i++)
+		if (class == vm_devices[i].class && type == vm_devices[i].type)
+			return vm_devices[i].cu_type;
+
+	return 0xffff;
+}
+
+/**
+ * diag_get_dev_info - retrieve device information via DIAG X'210'
+ * @devno: device number
+ * @ps: pointer to sense ID data area
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int diag_get_dev_info(u16 devno, struct senseid *ps)
+{
 	struct diag210 diag_data;
-	int ccode, i;
+	int ccode;
 
 	CIO_TRACE_EVENT (4, "VMvdinf");
 
@@ -79,21 +98,21 @@
 	};
 
 	ccode = diag210 (&diag_data);
-	ps->reserved = 0xff;
+	if ((ccode == 0) || (ccode == 2)) {
+		ps->reserved = 0xff;
 
-	/* Special case for bloody osa devices. */
-	if (diag_data.vrdcvcla == 0x02 &&
-	    diag_data.vrdcvtyp == 0x20) {
-		ps->cu_type = 0x3088;
-		ps->cu_model = 0x60;
-		return;
-	}
-	for (i = 0; i < ARRAY_SIZE(vm_devices); i++)
-		if (diag_data.vrdcvcla == vm_devices[i].vrdcvcla &&
-		    diag_data.vrdcvtyp == vm_devices[i].vrdcvtyp) {
-			ps->cu_type = vm_devices[i].cu_type;
-			return;
+		/* Special case for osa devices. */
+		if (diag_data.vrdcvcla == 0x02 && diag_data.vrdcvtyp == 0x20) {
+			ps->cu_type = 0x3088;
+			ps->cu_model = 0x60;
+			return 0;
 		}
+		ps->cu_type = vm_vdev_to_cu_type(diag_data.vrdcvcla,
+						diag_data.vrdcvtyp);
+		if (ps->cu_type != 0xffff)
+			return 0;
+	}
+
 	CIO_MSG_EVENT(0, "DIAG X'210' for device %04X returned (cc = %d):"
 		      "vdev class : %02X, vdev type : %04X \n ...  "
 		      "rdev class : %02X, rdev type : %04X, "
@@ -102,6 +121,8 @@
 		      diag_data.vrdcvcla, diag_data.vrdcvtyp,
 		      diag_data.vrdcrccl, diag_data.vrdccrty,
 		      diag_data.vrdccrmd);
+
+	return -ENODEV;
 }
 
 /*
@@ -130,6 +151,7 @@
 	/* Try on every path. */
 	ret = -ENODEV;
 	while (cdev->private->imask != 0) {
+		cdev->private->senseid.cu_type = 0xFFFF;
 		if ((sch->opm & cdev->private->imask) != 0 &&
 		    cdev->private->iretry > 0) {
 			cdev->private->iretry--;
@@ -153,7 +175,6 @@
 	int ret;
 
 	memset (&cdev->private->senseid, 0, sizeof (struct senseid));
-	cdev->private->senseid.cu_type = 0xFFFF;
 	cdev->private->imask = 0x80;
 	cdev->private->iretry = 5;
 	ret = __ccw_device_sense_id_start(cdev);
@@ -173,13 +194,7 @@
 
 	sch = to_subchannel(cdev->dev.parent);
 	irb = &cdev->private->irb;
-	/* Did we get a proper answer ? */
-	if (cdev->private->senseid.cu_type != 0xFFFF && 
-	    cdev->private->senseid.reserved == 0xFF) {
-		if (irb->scsw.count < sizeof (struct senseid) - 8)
-			cdev->private->flags.esid = 1;
-		return 0; /* Success */
-	}
+
 	/* Check the error cases. */
 	if (irb->scsw.fctl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) {
 		/* Retry Sense ID if requested. */
@@ -231,6 +246,15 @@
 				      sch->schid.ssid, sch->schid.sch_no);
 		return -EACCES;
 	}
+
+	/* Did we get a proper answer ? */
+	if (irb->scsw.cc == 0 && cdev->private->senseid.cu_type != 0xFFFF &&
+	    cdev->private->senseid.reserved == 0xFF) {
+		if (irb->scsw.count < sizeof(struct senseid) - 8)
+			cdev->private->flags.esid = 1;
+		return 0; /* Success */
+	}
+
 	/* Hmm, whatever happened, try again. */
 	CIO_MSG_EVENT(2, "SenseID : start_IO() for device %04x on "
 		      "subchannel 0.%x.%04x returns status %02X%02X\n",
@@ -283,20 +307,17 @@
 			break;
 		/* fall through. */
 	default:		/* Sense ID failed. Try asking VM. */
-		if (MACHINE_IS_VM) {
-			VM_virtual_device_info (cdev->private->dev_id.devno,
+		if (MACHINE_IS_VM)
+			ret = diag_get_dev_info(cdev->private->dev_id.devno,
 						&cdev->private->senseid);
-			if (cdev->private->senseid.cu_type != 0xFFFF) {
-				/* Got the device information from VM. */
-				ccw_device_sense_id_done(cdev, 0);
-				return;
-			}
-		}
-		/*
-		 * If we can't couldn't identify the device type we
-		 *  consider the device "not operational".
-		 */
-		ccw_device_sense_id_done(cdev, -ENODEV);
+		else
+			/*
+			 * If we couldn't identify the device type we
+			 *  consider the device "not operational".
+			 */
+			ret = -ENODEV;
+
+		ccw_device_sense_id_done(cdev, ret);
 		break;
 	}
 }
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 0e8267c..fb08861 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -449,9 +449,6 @@
 		else if (depth < 2)
 			depth = 2;
 		scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, depth);
-		if (!(((struct aac_dev *)host->hostdata)->adapter_info.options &
-				AAC_OPT_NEW_COMM))
-			blk_queue_max_segment_size(sdev->request_queue, 65536);
 	} else
 		scsi_adjust_queue_depth(sdev, 0, 1);
 
@@ -1133,6 +1130,12 @@
 	if (error < 0)
 		goto out_deinit;
 
+	if (!(aac->adapter_info.options & AAC_OPT_NEW_COMM)) {
+		error = pci_set_dma_max_seg_size(pdev, 65536);
+		if (error)
+			goto out_deinit;
+	}
+
 	/*
  	 * Lets override negotiations and drop the maximum SG limit to 34
  	 */
diff --git a/drivers/scsi/pcmcia/fdomain_stub.c b/drivers/scsi/pcmcia/fdomain_stub.c
index 4b82b20..d8b9935 100644
--- a/drivers/scsi/pcmcia/fdomain_stub.c
+++ b/drivers/scsi/pcmcia/fdomain_stub.c
@@ -130,7 +130,7 @@
     cisparse_t parse;
     int i, last_ret, last_fn;
     u_char tuple_data[64];
-    char str[16];
+    char str[22];
     struct Scsi_Host *host;
 
     DEBUG(0, "fdomain_config(0x%p)\n", link);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index b12fb31..f243fc30 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1569,6 +1569,7 @@
 					 request_fn_proc *request_fn)
 {
 	struct request_queue *q;
+	struct device *dev = shost->shost_gendev.parent;
 
 	q = blk_init_queue(request_fn, NULL);
 	if (!q)
@@ -1583,6 +1584,9 @@
 	blk_queue_max_sectors(q, shost->max_sectors);
 	blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
 	blk_queue_segment_boundary(q, shost->dma_boundary);
+	dma_set_seg_boundary(dev, shost->dma_boundary);
+
+	blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
 
 	if (!shost->use_clustering)
 		clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
diff --git a/drivers/serial/21285.c b/drivers/serial/21285.c
index 6a48dfa..0276471 100644
--- a/drivers/serial/21285.c
+++ b/drivers/serial/21285.c
@@ -237,6 +237,12 @@
 	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16); 
 	quot = uart_get_divisor(port, baud);
 
+	if (port->info && port->info->tty) {
+		struct tty_struct *tty = port->info->tty;
+		unsigned int b = port->uartclk / (16 * quot);
+		tty_encode_baud_rate(tty, b, b);
+	}
+
 	switch (termios->c_cflag & CSIZE) {
 	case CS5:
 		h_lcr = 0x00;
@@ -277,8 +283,6 @@
 	if (termios->c_iflag & INPCK)
 		port->read_status_mask |= RXSTAT_FRAME | RXSTAT_PARITY;
 
-	tty_encode_baud_rate(tty,  baud, baud);
-
 	/*
 	 * Which character status flags should we ignore?
 	 */
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index f94109c..b8a4bd9 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -2047,7 +2047,7 @@
 	 * Oxford Semi 952 rev B workaround
 	 */
 	if (up->bugs & UART_BUG_QUOT && (quot & 0xff) == 0)
-		quot ++;
+		quot++;
 
 	if (up->capabilities & UART_CAP_FIFO && up->port.fifosize > 1) {
 		if (baud < 2400)
@@ -2662,16 +2662,17 @@
 	memset(&port, 0, sizeof(struct uart_port));
 
 	for (i = 0; p && p->flags != 0; p++, i++) {
-		port.iobase	= p->iobase;
-		port.membase	= p->membase;
-		port.irq	= p->irq;
-		port.uartclk	= p->uartclk;
-		port.regshift	= p->regshift;
-		port.iotype	= p->iotype;
-		port.flags	= p->flags;
-		port.mapbase	= p->mapbase;
-		port.hub6	= p->hub6;
-		port.dev	= &dev->dev;
+		port.iobase		= p->iobase;
+		port.membase		= p->membase;
+		port.irq		= p->irq;
+		port.uartclk		= p->uartclk;
+		port.regshift		= p->regshift;
+		port.iotype		= p->iotype;
+		port.flags		= p->flags;
+		port.mapbase		= p->mapbase;
+		port.hub6		= p->hub6;
+		port.private_data	= p->private_data;
+		port.dev		= &dev->dev;
 		if (share_irqs)
 			port.flags |= UPF_SHARE_IRQ;
 		ret = serial8250_register_port(&port);
@@ -2812,15 +2813,16 @@
 	if (uart) {
 		uart_remove_one_port(&serial8250_reg, &uart->port);
 
-		uart->port.iobase   = port->iobase;
-		uart->port.membase  = port->membase;
-		uart->port.irq      = port->irq;
-		uart->port.uartclk  = port->uartclk;
-		uart->port.fifosize = port->fifosize;
-		uart->port.regshift = port->regshift;
-		uart->port.iotype   = port->iotype;
-		uart->port.flags    = port->flags | UPF_BOOT_AUTOCONF;
-		uart->port.mapbase  = port->mapbase;
+		uart->port.iobase       = port->iobase;
+		uart->port.membase      = port->membase;
+		uart->port.irq          = port->irq;
+		uart->port.uartclk      = port->uartclk;
+		uart->port.fifosize     = port->fifosize;
+		uart->port.regshift     = port->regshift;
+		uart->port.iotype       = port->iotype;
+		uart->port.flags        = port->flags | UPF_BOOT_AUTOCONF;
+		uart->port.mapbase      = port->mapbase;
+		uart->port.private_data = port->private_data;
 		if (port->dev)
 			uart->port.dev = port->dev;
 
diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index ceb03c9..0a4ac2b 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -106,6 +106,32 @@
 }
 
 /*
+ * ADDI-DATA GmbH communication cards <info@addi-data.com>
+ */
+static int addidata_apci7800_setup(struct serial_private *priv,
+				struct pciserial_board *board,
+				struct uart_port *port, int idx)
+{
+	unsigned int bar = 0, offset = board->first_offset;
+	bar = FL_GET_BASE(board->flags);	/* starting BAR from board flags */
+
+	if (idx < 2) {				/* ports 0-1: starting BAR */
+		offset += idx * board->uart_offset;
+	} else if ((idx >= 2) && (idx < 4)) {	/* ports 2-3: starting BAR + 1 */
+		bar += 1;
+		offset += ((idx - 2) * board->uart_offset);
+	} else if ((idx >= 4) && (idx < 6)) {	/* ports 4-5: starting BAR + 2 */
+		bar += 2;
+		offset += ((idx - 4) * board->uart_offset);
+	} else if (idx >= 6) {			/* ports 6 and up: starting BAR + 3 */
+		bar += 3;
+		offset += ((idx - 6) * board->uart_offset);
+	}
+
+	return setup_port(priv, port, bar, offset, board->reg_shift);
+}
+
+/*
  * AFAVLAB uses a different mixture of BARs and offsets
  * Not that ugly ;) -- HW
  */
@@ -752,6 +778,16 @@
  */
 static struct pci_serial_quirk pci_serial_quirks[] = {
 	/*
+	* ADDI-DATA GmbH communication cards <info@addi-data.com>
+	*/
+	{
+		.vendor         = PCI_VENDOR_ID_ADDIDATA_OLD,
+		.device         = PCI_DEVICE_ID_ADDIDATA_APCI7800,
+		.subvendor      = PCI_ANY_ID,
+		.subdevice      = PCI_ANY_ID,
+		.setup          = addidata_apci7800_setup,
+	},
+	/*
 	 * AFAVLAB cards - these may be called via parport_serial
 	 *  It is not clear whether this applies to all products.
 	 */
@@ -1179,6 +1215,12 @@
 		.base_baud	= 115200,
 		.uart_offset	= 8,
 	},
+	[pbn_b0_8_115200] = {
+		.flags		= FL_BASE0,
+		.num_ports	= 8,
+		.base_baud	= 115200,
+		.uart_offset	= 8,
+	},
 
 	[pbn_b0_1_921600] = {
 		.flags		= FL_BASE0,
@@ -2697,6 +2739,97 @@
 		pbn_pasemi_1682M },
 
 	/*
+	* ADDI-DATA GmbH communication cards <info@addi-data.com>
+	*/
+	{	PCI_VENDOR_ID_ADDIDATA,
+		PCI_DEVICE_ID_ADDIDATA_APCI7500,
+		PCI_ANY_ID,
+		PCI_ANY_ID,
+		0,
+		0,
+		pbn_b0_4_115200 },
+
+	{	PCI_VENDOR_ID_ADDIDATA,
+		PCI_DEVICE_ID_ADDIDATA_APCI7420,
+		PCI_ANY_ID,
+		PCI_ANY_ID,
+		0,
+		0,
+		pbn_b0_2_115200 },
+
+	{	PCI_VENDOR_ID_ADDIDATA,
+		PCI_DEVICE_ID_ADDIDATA_APCI7300,
+		PCI_ANY_ID,
+		PCI_ANY_ID,
+		0,
+		0,
+		pbn_b0_1_115200 },
+
+	{	PCI_VENDOR_ID_ADDIDATA_OLD,
+		PCI_DEVICE_ID_ADDIDATA_APCI7800,
+		PCI_ANY_ID,
+		PCI_ANY_ID,
+		0,
+		0,
+		pbn_b1_8_115200 },
+
+	{	PCI_VENDOR_ID_ADDIDATA,
+		PCI_DEVICE_ID_ADDIDATA_APCI7500_2,
+		PCI_ANY_ID,
+		PCI_ANY_ID,
+		0,
+		0,
+		pbn_b0_4_115200 },
+
+	{	PCI_VENDOR_ID_ADDIDATA,
+		PCI_DEVICE_ID_ADDIDATA_APCI7420_2,
+		PCI_ANY_ID,
+		PCI_ANY_ID,
+		0,
+		0,
+		pbn_b0_2_115200 },
+
+	{	PCI_VENDOR_ID_ADDIDATA,
+		PCI_DEVICE_ID_ADDIDATA_APCI7300_2,
+		PCI_ANY_ID,
+		PCI_ANY_ID,
+		0,
+		0,
+		pbn_b0_1_115200 },
+
+	{	PCI_VENDOR_ID_ADDIDATA,
+		PCI_DEVICE_ID_ADDIDATA_APCI7500_3,
+		PCI_ANY_ID,
+		PCI_ANY_ID,
+		0,
+		0,
+		pbn_b0_4_115200 },
+
+	{	PCI_VENDOR_ID_ADDIDATA,
+		PCI_DEVICE_ID_ADDIDATA_APCI7420_3,
+		PCI_ANY_ID,
+		PCI_ANY_ID,
+		0,
+		0,
+		pbn_b0_2_115200 },
+
+	{	PCI_VENDOR_ID_ADDIDATA,
+		PCI_DEVICE_ID_ADDIDATA_APCI7300_3,
+		PCI_ANY_ID,
+		PCI_ANY_ID,
+		0,
+		0,
+		pbn_b0_1_115200 },
+
+	{	PCI_VENDOR_ID_ADDIDATA,
+		PCI_DEVICE_ID_ADDIDATA_APCI7800_3,
+		PCI_ANY_ID,
+		PCI_ANY_ID,
+		0,
+		0,
+		pbn_b0_8_115200 },
+
+	/*
 	 * These entries match devices with class COMMUNICATION_SERIAL,
 	 * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL
 	 */
diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c
index 1de098e..6f09cbd 100644
--- a/drivers/serial/8250_pnp.c
+++ b/drivers/serial/8250_pnp.c
@@ -414,8 +414,9 @@
  */
 static int __devinit serial_pnp_guess_board(struct pnp_dev *dev, int *flags)
 {
-	if (!(check_name(pnp_dev_name(dev)) || (dev->card && check_name(dev->card->name))))
-		return -ENODEV;
+	if (!(check_name(pnp_dev_name(dev)) ||
+		(dev->card && check_name(dev->card->name))))
+			return -ENODEV;
 
 	if (check_resources(dev->independent))
 		return 0;
@@ -452,8 +453,9 @@
 		return -ENODEV;
 
 #ifdef SERIAL_DEBUG_PNP
-	printk("Setup PNP port: port %x, mem 0x%lx, irq %d, type %d\n",
-	       port.iobase, port.mapbase, port.irq, port.iotype);
+	printk(KERN_DEBUG
+		"Setup PNP port: port %x, mem 0x%lx, irq %d, type %d\n",
+		       port.iobase, port.mapbase, port.irq, port.iotype);
 #endif
 
 	port.flags |= UPF_SKIP_TEST | UPF_BOOT_AUTOCONF;
diff --git a/drivers/serial/atmel_serial.c b/drivers/serial/atmel_serial.c
index 111da57..60f5290 100644
--- a/drivers/serial/atmel_serial.c
+++ b/drivers/serial/atmel_serial.c
@@ -34,6 +34,7 @@
 #include <linux/tty_flip.h>
 #include <linux/platform_device.h>
 #include <linux/atmel_pdc.h>
+#include <linux/atmel_serial.h>
 
 #include <asm/io.h>
 
@@ -45,8 +46,6 @@
 #include <asm/arch/gpio.h>
 #endif
 
-#include "atmel_serial.h"
-
 #if defined(CONFIG_SERIAL_ATMEL_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
 #define SUPPORT_SYSRQ
 #endif
diff --git a/drivers/serial/mcf.c b/drivers/serial/mcf.c
index 051fcc2..e76fc72 100644
--- a/drivers/serial/mcf.c
+++ b/drivers/serial/mcf.c
@@ -434,7 +434,7 @@
 
 static struct mcf_uart mcf_ports[3];
 
-#define	MCF_MAXPORTS	(sizeof(mcf_ports) / sizeof(struct mcf_uart))
+#define	MCF_MAXPORTS	ARRAY_SIZE(mcf_ports)
 
 /****************************************************************************/
 #if defined(CONFIG_SERIAL_MCF_CONSOLE)
diff --git a/drivers/serial/mpsc.c b/drivers/serial/mpsc.c
index 4d643c9..cb3a919 100644
--- a/drivers/serial/mpsc.c
+++ b/drivers/serial/mpsc.c
@@ -612,6 +612,7 @@
 
 	/* No preamble, 16x divider, low-latency, */
 	writel(0x04400400, pi->mpsc_base + MPSC_MMCRH);
+	mpsc_set_baudrate(pi, pi->default_baud);
 
 	if (pi->mirror_regs) {
 		pi->MPSC_CHR_1_m = 0;
diff --git a/drivers/serial/s3c2410.c b/drivers/serial/s3c2410.c
index e773c8e..45de193 100644
--- a/drivers/serial/s3c2410.c
+++ b/drivers/serial/s3c2410.c
@@ -1527,7 +1527,7 @@
 #define s3c2440_uart_inf_at NULL
 #endif /* CONFIG_CPU_S3C2440 */
 
-#if defined(CONFIG_CPU_S3C2412) || defined(CONFIG_CPU_S3C2413)
+#if defined(CONFIG_CPU_S3C2412)
 
 static int s3c2412_serial_setsource(struct uart_port *port,
 				     struct s3c24xx_uart_clksrc *clk)
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 3bb5d24..276da14 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -371,7 +371,8 @@
 		 */
 		termios->c_cflag &= ~CBAUD;
 		if (old) {
-			termios->c_cflag |= old->c_cflag & CBAUD;
+			baud = tty_termios_baud_rate(old);
+			tty_termios_encode_baud_rate(termios, baud, baud);
 			old = NULL;
 			continue;
 		}
@@ -380,7 +381,7 @@
 		 * As a last resort, if the quotient is zero,
 		 * default to 9600 bps
 		 */
-		termios->c_cflag |= B9600;
+		tty_termios_encode_baud_rate(termios, 9600, 9600);
 	}
 
 	return 0;
@@ -1977,6 +1978,7 @@
 
 	if (state->info && state->info->flags & UIF_INITIALIZED) {
 		const struct uart_ops *ops = port->ops;
+		int tries;
 
 		state->info->flags = (state->info->flags & ~UIF_INITIALIZED)
 				     | UIF_SUSPENDED;
@@ -1990,9 +1992,14 @@
 		/*
 		 * Wait for the transmitter to empty.
 		 */
-		while (!ops->tx_empty(port)) {
+		for (tries = 3; !ops->tx_empty(port) && tries; tries--) {
 			msleep(10);
 		}
+		if (!tries)
+			printk(KERN_ERR "%s%s%s%d: Unable to drain transmitter\n",
+			       port->dev ? port->dev->bus_id : "",
+			       port->dev ? ": " : "",
+			       drv->dev_name, port->line);
 
 		ops->shutdown(port);
 	}
@@ -2029,8 +2036,6 @@
 	}
 	port->suspended = 0;
 
-	uart_change_pm(state, 0);
-
 	/*
 	 * Re-enable the console device after suspending.
 	 */
@@ -2049,6 +2054,7 @@
 		if (state->info && state->info->tty && termios.c_cflag == 0)
 			termios = *state->info->tty->termios;
 
+		uart_change_pm(state, 0);
 		port->ops->set_termios(port, &termios, NULL);
 		console_start(port->cons);
 	}
@@ -2057,6 +2063,7 @@
 		const struct uart_ops *ops = port->ops;
 		int ret;
 
+		uart_change_pm(state, 0);
 		ops->set_mctrl(port, 0);
 		ret = ops->startup(port);
 		if (ret == 0) {
@@ -2150,10 +2157,11 @@
 
 		/*
 		 * Ensure that the modem control lines are de-activated.
+		 * keep the DTR setting that is set in uart_set_options()
 		 * We probably don't need a spinlock around this, but
 		 */
 		spin_lock_irqsave(&port->lock, flags);
-		port->ops->set_mctrl(port, 0);
+		port->ops->set_mctrl(port, port->mctrl & TIOCM_DTR);
 		spin_unlock_irqrestore(&port->lock, flags);
 
 		/*
diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c
index d8b6600..164d2a4 100644
--- a/drivers/serial/serial_cs.c
+++ b/drivers/serial/serial_cs.c
@@ -389,7 +389,7 @@
 /*====================================================================*/
 
 static int setup_serial(struct pcmcia_device *handle, struct serial_info * info,
-			kio_addr_t iobase, int irq)
+			unsigned int iobase, int irq)
 {
 	struct uart_port port;
 	int line;
@@ -456,7 +456,7 @@
 
 static int simple_config(struct pcmcia_device *link)
 {
-	static const kio_addr_t base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 };
+	static const unsigned int base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 };
 	static const int size_table[2] = { 8, 16 };
 	struct serial_info *info = link->priv;
 	struct serial_cfg_mem *cfg_mem;
@@ -480,7 +480,7 @@
 	/* If the card is already configured, look up the port and irq */
 	i = pcmcia_get_configuration_info(link, &config);
 	if ((i == CS_SUCCESS) && (config.Attributes & CONF_VALID_CLIENT)) {
-		kio_addr_t port = 0;
+		unsigned int port = 0;
 		if ((config.BasePort2 != 0) && (config.NumPorts2 == 8)) {
 			port = config.BasePort2;
 			info->slave = 1;
diff --git a/fs/buffer.c b/fs/buffer.c
index 456c9ab..826baf4 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1798,7 +1798,7 @@
 					start = max(from, block_start);
 					size = min(to, block_end) - start;
 
-					zero_user_page(page, start, size, KM_USER0);
+					zero_user(page, start, size);
 					set_buffer_uptodate(bh);
 				}
 
@@ -1861,19 +1861,10 @@
 					mark_buffer_dirty(bh);
 					continue;
 				}
-				if (block_end > to || block_start < from) {
-					void *kaddr;
-
-					kaddr = kmap_atomic(page, KM_USER0);
-					if (block_end > to)
-						memset(kaddr+to, 0,
-							block_end-to);
-					if (block_start < from)
-						memset(kaddr+block_start,
-							0, from-block_start);
-					flush_dcache_page(page);
-					kunmap_atomic(kaddr, KM_USER0);
-				}
+				if (block_end > to || block_start < from)
+					zero_user_segments(page,
+						to, block_end,
+						block_start, from);
 				continue;
 			}
 		}
@@ -2104,8 +2095,7 @@
 					SetPageError(page);
 			}
 			if (!buffer_mapped(bh)) {
-				zero_user_page(page, i * blocksize, blocksize,
-						KM_USER0);
+				zero_user(page, i * blocksize, blocksize);
 				if (!err)
 					set_buffer_uptodate(bh);
 				continue;
@@ -2218,7 +2208,7 @@
 						&page, &fsdata);
 		if (err)
 			goto out;
-		zero_user_page(page, zerofrom, len, KM_USER0);
+		zero_user(page, zerofrom, len);
 		err = pagecache_write_end(file, mapping, curpos, len, len,
 						page, fsdata);
 		if (err < 0)
@@ -2245,7 +2235,7 @@
 						&page, &fsdata);
 		if (err)
 			goto out;
-		zero_user_page(page, zerofrom, len, KM_USER0);
+		zero_user(page, zerofrom, len);
 		err = pagecache_write_end(file, mapping, curpos, len, len,
 						page, fsdata);
 		if (err < 0)
@@ -2422,7 +2412,6 @@
 	unsigned block_in_page;
 	unsigned block_start, block_end;
 	sector_t block_in_file;
-	char *kaddr;
 	int nr_reads = 0;
 	int ret = 0;
 	int is_mapped_to_disk = 1;
@@ -2493,13 +2482,8 @@
 			continue;
 		}
 		if (buffer_new(bh) || !buffer_mapped(bh)) {
-			kaddr = kmap_atomic(page, KM_USER0);
-			if (block_start < from)
-				memset(kaddr+block_start, 0, from-block_start);
-			if (block_end > to)
-				memset(kaddr + to, 0, block_end - to);
-			flush_dcache_page(page);
-			kunmap_atomic(kaddr, KM_USER0);
+			zero_user_segments(page, block_start, from,
+							to, block_end);
 			continue;
 		}
 		if (buffer_uptodate(bh))
@@ -2636,7 +2620,7 @@
 	 * the  page size, the remaining memory is zeroed when mapped, and
 	 * writes to that region are not written out to the file."
 	 */
-	zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0);
+	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
 out:
 	ret = mpage_writepage(page, get_block, wbc);
 	if (ret == -EAGAIN)
@@ -2709,7 +2693,7 @@
 		if (page_has_buffers(page))
 			goto has_buffers;
 	}
-	zero_user_page(page, offset, length, KM_USER0);
+	zero_user(page, offset, length);
 	set_page_dirty(page);
 	err = 0;
 
@@ -2785,7 +2769,7 @@
 			goto unlock;
 	}
 
-	zero_user_page(page, offset, length, KM_USER0);
+	zero_user(page, offset, length);
 	mark_buffer_dirty(bh);
 	err = 0;
 
@@ -2831,7 +2815,7 @@
 	 * the  page size, the remaining memory is zeroed when mapped, and
 	 * writes to that region are not written out to the file."
 	 */
-	zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0);
+	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
 	return __block_write_full_page(inode, page, get_block, wbc);
 }
 
@@ -3169,7 +3153,7 @@
 	
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
 {
-	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep,
+	struct buffer_head *ret = kmem_cache_alloc(bh_cachep,
 				set_migrateflags(gfp_flags, __GFP_RECLAIMABLE));
 	if (ret) {
 		INIT_LIST_HEAD(&ret->b_assoc_buffers);
@@ -3257,12 +3241,24 @@
 }
 EXPORT_SYMBOL(bh_submit_read);
 
+static void
+init_buffer_head(struct kmem_cache *cachep, void *data)
+{
+	struct buffer_head *bh = data;
+
+	memset(bh, 0, sizeof(*bh));
+	INIT_LIST_HEAD(&bh->b_assoc_buffers);
+}
+
 void __init buffer_init(void)
 {
 	int nrpages;
 
-	bh_cachep = KMEM_CACHE(buffer_head,
-			SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
+	bh_cachep = kmem_cache_create("buffer_head",
+			sizeof(struct buffer_head), 0,
+				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
+				SLAB_MEM_SPREAD),
+				init_buffer_head);
 
 	/*
 	 * Limit the bh occupancy to 10% of ZONE_NORMAL
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index d9567ba..47f2621 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1386,7 +1386,7 @@
 	if (!page)
 		return -ENOMEM;
 
-	zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0);
+	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
 	unlock_page(page);
 	page_cache_release(page);
 	return rc;
diff --git a/fs/compat.c b/fs/compat.c
index 5216c3f..69baca5 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -2206,19 +2206,41 @@
 
 #ifdef CONFIG_TIMERFD
 
-asmlinkage long compat_sys_timerfd(int ufd, int clockid, int flags,
-				   const struct compat_itimerspec __user *utmr)
+asmlinkage long compat_sys_timerfd_settime(int ufd, int flags,
+				   const struct compat_itimerspec __user *utmr,
+				   struct compat_itimerspec __user *otmr)
 {
+	int error;
 	struct itimerspec t;
 	struct itimerspec __user *ut;
 
 	if (get_compat_itimerspec(&t, utmr))
 		return -EFAULT;
-	ut = compat_alloc_user_space(sizeof(*ut));
-	if (copy_to_user(ut, &t, sizeof(t)))
+	ut = compat_alloc_user_space(2 * sizeof(struct itimerspec));
+	if (copy_to_user(&ut[0], &t, sizeof(t)))
 		return -EFAULT;
+	error = sys_timerfd_settime(ufd, flags, &ut[0], &ut[1]);
+	if (!error && otmr)
+		error = (copy_from_user(&t, &ut[1], sizeof(struct itimerspec)) ||
+			 put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0;
 
-	return sys_timerfd(ufd, clockid, flags, ut);
+	return error;
+}
+
+asmlinkage long compat_sys_timerfd_gettime(int ufd,
+				   struct compat_itimerspec __user *otmr)
+{
+	int error;
+	struct itimerspec t;
+	struct itimerspec __user *ut;
+
+	ut = compat_alloc_user_space(sizeof(struct itimerspec));
+	error = sys_timerfd_gettime(ufd, ut);
+	if (!error)
+		error = (copy_from_user(&t, ut, sizeof(struct itimerspec)) ||
+			 put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0;
+
+	return error;
 }
 
 #endif /* CONFIG_TIMERFD */
diff --git a/fs/direct-io.c b/fs/direct-io.c
index acf0da1..9e81add 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -878,8 +878,8 @@
 					page_cache_release(page);
 					goto out;
 				}
-				zero_user_page(page, block_in_page << blkbits,
-						1 << blkbits, KM_USER0);
+				zero_user(page, block_in_page << blkbits,
+						1 << blkbits);
 				dio->block_in_file++;
 				block_in_page++;
 				goto next_block;
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 32c5711..0535412 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -257,8 +257,7 @@
 	end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE;
 	if (to > end_byte_in_page)
 		end_byte_in_page = to;
-	zero_user_page(page, end_byte_in_page,
-		PAGE_CACHE_SIZE - end_byte_in_page, KM_USER0);
+	zero_user_segment(page, end_byte_in_page, PAGE_CACHE_SIZE);
 out:
 	return 0;
 }
@@ -307,7 +306,7 @@
 	 */
 	if ((i_size_read(page->mapping->host) == prev_page_end_size) &&
 	    (from != 0)) {
-		zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+		zero_user(page, 0, PAGE_CACHE_SIZE);
 	}
 out:
 	return rc;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 81c04ab..a415f42 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -353,7 +353,7 @@
 	spin_unlock_irqrestore(&psw->lock, flags);
 
 	/* Do really wake up now */
-	wake_up(wq);
+	wake_up_nested(wq, 1 + wake_nests);
 
 	/* Remove the current task from the list */
 	spin_lock_irqsave(&psw->lock, flags);
diff --git a/fs/exec.c b/fs/exec.c
index 282240a..be923e4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -760,7 +760,7 @@
 	 */
 	read_lock(&tasklist_lock);
 	spin_lock_irq(lock);
-	if (sig->flags & SIGNAL_GROUP_EXIT) {
+	if (signal_group_exit(sig)) {
 		/*
 		 * Another group action in progress, just
 		 * return so that the signal is processed.
@@ -778,6 +778,7 @@
 	if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
 		task_active_pid_ns(tsk)->child_reaper = tsk;
 
+	sig->group_exit_task = tsk;
 	zap_other_threads(tsk);
 	read_unlock(&tasklist_lock);
 
@@ -802,7 +803,6 @@
 	}
 
 	sig->notify_count = count;
-	sig->group_exit_task = tsk;
 	while (atomic_read(&sig->count) > count) {
 		__set_current_state(TASK_UNINTERRUPTIBLE);
 		spin_unlock_irq(lock);
@@ -871,15 +871,10 @@
 		leader->exit_state = EXIT_DEAD;
 
 		write_unlock_irq(&tasklist_lock);
-        }
+	}
 
 	sig->group_exit_task = NULL;
 	sig->notify_count = 0;
-	/*
-	 * There may be one thread left which is just exiting,
-	 * but it's safe to stop telling the group to kill themselves.
-	 */
-	sig->flags = 0;
 
 no_thread_group:
 	exit_itimers(sig);
@@ -947,12 +942,13 @@
 	spin_unlock(&files->file_lock);
 }
 
-void get_task_comm(char *buf, struct task_struct *tsk)
+char *get_task_comm(char *buf, struct task_struct *tsk)
 {
 	/* buf must be at least sizeof(tsk->comm) in size */
 	task_lock(tsk);
 	strncpy(buf, tsk->comm, sizeof(tsk->comm));
 	task_unlock(tsk);
+	return buf;
 }
 
 void set_task_comm(struct task_struct *tsk, char *buf)
@@ -1548,7 +1544,7 @@
 	int err = -EAGAIN;
 
 	spin_lock_irq(&tsk->sighand->siglock);
-	if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) {
+	if (!signal_group_exit(tsk->signal)) {
 		tsk->signal->group_exit_code = exit_code;
 		zap_process(tsk);
 		err = 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 9b162cd..0775354 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1845,7 +1845,7 @@
 	 */
 	if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
 	     ext3_should_writeback_data(inode) && PageUptodate(page)) {
-		zero_user_page(page, offset, length, KM_USER0);
+		zero_user(page, offset, length);
 		set_page_dirty(page);
 		goto unlock;
 	}
@@ -1898,7 +1898,7 @@
 			goto unlock;
 	}
 
-	zero_user_page(page, offset, length, KM_USER0);
+	zero_user(page, offset, length);
 	BUFFER_TRACE(bh, "zeroed end of block");
 
 	err = 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index bb717cb..05c4145 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1840,7 +1840,7 @@
 	 */
 	if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
 	     ext4_should_writeback_data(inode) && PageUptodate(page)) {
-		zero_user_page(page, offset, length, KM_USER0);
+		zero_user(page, offset, length);
 		set_page_dirty(page);
 		goto unlock;
 	}
@@ -1893,7 +1893,7 @@
 			goto unlock;
 	}
 
-	zero_user_page(page, offset, length, KM_USER0);
+	zero_user(page, offset, length);
 
 	BUFFER_TRACE(bh, "zeroed end of block");
 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 300324b..0b30640 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -284,7 +284,17 @@
 				 * soon as the queue becomes uncongested.
 				 */
 				inode->i_state |= I_DIRTY_PAGES;
-				requeue_io(inode);
+				if (wbc->nr_to_write <= 0) {
+					/*
+					 * slice used up: queue for next turn
+					 */
+					requeue_io(inode);
+				} else {
+					/*
+					 * somehow blocked: retry later
+					 */
+					redirty_tail(inode);
+				}
 			} else {
 				/*
 				 * Otherwise fully redirty the inode so that
@@ -334,9 +344,6 @@
 		WARN_ON(inode->i_state & I_WILL_FREE);
 
 	if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) {
-		struct address_space *mapping = inode->i_mapping;
-		int ret;
-
 		/*
 		 * We're skipping this inode because it's locked, and we're not
 		 * doing writeback-for-data-integrity.  Move it to s_more_io so
@@ -345,15 +352,7 @@
 		 * completed a full scan of s_io.
 		 */
 		requeue_io(inode);
-
-		/*
-		 * Even if we don't actually write the inode itself here,
-		 * we can at least start some of the data writeout..
-		 */
-		spin_unlock(&inode_lock);
-		ret = do_writepages(mapping, wbc);
-		spin_lock(&inode_lock);
-		return ret;
+		return 0;
 	}
 
 	/*
@@ -479,8 +478,12 @@
 		iput(inode);
 		cond_resched();
 		spin_lock(&inode_lock);
-		if (wbc->nr_to_write <= 0)
+		if (wbc->nr_to_write <= 0) {
+			wbc->more_io = 1;
 			break;
+		}
+		if (!list_empty(&sb->s_more_io))
+			wbc->more_io = 1;
 	}
 	return;		/* Leave any unwritten inodes on s_io */
 }
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index e4effc4..e9456eb 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -932,7 +932,7 @@
 	if (!gfs2_is_writeback(ip))
 		gfs2_trans_add_bh(ip->i_gl, bh, 0);
 
-	zero_user_page(page, offset, length, KM_USER0);
+	zero_user(page, offset, length);
 
 unlock:
 	unlock_page(page);
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 38dbe99..ac772b6 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -446,7 +446,7 @@
 	 * so we need to supply one here. It doesn't happen often.
 	 */
 	if (unlikely(page->index)) {
-		zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+		zero_user(page, 0, PAGE_CACHE_SIZE);
 		return 0;
 	}
 
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 35c1a9f..53fd0a6 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -285,17 +285,17 @@
 			return err;
 
 		times[0].tv_sec = atime_ts.tv_sec;
-		times[0].tv_usec = atime_ts.tv_nsec * 1000;
+		times[0].tv_usec = atime_ts.tv_nsec / 1000;
 		times[1].tv_sec = mtime_ts.tv_sec;
-		times[1].tv_usec = mtime_ts.tv_nsec * 1000;
+		times[1].tv_usec = mtime_ts.tv_nsec / 1000;
 
 		if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) {
 			times[0].tv_sec = attrs->ia_atime.tv_sec;
-			times[0].tv_usec = attrs->ia_atime.tv_nsec * 1000;
+			times[0].tv_usec = attrs->ia_atime.tv_nsec / 1000;
 		}
 		if (attrs->ia_valid & HOSTFS_ATTR_MTIME_SET) {
 			times[1].tv_sec = attrs->ia_mtime.tv_sec;
-			times[1].tv_usec = attrs->ia_mtime.tv_nsec * 1000;
+			times[1].tv_usec = attrs->ia_mtime.tv_nsec / 1000;
 		}
 
 		if (fd >= 0) {
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 09ee07f..3b3cc28 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -768,7 +768,7 @@
 		case Opt_mode:
 			if (match_octal(&args[0], &option))
  				goto bad_val;
-			pconfig->mode = option & 0777U;
+			pconfig->mode = option & 01777U;
 			break;
 
 		case Opt_size: {
diff --git a/fs/libfs.c b/fs/libfs.c
index 6e68b70..5523bde 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -341,13 +341,10 @@
 			unsigned from, unsigned to)
 {
 	if (!PageUptodate(page)) {
-		if (to - from != PAGE_CACHE_SIZE) {
-			void *kaddr = kmap_atomic(page, KM_USER0);
-			memset(kaddr, 0, from);
-			memset(kaddr + to, 0, PAGE_CACHE_SIZE - to);
-			flush_dcache_page(page);
-			kunmap_atomic(kaddr, KM_USER0);
-		}
+		if (to - from != PAGE_CACHE_SIZE)
+			zero_user_segments(page,
+				0, from,
+				to, PAGE_CACHE_SIZE);
 	}
 	return 0;
 }
diff --git a/fs/mpage.c b/fs/mpage.c
index d54f8f8..5df5643 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -276,9 +276,7 @@
 	}
 
 	if (first_hole != blocks_per_page) {
-		zero_user_page(page, first_hole << blkbits,
-				PAGE_CACHE_SIZE - (first_hole << blkbits),
-				KM_USER0);
+		zero_user_segment(page, first_hole << blkbits, PAGE_CACHE_SIZE);
 		if (first_hole == 0) {
 			SetPageUptodate(page);
 			unlock_page(page);
@@ -571,8 +569,7 @@
 
 		if (page->index > end_index || !offset)
 			goto confused;
-		zero_user_page(page, offset, PAGE_CACHE_SIZE - offset,
-				KM_USER0);
+		zero_user_segment(page, offset, PAGE_CACHE_SIZE);
 	}
 
 	/*
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 8fd6dfb..3d7d963 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -79,7 +79,7 @@
 static
 int nfs_return_empty_page(struct page *page)
 {
-	zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+	zero_user(page, 0, PAGE_CACHE_SIZE);
 	SetPageUptodate(page);
 	unlock_page(page);
 	return 0;
@@ -103,10 +103,10 @@
 	pglen = PAGE_CACHE_SIZE - base;
 	for (;;) {
 		if (remainder <= pglen) {
-			zero_user_page(*pages, base, remainder, KM_USER0);
+			zero_user(*pages, base, remainder);
 			break;
 		}
-		zero_user_page(*pages, base, pglen, KM_USER0);
+		zero_user(*pages, base, pglen);
 		pages++;
 		remainder -= pglen;
 		pglen = PAGE_CACHE_SIZE;
@@ -130,7 +130,7 @@
 		return PTR_ERR(new);
 	}
 	if (len < PAGE_CACHE_SIZE)
-		zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
+		zero_user_segment(page, len, PAGE_CACHE_SIZE);
 
 	nfs_list_add_request(new, &one_request);
 	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
@@ -532,7 +532,7 @@
 		goto out_error;
 
 	if (len < PAGE_CACHE_SIZE)
-		zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
+		zero_user_segment(page, len, PAGE_CACHE_SIZE);
 	nfs_pageio_add_request(desc->pgio, new);
 	return 0;
 out_error:
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 522efff..b144b19 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -665,9 +665,7 @@
 	 * then we need to zero any uninitalised data. */
 	if (req->wb_pgbase == 0 && req->wb_bytes != PAGE_CACHE_SIZE
 			&& !PageUptodate(req->wb_page))
-		zero_user_page(req->wb_page, req->wb_bytes,
-				PAGE_CACHE_SIZE - req->wb_bytes,
-				KM_USER0);
+		zero_user_segment(req->wb_page, req->wb_bytes, PAGE_CACHE_SIZE);
 	return req;
 }
 
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 2192805..d13403e 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -11,8 +11,6 @@
 #include <linux/nfsd/nfsd.h>
 #include <linux/nfsd/export.h>
 
-#define	CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE))
-
 int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
 {
 	struct exp_flavor_info *f;
@@ -69,10 +67,12 @@
 	ret = set_current_groups(cred.cr_group_info);
 	put_group_info(cred.cr_group_info);
 	if ((cred.cr_uid)) {
-		cap_t(current->cap_effective) &= ~CAP_NFSD_MASK;
+		current->cap_effective =
+			cap_drop_nfsd_set(current->cap_effective);
 	} else {
-		cap_t(current->cap_effective) |= (CAP_NFSD_MASK &
-						  current->cap_permitted);
+		current->cap_effective =
+			cap_raise_nfsd_set(current->cap_effective,
+					   current->cap_permitted);
 	}
 	return ret;
 }
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index ad87cb0..00e9ccd 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -87,13 +87,17 @@
 		/* Check for the current buffer head overflowing. */
 		if (unlikely(file_ofs + bh->b_size > init_size)) {
 			int ofs;
+			void *kaddr;
 
 			ofs = 0;
 			if (file_ofs < init_size)
 				ofs = init_size - file_ofs;
 			local_irq_save(flags);
-			zero_user_page(page, bh_offset(bh) + ofs,
-					 bh->b_size - ofs, KM_BIO_SRC_IRQ);
+			kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+			memset(kaddr + bh_offset(bh) + ofs, 0,
+					bh->b_size - ofs);
+			flush_dcache_page(page);
+			kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
 			local_irq_restore(flags);
 		}
 	} else {
@@ -334,7 +338,7 @@
 		bh->b_blocknr = -1UL;
 		clear_buffer_mapped(bh);
 handle_zblock:
-		zero_user_page(page, i * blocksize, blocksize, KM_USER0);
+		zero_user(page, i * blocksize, blocksize);
 		if (likely(!err))
 			set_buffer_uptodate(bh);
 	} while (i++, iblock++, (bh = bh->b_this_page) != head);
@@ -410,7 +414,7 @@
 	/* Is the page fully outside i_size? (truncate in progress) */
 	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
 			PAGE_CACHE_SHIFT)) {
-		zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+		zero_user(page, 0, PAGE_CACHE_SIZE);
 		ntfs_debug("Read outside i_size - truncated?");
 		goto done;
 	}
@@ -459,7 +463,7 @@
 	 * ok to ignore the compressed flag here.
 	 */
 	if (unlikely(page->index > 0)) {
-		zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+		zero_user(page, 0, PAGE_CACHE_SIZE);
 		goto done;
 	}
 	if (!NInoAttr(ni))
@@ -788,8 +792,7 @@
 		if (err == -ENOENT || lcn == LCN_ENOENT) {
 			bh->b_blocknr = -1;
 			clear_buffer_dirty(bh);
-			zero_user_page(page, bh_offset(bh), blocksize,
-					KM_USER0);
+			zero_user(page, bh_offset(bh), blocksize);
 			set_buffer_uptodate(bh);
 			err = 0;
 			continue;
@@ -1414,8 +1417,7 @@
 		if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
 			/* The page straddles i_size. */
 			unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
-			zero_user_page(page, ofs, PAGE_CACHE_SIZE - ofs,
-					KM_USER0);
+			zero_user_segment(page, ofs, PAGE_CACHE_SIZE);
 		}
 		/* Handle mst protected attributes. */
 		if (NInoMstProtected(ni))
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index d1619d0..33ff314 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -565,7 +565,7 @@
 	if (xpage >= max_page) {
 		kfree(bhs);
 		kfree(pages);
-		zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+		zero_user(page, 0, PAGE_CACHE_SIZE);
 		ntfs_debug("Compressed read outside i_size - truncated?");
 		SetPageUptodate(page);
 		unlock_page(page);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 6cd08df..3c5550c 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -607,8 +607,8 @@
 					ntfs_submit_bh_for_read(bh);
 					*wait_bh++ = bh;
 				} else {
-					zero_user_page(page, bh_offset(bh),
-							blocksize, KM_USER0);
+					zero_user(page, bh_offset(bh),
+							blocksize);
 					set_buffer_uptodate(bh);
 				}
 			}
@@ -683,9 +683,8 @@
 						ntfs_submit_bh_for_read(bh);
 						*wait_bh++ = bh;
 					} else {
-						zero_user_page(page,
-							bh_offset(bh),
-							blocksize, KM_USER0);
+						zero_user(page, bh_offset(bh),
+								blocksize);
 						set_buffer_uptodate(bh);
 					}
 				}
@@ -703,8 +702,8 @@
 			 */
 			if (bh_end <= pos || bh_pos >= end) {
 				if (!buffer_uptodate(bh)) {
-					zero_user_page(page, bh_offset(bh),
-							blocksize, KM_USER0);
+					zero_user(page, bh_offset(bh),
+							blocksize);
 					set_buffer_uptodate(bh);
 				}
 				mark_buffer_dirty(bh);
@@ -743,8 +742,7 @@
 				if (!buffer_uptodate(bh))
 					set_buffer_uptodate(bh);
 			} else if (!buffer_uptodate(bh)) {
-				zero_user_page(page, bh_offset(bh), blocksize,
-						KM_USER0);
+				zero_user(page, bh_offset(bh), blocksize);
 				set_buffer_uptodate(bh);
 			}
 			continue;
@@ -868,8 +866,8 @@
 					if (!buffer_uptodate(bh))
 						set_buffer_uptodate(bh);
 				} else if (!buffer_uptodate(bh)) {
-					zero_user_page(page, bh_offset(bh),
-							blocksize, KM_USER0);
+					zero_user(page, bh_offset(bh),
+						blocksize);
 					set_buffer_uptodate(bh);
 				}
 				continue;
@@ -1128,8 +1126,8 @@
 
 				if (likely(bh_pos < initialized_size))
 					ofs = initialized_size - bh_pos;
-				zero_user_page(page, bh_offset(bh) + ofs,
-						blocksize - ofs, KM_USER0);
+				zero_user(page, bh_offset(bh) + ofs,
+						blocksize - ofs);
 			}
 		} else /* if (unlikely(!buffer_uptodate(bh))) */
 			err = -EIO;
@@ -1269,8 +1267,8 @@
 				if (PageUptodate(page))
 					set_buffer_uptodate(bh);
 				else {
-					zero_user_page(page, bh_offset(bh),
-							blocksize, KM_USER0);
+					zero_user(page, bh_offset(bh),
+							blocksize);
 					set_buffer_uptodate(bh);
 				}
 			}
@@ -1330,7 +1328,7 @@
 		len = PAGE_CACHE_SIZE;
 		if (len > bytes)
 			len = bytes;
-		zero_user_page(*pages, 0, len, KM_USER0);
+		zero_user(*pages, 0, len);
 	}
 	goto out;
 }
@@ -1451,7 +1449,7 @@
 		len = PAGE_CACHE_SIZE;
 		if (len > bytes)
 			len = bytes;
-		zero_user_page(*pages, 0, len, KM_USER0);
+		zero_user(*pages, 0, len);
 	}
 	goto out;
 }
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index e38e402..cd0be3f 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -85,8 +85,7 @@
 
 static inline void ntfs_free(void *addr)
 {
-	if (likely(((unsigned long)addr < VMALLOC_START) ||
-			((unsigned long)addr >= VMALLOC_END ))) {
+	if (!is_vmalloc_addr(addr)) {
 		kfree(addr);
 		/* free_page((unsigned long)addr); */
 		return;
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 64713e1..447206e 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5670,7 +5670,7 @@
 		mlog_errno(ret);
 
 	if (zero)
-		zero_user_page(page, from, to - from, KM_USER0);
+		zero_user_segment(page, from, to);
 
 	/*
 	 * Need to set the buffers we zero'd into uptodate
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index bc7b4cb..8224312 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -307,7 +307,7 @@
 	 * XXX sys_readahead() seems to get that wrong?
 	 */
 	if (start >= i_size_read(inode)) {
-		zero_user_page(page, 0, PAGE_SIZE, KM_USER0);
+		zero_user(page, 0, PAGE_SIZE);
 		SetPageUptodate(page);
 		ret = 0;
 		goto out_alloc;
@@ -869,7 +869,7 @@
 		if (block_start >= to)
 			break;
 
-		zero_user_page(page, block_start, bh->b_size, KM_USER0);
+		zero_user(page, block_start, bh->b_size);
 		set_buffer_uptodate(bh);
 		mark_buffer_dirty(bh);
 
@@ -1034,7 +1034,7 @@
 					start = max(from, block_start);
 					end = min(to, block_end);
 
-					zero_user_page(page, start, end - start, KM_USER0);
+					zero_user_segment(page, start, end);
 					set_buffer_uptodate(bh);
 				}
 
diff --git a/fs/proc/array.c b/fs/proc/array.c
index b380313..6ba2746 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -281,14 +281,23 @@
 	return buffer;
 }
 
+static char *render_cap_t(const char *header, kernel_cap_t *a, char *buffer)
+{
+	unsigned __capi;
+
+	buffer += sprintf(buffer, "%s", header);
+	CAP_FOR_EACH_U32(__capi) {
+		buffer += sprintf(buffer, "%08x",
+				  a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]);
+	}
+	return buffer + sprintf(buffer, "\n");
+}
+
 static inline char *task_cap(struct task_struct *p, char *buffer)
 {
-    return buffer + sprintf(buffer, "CapInh:\t%016x\n"
-			    "CapPrm:\t%016x\n"
-			    "CapEff:\t%016x\n",
-			    cap_t(p->cap_inheritable),
-			    cap_t(p->cap_permitted),
-			    cap_t(p->cap_effective));
+	buffer = render_cap_t("CapInh:\t", &p->cap_inheritable, buffer);
+	buffer = render_cap_t("CapPrm:\t", &p->cap_permitted, buffer);
+	return render_cap_t("CapEff:\t", &p->cap_effective, buffer);
 }
 
 static inline char *task_context_switch_counts(struct task_struct *p,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3353748..c59852b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -88,10 +88,6 @@
  *	in /proc for a task before it execs a suid executable.
  */
 
-
-/* Worst case buffer size needed for holding an integer. */
-#define PROC_NUMBUF 13
-
 struct pid_entry {
 	char *name;
 	int len;
@@ -787,7 +783,7 @@
 }
 #endif
 
-static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
+loff_t mem_lseek(struct file *file, loff_t offset, int orig)
 {
 	switch (orig) {
 	case 0:
@@ -935,42 +931,6 @@
 	.write		= oom_adjust_write,
 };
 
-#ifdef CONFIG_MMU
-static ssize_t clear_refs_write(struct file *file, const char __user *buf,
-				size_t count, loff_t *ppos)
-{
-	struct task_struct *task;
-	char buffer[PROC_NUMBUF], *end;
-	struct mm_struct *mm;
-
-	memset(buffer, 0, sizeof(buffer));
-	if (count > sizeof(buffer) - 1)
-		count = sizeof(buffer) - 1;
-	if (copy_from_user(buffer, buf, count))
-		return -EFAULT;
-	if (!simple_strtol(buffer, &end, 0))
-		return -EINVAL;
-	if (*end == '\n')
-		end++;
-	task = get_proc_task(file->f_path.dentry->d_inode);
-	if (!task)
-		return -ESRCH;
-	mm = get_task_mm(task);
-	if (mm) {
-		clear_refs_smap(mm);
-		mmput(mm);
-	}
-	put_task_struct(task);
-	if (end - buffer == 0)
-		return -EIO;
-	return end - buffer;
-}
-
-static struct file_operations proc_clear_refs_operations = {
-	.write		= clear_refs_write,
-};
-#endif
-
 #ifdef CONFIG_AUDITSYSCALL
 #define TMPBUFLEN 21
 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
@@ -2289,9 +2249,10 @@
 	LNK("exe",        exe),
 	REG("mounts",     S_IRUGO, mounts),
 	REG("mountstats", S_IRUSR, mountstats),
-#ifdef CONFIG_MMU
+#ifdef CONFIG_PROC_PAGE_MONITOR
 	REG("clear_refs", S_IWUSR, clear_refs),
 	REG("smaps",      S_IRUGO, smaps),
+	REG("pagemap",    S_IRUSR, pagemap),
 #endif
 #ifdef CONFIG_SECURITY
 	DIR("attr",       S_IRUGO|S_IXUGO, attr_dir),
@@ -2360,7 +2321,8 @@
 	name.len = snprintf(buf, sizeof(buf), "%d", pid);
 	dentry = d_hash_and_lookup(mnt->mnt_root, &name);
 	if (dentry) {
-		shrink_dcache_parent(dentry);
+		if (!(current->flags & PF_EXITING))
+			shrink_dcache_parent(dentry);
 		d_drop(dentry);
 		dput(dentry);
 	}
@@ -2617,9 +2579,10 @@
 	LNK("root",      root),
 	LNK("exe",       exe),
 	REG("mounts",    S_IRUGO, mounts),
-#ifdef CONFIG_MMU
+#ifdef CONFIG_PROC_PAGE_MONITOR
 	REG("clear_refs", S_IWUSR, clear_refs),
 	REG("smaps",     S_IRUGO, smaps),
+	REG("pagemap",    S_IRUSR, pagemap),
 #endif
 #ifdef CONFIG_SECURITY
 	DIR("attr",      S_IRUGO|S_IXUGO, attr_dir),
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 05b3e90..7d57e80 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -52,15 +52,13 @@
 extern int proc_tgid_stat(struct task_struct *, char *);
 extern int proc_pid_status(struct task_struct *, char *);
 extern int proc_pid_statm(struct task_struct *, char *);
+extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
 
 extern const struct file_operations proc_maps_operations;
 extern const struct file_operations proc_numa_maps_operations;
 extern const struct file_operations proc_smaps_operations;
-
-extern const struct file_operations proc_maps_operations;
-extern const struct file_operations proc_numa_maps_operations;
-extern const struct file_operations proc_smaps_operations;
-
+extern const struct file_operations proc_clear_refs_operations;
+extern const struct file_operations proc_pagemap_operations;
 
 void free_proc_entry(struct proc_dir_entry *de);
 
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 1be7308..7dd26e1 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -325,7 +325,7 @@
 		if (m == NULL) {
 			if (clear_user(buffer, tsz))
 				return -EFAULT;
-		} else if ((start >= VMALLOC_START) && (start < VMALLOC_END)) {
+		} else if (is_vmalloc_addr((void *)start)) {
 			char * elf_buf;
 			struct vm_struct *m;
 			unsigned long curstart = start;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 3462bfd..51288db 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -46,6 +46,7 @@
 #include <linux/vmalloc.h>
 #include <linux/crash_dump.h>
 #include <linux/pid_namespace.h>
+#include <linux/bootmem.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
@@ -675,6 +676,137 @@
 };
 #endif
 
+#ifdef CONFIG_PROC_PAGE_MONITOR
+#define KPMSIZE sizeof(u64)
+#define KPMMASK (KPMSIZE - 1)
+/* /proc/kpagecount - an array exposing page counts
+ *
+ * Each entry is a u64 representing the corresponding
+ * physical page count.
+ */
+static ssize_t kpagecount_read(struct file *file, char __user *buf,
+			     size_t count, loff_t *ppos)
+{
+	u64 __user *out = (u64 __user *)buf;
+	struct page *ppage;
+	unsigned long src = *ppos;	/* byte offset; one u64 per pfn */
+	unsigned long pfn;
+	ssize_t ret = 0;
+	u64 pcount;
+
+	pfn = src / KPMSIZE;
+	count = min_t(size_t, count, (max_pfn * KPMSIZE) - src);
+	if (src & KPMMASK || count & KPMMASK)	/* whole entries only */
+		return -EIO;
+
+	while (count > 0) {
+		ppage = NULL;
+		if (pfn_valid(pfn))
+			ppage = pfn_to_page(pfn);
+		pfn++;
+		if (!ppage)
+			pcount = 0;	/* no struct page: report zero */
+		else
+			pcount = atomic_read(&ppage->_count);
+
+		if (put_user(pcount, out)) {
+			ret = -EFAULT;
+			break;
+		}
+		out++;	/* advance only past entries actually delivered */
+		count -= KPMSIZE;
+	}
+
+	*ppos += (char __user *)out - buf;
+	if (!ret)
+		ret = (char __user *)out - buf;
+	return ret;
+}
+
+static struct file_operations proc_kpagecount_operations = {
+	.llseek = mem_lseek,
+	.read = kpagecount_read,
+};
+
+/* /proc/kpageflags - an array exposing page flags
+ *
+ * Each entry is a u64 representing the corresponding
+ * physical page flags.
+ */
+
+/* These macros are used to decouple internal flags from exported ones */
+
+#define KPF_LOCKED     0
+#define KPF_ERROR      1
+#define KPF_REFERENCED 2
+#define KPF_UPTODATE   3
+#define KPF_DIRTY      4
+#define KPF_LRU        5
+#define KPF_ACTIVE     6
+#define KPF_SLAB       7
+#define KPF_WRITEBACK  8
+#define KPF_RECLAIM    9
+#define KPF_BUDDY     10
+
+#define kpf_copy_bit(flags, dstpos, srcpos) ((((flags) >> (srcpos)) & 1) << (dstpos))
+
+static ssize_t kpageflags_read(struct file *file, char __user *buf,
+			     size_t count, loff_t *ppos)
+{
+	u64 __user *out = (u64 __user *)buf;
+	struct page *ppage;
+	unsigned long src = *ppos;	/* byte offset; one u64 per pfn */
+	unsigned long pfn;
+	ssize_t ret = 0;
+	u64 kflags, uflags;
+
+	pfn = src / KPMSIZE;
+	count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src);
+	if (src & KPMMASK || count & KPMMASK)	/* whole entries only */
+		return -EIO;
+
+	while (count > 0) {
+		ppage = NULL;
+		if (pfn_valid(pfn))
+			ppage = pfn_to_page(pfn);
+		pfn++;
+		if (!ppage)
+			kflags = 0;	/* no struct page: report no flags */
+		else
+			kflags = ppage->flags;
+		/* move each internal PG_* bit to its exported KPF_* position */
+		uflags = kpf_copy_bit(kflags, KPF_LOCKED, PG_locked) |
+			kpf_copy_bit(kflags, KPF_ERROR, PG_error) |
+			kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) |
+			kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) |
+			kpf_copy_bit(kflags, KPF_DIRTY, PG_dirty) |
+			kpf_copy_bit(kflags, KPF_LRU, PG_lru) |
+			kpf_copy_bit(kflags, KPF_ACTIVE, PG_active) |
+			kpf_copy_bit(kflags, KPF_SLAB, PG_slab) |
+			kpf_copy_bit(kflags, KPF_WRITEBACK, PG_writeback) |
+			kpf_copy_bit(kflags, KPF_RECLAIM, PG_reclaim) |
+			kpf_copy_bit(kflags, KPF_BUDDY, PG_buddy);
+
+		if (put_user(uflags, out)) {
+			ret = -EFAULT;
+			break;
+		}
+		out++;	/* advance only past entries actually delivered */
+		count -= KPMSIZE;
+	}
+
+	*ppos += (char __user *)out - buf;
+	if (!ret)
+		ret = (char __user *)out - buf;
+	return ret;
+}
+
+static struct file_operations proc_kpageflags_operations = {
+	.llseek = mem_lseek,
+	.read = kpageflags_read,
+};
+#endif /* CONFIG_PROC_PAGE_MONITOR */
+
 struct proc_dir_entry *proc_root_kcore;
 
 void create_seq_entry(char *name, mode_t mode, const struct file_operations *f)
@@ -755,6 +887,10 @@
 				(size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
 	}
 #endif
+#ifdef CONFIG_PROC_PAGE_MONITOR
+	create_seq_entry("kpagecount", S_IRUSR, &proc_kpagecount_operations);
+	create_seq_entry("kpageflags", S_IRUSR, &proc_kpageflags_operations);
+#endif
 #ifdef CONFIG_PROC_VMCORE
 	proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL);
 	if (proc_vmcore)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8043a3e..38338ed 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -5,7 +5,10 @@
 #include <linux/highmem.h>
 #include <linux/ptrace.h>
 #include <linux/pagemap.h>
+#include <linux/ptrace.h>
 #include <linux/mempolicy.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
 
 #include <asm/elf.h>
 #include <asm/uaccess.h>
@@ -114,24 +117,124 @@
 	seq_printf(m, "%*c", len, ' ');
 }
 
-struct mem_size_stats
+static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
 {
-	unsigned long resident;
-	unsigned long shared_clean;
-	unsigned long shared_dirty;
-	unsigned long private_clean;
-	unsigned long private_dirty;
-	unsigned long referenced;
-};
+	if (vma && vma != priv->tail_vma) {
+		struct mm_struct *mm = vma->vm_mm;
+		up_read(&mm->mmap_sem);
+		mmput(mm);
+	}
+}
 
-struct pmd_walker {
-	struct vm_area_struct *vma;
-	void *private;
-	void (*action)(struct vm_area_struct *, pmd_t *, unsigned long,
-		       unsigned long, void *);
-};
+static void *m_start(struct seq_file *m, loff_t *pos)
+{
+	struct proc_maps_private *priv = m->private;
+	unsigned long last_addr = m->version;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma, *tail_vma = NULL;
+	loff_t l = *pos;
 
-static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
+	/* Clear the per syscall fields in priv */
+	priv->task = NULL;
+	priv->tail_vma = NULL;
+
+	/*
+	 * We remember last_addr rather than next_addr to hit with
+	 * mmap_cache most of the time. We have zero last_addr at
+	 * the beginning and also after lseek. We will have -1 last_addr
+	 * after the end of the vmas.
+	 */
+
+	if (last_addr == -1UL)
+		return NULL;
+
+	priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
+	if (!priv->task)
+		return NULL;
+
+	mm = mm_for_maps(priv->task);
+	if (!mm)
+		return NULL;
+
+	tail_vma = get_gate_vma(priv->task);
+	priv->tail_vma = tail_vma;
+
+	/* Start with last addr hint */
+	vma = find_vma(mm, last_addr);
+	if (last_addr && vma) {
+		vma = vma->vm_next;
+		goto out;
+	}
+
+	/*
+	 * Check the vma index is within the range and do
+	 * sequential scan until m_index.
+	 */
+	vma = NULL;
+	if ((unsigned long)l < mm->map_count) {
+		vma = mm->mmap;
+		while (l-- && vma)
+			vma = vma->vm_next;
+		goto out;
+	}
+
+	if (l != mm->map_count)
+		tail_vma = NULL; /* After gate vma */
+
+out:
+	if (vma)
+		return vma;
+
+	/* End of vmas has been reached */
+	m->version = (tail_vma != NULL)? 0: -1UL;
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+	return tail_vma;
+}
+
+static void *m_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct proc_maps_private *priv = m->private;
+	struct vm_area_struct *vma = v;
+	struct vm_area_struct *tail_vma = priv->tail_vma;
+
+	(*pos)++;
+	if (vma && (vma != tail_vma) && vma->vm_next)
+		return vma->vm_next;
+	vma_stop(priv, vma);
+	return (vma != tail_vma)? tail_vma: NULL;
+}
+
+static void m_stop(struct seq_file *m, void *v)
+{
+	struct proc_maps_private *priv = m->private;
+	struct vm_area_struct *vma = v;
+
+	vma_stop(priv, vma);
+	if (priv->task)
+		put_task_struct(priv->task);
+}
+
+static int do_maps_open(struct inode *inode, struct file *file,
+			struct seq_operations *ops)
+{
+	struct proc_maps_private *priv;
+	int ret = -ENOMEM;
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (priv) {
+		priv->pid = proc_pid(inode);
+		ret = seq_open(file, ops);
+		if (!ret) {
+			struct seq_file *m = file->private_data;
+			m->private = priv;
+		} else {
+			kfree(priv);
+		}
+	}
+	return ret;
+}
+
+static int show_map(struct seq_file *m, void *v)
 {
 	struct proc_maps_private *priv = m->private;
 	struct task_struct *task = priv->task;
@@ -191,41 +294,71 @@
 	}
 	seq_putc(m, '\n');
 
-	if (mss)
-		seq_printf(m,
-			   "Size:           %8lu kB\n"
-			   "Rss:            %8lu kB\n"
-			   "Shared_Clean:   %8lu kB\n"
-			   "Shared_Dirty:   %8lu kB\n"
-			   "Private_Clean:  %8lu kB\n"
-			   "Private_Dirty:  %8lu kB\n"
-			   "Referenced:     %8lu kB\n",
-			   (vma->vm_end - vma->vm_start) >> 10,
-			   mss->resident >> 10,
-			   mss->shared_clean  >> 10,
-			   mss->shared_dirty  >> 10,
-			   mss->private_clean >> 10,
-			   mss->private_dirty >> 10,
-			   mss->referenced >> 10);
-
 	if (m->count < m->size)  /* vma is copied successfully */
 		m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
 	return 0;
 }
 
-static int show_map(struct seq_file *m, void *v)
+static struct seq_operations proc_pid_maps_op = {
+	.start	= m_start,
+	.next	= m_next,
+	.stop	= m_stop,
+	.show	= show_map
+};
+
+static int maps_open(struct inode *inode, struct file *file)
 {
-	return show_map_internal(m, v, NULL);
+	return do_maps_open(inode, file, &proc_pid_maps_op);
 }
 
-static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
-			    unsigned long addr, unsigned long end,
-			    void *private)
+const struct file_operations proc_maps_operations = {
+	.open		= maps_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+
+/*
+ * Proportional Set Size(PSS): my share of RSS.
+ *
+ * PSS of a process is the count of pages it has in memory, where each
+ * page is divided by the number of processes sharing it.  So if a
+ * process has 1000 pages all to itself, and 1000 shared with one other
+ * process, its PSS will be 1500.
+ *
+ * To keep (accumulated) division errors low, we adopt a 64bit
+ * fixed-point pss counter to minimize division errors. So (pss >>
+ * PSS_SHIFT) would be the real byte count.
+ *
+ * A shift of 12 before division means (assuming 4K page size):
+ * 	- 1M 3-user-pages add up to 8KB errors;
+ * 	- supports mapcount up to 2^24, or 16M;
+ * 	- supports PSS up to 2^52 bytes, or 4PB.
+ */
+#define PSS_SHIFT 12
+
+#ifdef CONFIG_PROC_PAGE_MONITOR
+struct mem_size_stats
+{
+	struct vm_area_struct *vma;
+	unsigned long resident;
+	unsigned long shared_clean;
+	unsigned long shared_dirty;
+	unsigned long private_clean;
+	unsigned long private_dirty;
+	unsigned long referenced;
+	u64 pss;
+};
+
+static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+			   void *private)
 {
 	struct mem_size_stats *mss = private;
+	struct vm_area_struct *vma = mss->vma;
 	pte_t *pte, ptent;
 	spinlock_t *ptl;
 	struct page *page;
+	int mapcount;
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -242,26 +375,88 @@
 		/* Accumulate the size in pages that have been accessed. */
 		if (pte_young(ptent) || PageReferenced(page))
 			mss->referenced += PAGE_SIZE;
-		if (page_mapcount(page) >= 2) {
+		mapcount = page_mapcount(page);
+		if (mapcount >= 2) {
 			if (pte_dirty(ptent))
 				mss->shared_dirty += PAGE_SIZE;
 			else
 				mss->shared_clean += PAGE_SIZE;
+			mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
 		} else {
 			if (pte_dirty(ptent))
 				mss->private_dirty += PAGE_SIZE;
 			else
 				mss->private_clean += PAGE_SIZE;
+			mss->pss += (PAGE_SIZE << PSS_SHIFT);
 		}
 	}
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
+	return 0;
 }
 
-static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
-				 unsigned long addr, unsigned long end,
-				 void *private)
+static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
+
+static int show_smap(struct seq_file *m, void *v)
 {
+	struct vm_area_struct *vma = v;
+	struct mem_size_stats mss;
+	int ret;
+
+	memset(&mss, 0, sizeof mss);
+	mss.vma = vma;
+	if (vma->vm_mm && !is_vm_hugetlb_page(vma))
+		walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end,
+				&smaps_walk, &mss);
+
+	ret = show_map(m, v);
+	if (ret)
+		return ret;
+
+	seq_printf(m,
+		   "Size:           %8lu kB\n"
+		   "Rss:            %8lu kB\n"
+		   "Pss:            %8lu kB\n"
+		   "Shared_Clean:   %8lu kB\n"
+		   "Shared_Dirty:   %8lu kB\n"
+		   "Private_Clean:  %8lu kB\n"
+		   "Private_Dirty:  %8lu kB\n"
+		   "Referenced:     %8lu kB\n",
+		   (vma->vm_end - vma->vm_start) >> 10,
+		   mss.resident >> 10,
+		   (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
+		   mss.shared_clean  >> 10,
+		   mss.shared_dirty  >> 10,
+		   mss.private_clean >> 10,
+		   mss.private_dirty >> 10,
+		   mss.referenced >> 10);
+
+	return ret;
+}
+
+static struct seq_operations proc_pid_smaps_op = {
+	.start	= m_start,
+	.next	= m_next,
+	.stop	= m_stop,
+	.show	= show_smap
+};
+
+static int smaps_open(struct inode *inode, struct file *file)
+{
+	return do_maps_open(inode, file, &proc_pid_smaps_op);
+}
+
+const struct file_operations proc_smaps_operations = {
+	.open		= smaps_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+
+static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
+				unsigned long end, void *private)
+{
+	struct vm_area_struct *vma = private;
 	pte_t *pte, ptent;
 	spinlock_t *ptl;
 	struct page *page;
@@ -282,235 +477,248 @@
 	}
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
+	return 0;
 }
 
-static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud,
-				  unsigned long addr, unsigned long end)
+static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
+
+static ssize_t clear_refs_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
 {
-	pmd_t *pmd;
-	unsigned long next;
-
-	for (pmd = pmd_offset(pud, addr); addr != end;
-	     pmd++, addr = next) {
-		next = pmd_addr_end(addr, end);
-		if (pmd_none_or_clear_bad(pmd))
-			continue;
-		walker->action(walker->vma, pmd, addr, next, walker->private);
-	}
-}
-
-static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd,
-				  unsigned long addr, unsigned long end)
-{
-	pud_t *pud;
-	unsigned long next;
-
-	for (pud = pud_offset(pgd, addr); addr != end;
-	     pud++, addr = next) {
-		next = pud_addr_end(addr, end);
-		if (pud_none_or_clear_bad(pud))
-			continue;
-		walk_pmd_range(walker, pud, addr, next);
-	}
-}
-
-/*
- * walk_page_range - walk the page tables of a VMA with a callback
- * @vma - VMA to walk
- * @action - callback invoked for every bottom-level (PTE) page table
- * @private - private data passed to the callback function
- *
- * Recursively walk the page table for the memory area in a VMA, calling
- * a callback for every bottom-level (PTE) page table.
- */
-static inline void walk_page_range(struct vm_area_struct *vma,
-				   void (*action)(struct vm_area_struct *,
-						  pmd_t *, unsigned long,
-						  unsigned long, void *),
-				   void *private)
-{
-	unsigned long addr = vma->vm_start;
-	unsigned long end = vma->vm_end;
-	struct pmd_walker walker = {
-		.vma		= vma,
-		.private	= private,
-		.action		= action,
-	};
-	pgd_t *pgd;
-	unsigned long next;
-
-	for (pgd = pgd_offset(vma->vm_mm, addr); addr != end;
-	     pgd++, addr = next) {
-		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
-			continue;
-		walk_pud_range(&walker, pgd, addr, next);
-	}
-}
-
-static int show_smap(struct seq_file *m, void *v)
-{
-	struct vm_area_struct *vma = v;
-	struct mem_size_stats mss;
-
-	memset(&mss, 0, sizeof mss);
-	if (vma->vm_mm && !is_vm_hugetlb_page(vma))
-		walk_page_range(vma, smaps_pte_range, &mss);
-	return show_map_internal(m, v, &mss);
-}
-
-void clear_refs_smap(struct mm_struct *mm)
-{
+	struct task_struct *task;
+	char buffer[PROC_NUMBUF], *end;
+	struct mm_struct *mm;
 	struct vm_area_struct *vma;
 
-	down_read(&mm->mmap_sem);
-	for (vma = mm->mmap; vma; vma = vma->vm_next)
-		if (vma->vm_mm && !is_vm_hugetlb_page(vma))
-			walk_page_range(vma, clear_refs_pte_range, NULL);
-	flush_tlb_mm(mm);
-	up_read(&mm->mmap_sem);
-}
-
-static void *m_start(struct seq_file *m, loff_t *pos)
-{
-	struct proc_maps_private *priv = m->private;
-	unsigned long last_addr = m->version;
-	struct mm_struct *mm;
-	struct vm_area_struct *vma, *tail_vma = NULL;
-	loff_t l = *pos;
-
-	/* Clear the per syscall fields in priv */
-	priv->task = NULL;
-	priv->tail_vma = NULL;
-
-	/*
-	 * We remember last_addr rather than next_addr to hit with
-	 * mmap_cache most of the time. We have zero last_addr at
-	 * the beginning and also after lseek. We will have -1 last_addr
-	 * after the end of the vmas.
-	 */
-
-	if (last_addr == -1UL)
-		return NULL;
-
-	priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
-	if (!priv->task)
-		return NULL;
-
-	mm = mm_for_maps(priv->task);
-	if (!mm)
-		return NULL;
-
-	priv->tail_vma = tail_vma = get_gate_vma(priv->task);
-
-	/* Start with last addr hint */
-	if (last_addr && (vma = find_vma(mm, last_addr))) {
-		vma = vma->vm_next;
-		goto out;
-	}
-
-	/*
-	 * Check the vma index is within the range and do
-	 * sequential scan until m_index.
-	 */
-	vma = NULL;
-	if ((unsigned long)l < mm->map_count) {
-		vma = mm->mmap;
-		while (l-- && vma)
-			vma = vma->vm_next;
-		goto out;
-	}
-
-	if (l != mm->map_count)
-		tail_vma = NULL; /* After gate vma */
-
-out:
-	if (vma)
-		return vma;
-
-	/* End of vmas has been reached */
-	m->version = (tail_vma != NULL)? 0: -1UL;
-	up_read(&mm->mmap_sem);
-	mmput(mm);
-	return tail_vma;
-}
-
-static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
-{
-	if (vma && vma != priv->tail_vma) {
-		struct mm_struct *mm = vma->vm_mm;
+	memset(buffer, 0, sizeof(buffer));
+	if (count > sizeof(buffer) - 1)
+		count = sizeof(buffer) - 1;
+	if (copy_from_user(buffer, buf, count))
+		return -EFAULT;
+	if (!simple_strtol(buffer, &end, 0))
+		return -EINVAL;
+	if (*end == '\n')
+		end++;
+	task = get_proc_task(file->f_path.dentry->d_inode);
+	if (!task)
+		return -ESRCH;
+	mm = get_task_mm(task);
+	if (mm) {
+		down_read(&mm->mmap_sem);
+		for (vma = mm->mmap; vma; vma = vma->vm_next)
+			if (!is_vm_hugetlb_page(vma))
+				walk_page_range(mm, vma->vm_start, vma->vm_end,
+						&clear_refs_walk, vma);
+		flush_tlb_mm(mm);
 		up_read(&mm->mmap_sem);
 		mmput(mm);
 	}
+	put_task_struct(task);
+	if (end - buffer == 0)
+		return -EIO;
+	return end - buffer;
 }
 
-static void *m_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	struct proc_maps_private *priv = m->private;
-	struct vm_area_struct *vma = v;
-	struct vm_area_struct *tail_vma = priv->tail_vma;
-
-	(*pos)++;
-	if (vma && (vma != tail_vma) && vma->vm_next)
-		return vma->vm_next;
-	vma_stop(priv, vma);
-	return (vma != tail_vma)? tail_vma: NULL;
-}
-
-static void m_stop(struct seq_file *m, void *v)
-{
-	struct proc_maps_private *priv = m->private;
-	struct vm_area_struct *vma = v;
-
-	vma_stop(priv, vma);
-	if (priv->task)
-		put_task_struct(priv->task);
-}
-
-static struct seq_operations proc_pid_maps_op = {
-	.start	= m_start,
-	.next	= m_next,
-	.stop	= m_stop,
-	.show	= show_map
+const struct file_operations proc_clear_refs_operations = {
+	.write		= clear_refs_write,
 };
 
-static struct seq_operations proc_pid_smaps_op = {
-	.start	= m_start,
-	.next	= m_next,
-	.stop	= m_stop,
-	.show	= show_smap
+struct pagemapread {
+	char __user *out, *end;
 };
 
-static int do_maps_open(struct inode *inode, struct file *file,
-			struct seq_operations *ops)
+#define PM_ENTRY_BYTES sizeof(u64)
+#define PM_RESERVED_BITS    3
+#define PM_RESERVED_OFFSET  (64 - PM_RESERVED_BITS)
+#define PM_RESERVED_MASK    (((1LL<<PM_RESERVED_BITS)-1) << PM_RESERVED_OFFSET)
+#define PM_SPECIAL(nr)      (((nr) << PM_RESERVED_OFFSET) | PM_RESERVED_MASK)
+#define PM_NOT_PRESENT      PM_SPECIAL(1LL)
+#define PM_SWAP             PM_SPECIAL(2LL)
+#define PM_END_OF_BUFFER    1
+
+static int add_to_pagemap(unsigned long addr, u64 pfn,
+			  struct pagemapread *pm)
 {
-	struct proc_maps_private *priv;
-	int ret = -ENOMEM;
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (priv) {
-		priv->pid = proc_pid(inode);
-		ret = seq_open(file, ops);
-		if (!ret) {
-			struct seq_file *m = file->private_data;
-			m->private = priv;
-		} else {
-			kfree(priv);
-		}
+	/*
+	 * Make sure there's room in the buffer for an
+	 * entire entry.  Otherwise, only copy part of
+	 * the pfn.
+	 */
+	if (pm->out + PM_ENTRY_BYTES >= pm->end) {
+		if (copy_to_user(pm->out, &pfn, pm->end - pm->out))
+			return -EFAULT;
+		pm->out = pm->end;
+		return PM_END_OF_BUFFER;
 	}
+
+	if (put_user(pfn, pm->out))
+		return -EFAULT;
+	pm->out += PM_ENTRY_BYTES;
+	return 0;
+}
+
+static int pagemap_pte_hole(unsigned long start, unsigned long end,
+				void *private)
+{
+	struct pagemapread *pm = private;
+	unsigned long addr;
+	int err = 0;
+	for (addr = start; addr < end; addr += PAGE_SIZE) {
+		err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
+		if (err)
+			break;
+	}
+	return err;
+}
+
+static u64 swap_pte_to_pagemap_entry(pte_t pte)	/* PM_SWAP | type | offset */
+{
+	swp_entry_t e = pte_to_swp_entry(pte);
+	return PM_SWAP | swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
+}
+
+static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+			     void *private)
+{
+	struct pagemapread *pm = private;
+	pte_t *pte;
+	int err = 0;
+
+	for (; addr != end; addr += PAGE_SIZE) {
+		u64 pfn = PM_NOT_PRESENT;
+		pte = pte_offset_map(pmd, addr);
+		if (is_swap_pte(*pte))
+			pfn = swap_pte_to_pagemap_entry(*pte);
+		else if (pte_present(*pte))
+			pfn = pte_pfn(*pte);
+		/* unmap so we're not in atomic when we copy to userspace */
+		pte_unmap(pte);
+		err = add_to_pagemap(addr, pfn, pm);
+		if (err)
+			return err;
+	}
+
+	cond_resched();
+
+	return err;
+}
+
+static struct mm_walk pagemap_walk = {
+	.pmd_entry = pagemap_pte_range,
+	.pte_hole = pagemap_pte_hole
+};
+
+/*
+ * /proc/pid/pagemap - an array mapping virtual pages to pfns
+ *
+ * For each page in the address space, this file contains one 64-bit
+ * entry representing the corresponding physical page frame number
+ * (PFN) if the page is present. If there is a swap entry for the
+ * physical page, then an encoding of the swap file number and the
+ * page's offset into the swap file are returned. If no page is
+ * present at all, PM_NOT_PRESENT is returned. This allows determining
+ * precisely which pages are mapped (or in swap) and comparing mapped
+ * pages between processes.
+ *
+ * Efficient users of this interface will use /proc/pid/maps to
+ * determine which areas of memory are actually mapped and llseek to
+ * skip over unmapped regions.
+ */
+static ssize_t pagemap_read(struct file *file, char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+	struct page **pages, *page;
+	unsigned long uaddr, uend;
+	struct mm_struct *mm;
+	struct pagemapread pm;
+	int pagecount;
+	int ret = -ESRCH;
+
+	if (!task)
+		goto out;
+
+	ret = -EACCES;
+	if (!ptrace_may_attach(task))
+		goto out_task;	/* task reference is held from here on */
+
+	ret = -EINVAL;
+	/* file position must be aligned */
+	if (*ppos % PM_ENTRY_BYTES)
+		goto out_task;
+
+	ret = 0;
+	mm = get_task_mm(task);
+	if (!mm)
+		goto out_task;
+
+	ret = -ENOMEM;
+	uaddr = (unsigned long)buf & PAGE_MASK;
+	uend = (unsigned long)(buf + count);
+	pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
+	pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out_free;	/* kfree(NULL) is a no-op; still need mmput */
+
+	down_read(&current->mm->mmap_sem);
+	ret = get_user_pages(current, current->mm, uaddr, pagecount,
+			     1, 0, pages, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	if (ret < 0)
+		goto out_free;
+	pagecount = ret;	/* short pin: release only what was pinned */
+	pm.out = buf;
+	pm.end = buf + count;
+
+	if (!ptrace_may_attach(task)) {
+		ret = -EIO;
+	} else {
+		unsigned long src = *ppos;
+		unsigned long svpfn = src / PM_ENTRY_BYTES;
+		unsigned long start_vaddr = svpfn << PAGE_SHIFT;
+		unsigned long end_vaddr = TASK_SIZE_OF(task);
+
+		/* watch out for wraparound */
+		if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
+			start_vaddr = end_vaddr;
+
+		/*
+		 * The odds are that this will stop walking way
+		 * before end_vaddr, because the length of the
+		 * user buffer is tracked in "pm", and the walk
+		 * will stop when we hit the end of the buffer.
+		 */
+		ret = walk_page_range(mm, start_vaddr, end_vaddr,
+					&pagemap_walk, &pm);
+		if (ret == PM_END_OF_BUFFER)
+			ret = 0;
+		/* don't need mmap_sem for these, but this looks cleaner */
+		*ppos += pm.out - buf;
+		if (!ret)
+			ret = pm.out - buf;
+	}
+
+	for (; pagecount; pagecount--) {
+		page = pages[pagecount-1];
+		if (!PageReserved(page))
+			SetPageDirty(page);
+		page_cache_release(page);
+	}
+out_free:
+	kfree(pages);
+	mmput(mm);	/* every path reaching out_free holds an mm reference */
+out_task:
+	put_task_struct(task);
+out:
 	return ret;
 }
 
-static int maps_open(struct inode *inode, struct file *file)
-{
-	return do_maps_open(inode, file, &proc_pid_maps_op);
-}
-
-const struct file_operations proc_maps_operations = {
-	.open		= maps_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+const struct file_operations proc_pagemap_operations = {
+	.llseek		= mem_lseek, /* borrow this */
+	.read		= pagemap_read,
 };
+#endif /* CONFIG_PROC_PAGE_MONITOR */
 
 #ifdef CONFIG_NUMA
 extern int show_numa_map(struct seq_file *m, void *v);
@@ -545,15 +753,3 @@
 	.release	= seq_release_private,
 };
 #endif
-
-static int smaps_open(struct inode *inode, struct file *file)
-{
-	return do_maps_open(inode, file, &proc_pid_smaps_op);
-}
-
-const struct file_operations proc_smaps_operations = {
-	.open		= smaps_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_private,
-};
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 231fd5c..1953098 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2143,7 +2143,7 @@
 		/* if we are not on a block boundary */
 		if (length) {
 			length = blocksize - length;
-			zero_user_page(page, offset, length, KM_USER0);
+			zero_user(page, offset, length);
 			if (buffer_mapped(bh) && bh->b_blocknr != 0) {
 				mark_buffer_dirty(bh);
 			}
@@ -2367,7 +2367,7 @@
 			unlock_page(page);
 			return 0;
 		}
-		zero_user_page(page, last_offset, PAGE_CACHE_SIZE - last_offset, KM_USER0);
+		zero_user_segment(page, last_offset, PAGE_CACHE_SIZE);
 	}
 	bh = head;
 	block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 61983f3..10c80b5 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -25,13 +25,15 @@
 	struct hrtimer tmr;
 	ktime_t tintv;
 	wait_queue_head_t wqh;
+	u64 ticks;
 	int expired;
+	int clockid;
 };
 
 /*
  * This gets called when the timer event triggers. We set the "expired"
  * flag, but we do not re-arm the timer (in case it's necessary,
- * tintv.tv64 != 0) until the timer is read.
+ * tintv.tv64 != 0) until the timer is accessed.
  */
 static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
 {
@@ -40,13 +42,24 @@
 
 	spin_lock_irqsave(&ctx->wqh.lock, flags);
 	ctx->expired = 1;
+	ctx->ticks++;
 	wake_up_locked(&ctx->wqh);
 	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
 
 	return HRTIMER_NORESTART;
 }
 
-static void timerfd_setup(struct timerfd_ctx *ctx, int clockid, int flags,
+static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
+{
+	ktime_t now, remaining;
+
+	now = ctx->tmr.base->get_time();
+	remaining = ktime_sub(ctx->tmr.expires, now);
+
+	return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
+}
+
+static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
 			  const struct itimerspec *ktmr)
 {
 	enum hrtimer_mode htmode;
@@ -57,8 +70,9 @@
 
 	texp = timespec_to_ktime(ktmr->it_value);
 	ctx->expired = 0;
+	ctx->ticks = 0;
 	ctx->tintv = timespec_to_ktime(ktmr->it_interval);
-	hrtimer_init(&ctx->tmr, clockid, htmode);
+	hrtimer_init(&ctx->tmr, ctx->clockid, htmode);
 	ctx->tmr.expires = texp;
 	ctx->tmr.function = timerfd_tmrproc;
 	if (texp.tv64 != 0)
@@ -83,7 +97,7 @@
 	poll_wait(file, &ctx->wqh, wait);
 
 	spin_lock_irqsave(&ctx->wqh.lock, flags);
-	if (ctx->expired)
+	if (ctx->ticks)
 		events |= POLLIN;
 	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
 
@@ -102,11 +116,11 @@
 		return -EINVAL;
 	spin_lock_irq(&ctx->wqh.lock);
 	res = -EAGAIN;
-	if (!ctx->expired && !(file->f_flags & O_NONBLOCK)) {
+	if (!ctx->ticks && !(file->f_flags & O_NONBLOCK)) {
 		__add_wait_queue(&ctx->wqh, &wait);
 		for (res = 0;;) {
 			set_current_state(TASK_INTERRUPTIBLE);
-			if (ctx->expired) {
+			if (ctx->ticks) {
 				res = 0;
 				break;
 			}
@@ -121,22 +135,21 @@
 		__remove_wait_queue(&ctx->wqh, &wait);
 		__set_current_state(TASK_RUNNING);
 	}
-	if (ctx->expired) {
-		ctx->expired = 0;
-		if (ctx->tintv.tv64 != 0) {
+	if (ctx->ticks) {
+		ticks = ctx->ticks;
+		if (ctx->expired && ctx->tintv.tv64) {
 			/*
 			 * If tintv.tv64 != 0, this is a periodic timer that
 			 * needs to be re-armed. We avoid doing it in the timer
 			 * callback to avoid DoS attacks specifying a very
 			 * short timer period.
 			 */
-			ticks = (u64)
-				hrtimer_forward(&ctx->tmr,
-						hrtimer_cb_get_time(&ctx->tmr),
-						ctx->tintv);
+			ticks += hrtimer_forward_now(&ctx->tmr,
+						     ctx->tintv) - 1;
 			hrtimer_restart(&ctx->tmr);
-		} else
-			ticks = 1;
+		}
+		ctx->expired = 0;
+		ctx->ticks = 0;
 	}
 	spin_unlock_irq(&ctx->wqh.lock);
 	if (ticks)
@@ -150,76 +163,132 @@
 	.read		= timerfd_read,
 };
 
-asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
-			    const struct itimerspec __user *utmr)
+static struct file *timerfd_fget(int fd)
 {
-	int error;
+	struct file *file;
+
+	file = fget(fd);
+	if (!file)
+		return ERR_PTR(-EBADF);
+	if (file->f_op != &timerfd_fops) {
+		fput(file);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return file;
+}
+
+asmlinkage long sys_timerfd_create(int clockid, int flags)
+{
+	int error, ufd;
 	struct timerfd_ctx *ctx;
 	struct file *file;
 	struct inode *inode;
-	struct itimerspec ktmr;
+
+	if (flags)
+		return -EINVAL;
+	if (clockid != CLOCK_MONOTONIC &&
+	    clockid != CLOCK_REALTIME)
+		return -EINVAL;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	init_waitqueue_head(&ctx->wqh);
+	ctx->clockid = clockid;
+	hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
+
+	error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]",
+				 &timerfd_fops, ctx);
+	if (error) {
+		kfree(ctx);
+		return error;
+	}
+
+	return ufd;
+}
+
+asmlinkage long sys_timerfd_settime(int ufd, int flags,
+				    const struct itimerspec __user *utmr,
+				    struct itimerspec __user *otmr)
+{
+	struct file *file;
+	struct timerfd_ctx *ctx;
+	struct itimerspec ktmr, kotmr;
 
 	if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
 		return -EFAULT;
 
-	if (clockid != CLOCK_MONOTONIC &&
-	    clockid != CLOCK_REALTIME)
-		return -EINVAL;
 	if (!timespec_valid(&ktmr.it_value) ||
 	    !timespec_valid(&ktmr.it_interval))
 		return -EINVAL;
 
-	if (ufd == -1) {
-		ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
-		if (!ctx)
-			return -ENOMEM;
+	file = timerfd_fget(ufd);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+	ctx = file->private_data;
 
-		init_waitqueue_head(&ctx->wqh);
-
-		timerfd_setup(ctx, clockid, flags, &ktmr);
-
-		/*
-		 * When we call this, the initialization must be complete, since
-		 * anon_inode_getfd() will install the fd.
-		 */
-		error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]",
-					 &timerfd_fops, ctx);
-		if (error)
-			goto err_tmrcancel;
-	} else {
-		file = fget(ufd);
-		if (!file)
-			return -EBADF;
-		ctx = file->private_data;
-		if (file->f_op != &timerfd_fops) {
-			fput(file);
-			return -EINVAL;
-		}
-		/*
-		 * We need to stop the existing timer before reprogramming
-		 * it to the new values.
-		 */
-		for (;;) {
-			spin_lock_irq(&ctx->wqh.lock);
-			if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)
-				break;
-			spin_unlock_irq(&ctx->wqh.lock);
-			cpu_relax();
-		}
-		/*
-		 * Re-program the timer to the new value ...
-		 */
-		timerfd_setup(ctx, clockid, flags, &ktmr);
-
+	/*
+	 * We need to stop the existing timer before reprogramming
+	 * it to the new values.
+	 */
+	for (;;) {
+		spin_lock_irq(&ctx->wqh.lock);
+		if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)
+			break;
 		spin_unlock_irq(&ctx->wqh.lock);
-		fput(file);
+		cpu_relax();
 	}
 
-	return ufd;
+	/*
+	 * If the timer is expired and it's periodic, we need to advance it
+	 * because the caller may want to know the previous expiration time.
+	 * We do not update "ticks" and "expired" since the timer will be
+	 * re-programmed again in the following timerfd_setup() call.
+	 */
+	if (ctx->expired && ctx->tintv.tv64)
+		hrtimer_forward_now(&ctx->tmr, ctx->tintv);
 
-err_tmrcancel:
-	hrtimer_cancel(&ctx->tmr);
-	kfree(ctx);
-	return error;
+	kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
+	kotmr.it_interval = ktime_to_timespec(ctx->tintv);
+
+	/*
+	 * Re-program the timer to the new value ...
+	 */
+	timerfd_setup(ctx, flags, &ktmr);
+
+	spin_unlock_irq(&ctx->wqh.lock);
+	fput(file);
+	if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
+		return -EFAULT;
+
+	return 0;
+}
+
+asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr)
+{
+	struct file *file;
+	struct timerfd_ctx *ctx;
+	struct itimerspec kotmr;
+
+	file = timerfd_fget(ufd);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+	ctx = file->private_data;
+
+	spin_lock_irq(&ctx->wqh.lock);
+	if (ctx->expired && ctx->tintv.tv64) {
+		ctx->expired = 0;
+		ctx->ticks +=
+			hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1;
+		hrtimer_restart(&ctx->tmr);
+	}
+	kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
+	kotmr.it_interval = ktime_to_timespec(ctx->tintv);
+	spin_unlock_irq(&ctx->wqh.lock);
+	fput(file);
+
+	return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
 }
 
diff --git a/fs/xattr.c b/fs/xattr.c
index 6645b73..f7c8f87 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -105,6 +105,33 @@
 EXPORT_SYMBOL_GPL(vfs_setxattr);
 
 ssize_t
+xattr_getsecurity(struct inode *inode, const char *name, void *value,
+			size_t size)
+{
+	void *buffer = NULL;
+	ssize_t len;
+
+	if (!value || !size) {
+		len = security_inode_getsecurity(inode, name, &buffer, false);
+		goto out_noalloc;
+	}
+
+	len = security_inode_getsecurity(inode, name, &buffer, true);
+	if (len < 0)
+		return len;
+	if (size < len) {
+		len = -ERANGE;
+		goto out;
+	}
+	memcpy(value, buffer, len);
+out:
+	security_release_secctx(buffer, len);
+out_noalloc:
+	return len;
+}
+EXPORT_SYMBOL_GPL(xattr_getsecurity);
+
+ssize_t
 vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size)
 {
 	struct inode *inode = dentry->d_inode;
@@ -118,23 +145,23 @@
 	if (error)
 		return error;
 
-	if (inode->i_op->getxattr)
-		error = inode->i_op->getxattr(dentry, name, value, size);
-	else
-		error = -EOPNOTSUPP;
-
 	if (!strncmp(name, XATTR_SECURITY_PREFIX,
 				XATTR_SECURITY_PREFIX_LEN)) {
 		const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
-		int ret = security_inode_getsecurity(inode, suffix, value,
-						     size, error);
+		int ret = xattr_getsecurity(inode, suffix, value, size);
 		/*
 		 * Only overwrite the return value if a security module
 		 * is actually active.
 		 */
-		if (ret != -EOPNOTSUPP)
-			error = ret;
+		if (ret == -EOPNOTSUPP)
+			goto nolsm;
+		return ret;
 	}
+nolsm:
+	if (inode->i_op->getxattr)
+		error = inode->i_op->getxattr(dentry, name, value, size);
+	else
+		error = -EOPNOTSUPP;
 
 	return error;
 }
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index ed2b16d..e040f1c 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -92,8 +92,7 @@
 void
 kmem_free(void *ptr, size_t size)
 {
-	if (((unsigned long)ptr < VMALLOC_START) ||
-	    ((unsigned long)ptr >= VMALLOC_END)) {
+	if (!is_vmalloc_addr(ptr)) {
 		kfree(ptr);
 	} else {
 		vfree(ptr);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index a49dd8d..0382c19 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -709,8 +709,7 @@
 mem_to_page(
 	void			*addr)
 {
-	if (((unsigned long)addr < VMALLOC_START) ||
-	    ((unsigned long)addr >= VMALLOC_END)) {
+	if ((!is_vmalloc_addr(addr))) {
 		return virt_to_page(addr);
 	} else {
 		return vmalloc_to_page(addr);
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index d6a8ddd..6f614f3 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -155,7 +155,7 @@
 		if (status)
 			break;
 
-		zero_user_page(page, offset, bytes, KM_USER0);
+		zero_user(page, offset, bytes);
 
 		status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
 					page, fsdata);
diff --git a/include/asm-alpha/atomic.h b/include/asm-alpha/atomic.h
index f5cb7b8..ca88e54 100644
--- a/include/asm-alpha/atomic.h
+++ b/include/asm-alpha/atomic.h
@@ -100,7 +100,7 @@
 /*
  * Same as above, but return the result value
  */
-static __inline__ long atomic_add_return(int i, atomic_t * v)
+static inline int atomic_add_return(int i, atomic_t *v)
 {
 	long temp, result;
 	smp_mb();
diff --git a/include/asm-alpha/pci.h b/include/asm-alpha/pci.h
index 30ee766..d5b10ef 100644
--- a/include/asm-alpha/pci.h
+++ b/include/asm-alpha/pci.h
@@ -4,6 +4,7 @@
 #ifdef __KERNEL__
 
 #include <linux/spinlock.h>
+#include <linux/dma-mapping.h>
 #include <asm/scatterlist.h>
 #include <asm/machvec.h>
 
diff --git a/include/asm-alpha/pgalloc.h b/include/asm-alpha/pgalloc.h
index 471864e..fdbedac 100644
--- a/include/asm-alpha/pgalloc.h
+++ b/include/asm-alpha/pgalloc.h
@@ -31,7 +31,7 @@
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 
 static inline void
-pgd_free(pgd_t *pgd)
+pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	free_page((unsigned long)pgd);
 }
@@ -44,7 +44,7 @@
 }
 
 static inline void
-pmd_free(pmd_t *pmd)
+pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
 	free_page((unsigned long)pmd);
 }
@@ -52,7 +52,7 @@
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
 
 static inline void
-pte_free_kernel(pte_t *pte)
+pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	free_page((unsigned long)pte);
 }
@@ -67,7 +67,7 @@
 }
 
 static inline void
-pte_free(struct page *page)
+pte_free(struct mm_struct *mm, struct page *page)
 {
 	__free_page(page);
 }
diff --git a/include/asm-alpha/tlb.h b/include/asm-alpha/tlb.h
index aa91335..c136365 100644
--- a/include/asm-alpha/tlb.h
+++ b/include/asm-alpha/tlb.h
@@ -9,7 +9,7 @@
 
 #include <asm-generic/tlb.h>
 
-#define __pte_free_tlb(tlb,pte)			pte_free(pte)
-#define __pmd_free_tlb(tlb,pmd)			pmd_free(pmd)
+#define __pte_free_tlb(tlb, pte)			pte_free((tlb)->mm, pte)
+#define __pmd_free_tlb(tlb, pmd)			pmd_free((tlb)->mm, pmd)
  
 #endif
diff --git a/include/asm-alpha/tlbflush.h b/include/asm-alpha/tlbflush.h
index b9e9147..9d87aaa 100644
--- a/include/asm-alpha/tlbflush.h
+++ b/include/asm-alpha/tlbflush.h
@@ -142,6 +142,10 @@
 
 #endif /* CONFIG_SMP */
 
-#define flush_tlb_kernel_range(start, end) flush_tlb_all()
+static inline void flush_tlb_kernel_range(unsigned long start,
+					unsigned long end)
+{
+	flush_tlb_all();
+}
 
 #endif /* _ALPHA_TLBFLUSH_H */
diff --git a/include/asm-alpha/unistd.h b/include/asm-alpha/unistd.h
index 29bf2fd..5b5c174 100644
--- a/include/asm-alpha/unistd.h
+++ b/include/asm-alpha/unistd.h
@@ -442,7 +442,6 @@
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_GETHOSTNAME
-#define __ARCH_WANT_SYS_SOCKETCALL
 #define __ARCH_WANT_SYS_FADVISE64
 #define __ARCH_WANT_SYS_GETPGRP
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
diff --git a/include/asm-arm/arch-at91/at91_mci.h b/include/asm-arm/arch-at91/at91_mci.h
index c2e11cc..1551fc2 100644
--- a/include/asm-arm/arch-at91/at91_mci.h
+++ b/include/asm-arm/arch-at91/at91_mci.h
@@ -89,7 +89,7 @@
 #define		AT91_MCI_ENDRX		(1 <<  6)	/* End of RX Buffer */
 #define		AT91_MCI_ENDTX		(1 <<  7)	/* End fo TX Buffer */
 #define		AT91_MCI_SDIOIRQA	(1 <<  8)	/* SDIO Interrupt for Slot A */
-#define		At91_MCI_SDIOIRQB	(1 <<  9)	/* SDIO Interrupt for Slot B [AT91RM9200 only] */
+#define		AT91_MCI_SDIOIRQB	(1 <<  9)	/* SDIO Interrupt for Slot B */
 #define		AT91_MCI_RXBUFF		(1 << 14)	/* RX Buffer Full */
 #define		AT91_MCI_TXBUFE		(1 << 15)	/* TX Buffer Empty */
 #define		AT91_MCI_RINDE		(1 << 16)	/* Response Index Error */
diff --git a/include/asm-arm/arch-at91/at91rm9200.h b/include/asm-arm/arch-at91/at91rm9200.h
index 802891a..e8fc0b1 100644
--- a/include/asm-arm/arch-at91/at91rm9200.h
+++ b/include/asm-arm/arch-at91/at91rm9200.h
@@ -93,6 +93,11 @@
 #define AT91_RTC	(0xfffffe00 - AT91_BASE_SYS)	/* Real-Time Clock */
 #define AT91_MC		(0xffffff00 - AT91_BASE_SYS)	/* Memory Controllers */
 
+#define AT91_USART0	AT91RM9200_BASE_US0
+#define AT91_USART1	AT91RM9200_BASE_US1
+#define AT91_USART2	AT91RM9200_BASE_US2
+#define AT91_USART3	AT91RM9200_BASE_US3
+
 #define AT91_MATRIX	0	/* not supported */
 
 /*
diff --git a/include/asm-arm/arch-at91/at91sam9260.h b/include/asm-arm/arch-at91/at91sam9260.h
index 0427f86..c8934fe 100644
--- a/include/asm-arm/arch-at91/at91sam9260.h
+++ b/include/asm-arm/arch-at91/at91sam9260.h
@@ -99,6 +99,13 @@
 #define AT91_WDT	(0xfffffd40 - AT91_BASE_SYS)
 #define AT91_GPBR	(0xfffffd50 - AT91_BASE_SYS)
 
+#define AT91_USART0	AT91SAM9260_BASE_US0
+#define AT91_USART1	AT91SAM9260_BASE_US1
+#define AT91_USART2	AT91SAM9260_BASE_US2
+#define AT91_USART3	AT91SAM9260_BASE_US3
+#define AT91_USART4	AT91SAM9260_BASE_US4
+#define AT91_USART5	AT91SAM9260_BASE_US5
+
 
 /*
  * Internal Memory.
diff --git a/include/asm-arm/arch-at91/at91sam9261.h b/include/asm-arm/arch-at91/at91sam9261.h
index 9eb4595..c7c4778 100644
--- a/include/asm-arm/arch-at91/at91sam9261.h
+++ b/include/asm-arm/arch-at91/at91sam9261.h
@@ -84,6 +84,10 @@
 #define AT91_WDT	(0xfffffd40 - AT91_BASE_SYS)
 #define AT91_GPBR	(0xfffffd50 - AT91_BASE_SYS)
 
+#define AT91_USART0	AT91SAM9261_BASE_US0
+#define AT91_USART1	AT91SAM9261_BASE_US1
+#define AT91_USART2	AT91SAM9261_BASE_US2
+
 
 /*
  * Internal Memory.
diff --git a/include/asm-arm/arch-at91/at91sam9263.h b/include/asm-arm/arch-at91/at91sam9263.h
index 115c47a..018a647 100644
--- a/include/asm-arm/arch-at91/at91sam9263.h
+++ b/include/asm-arm/arch-at91/at91sam9263.h
@@ -101,6 +101,10 @@
 #define AT91_RTT1	(0xfffffd50 - AT91_BASE_SYS)
 #define AT91_GPBR	(0xfffffd60 - AT91_BASE_SYS)
 
+#define AT91_USART0	AT91SAM9263_BASE_US0
+#define AT91_USART1	AT91SAM9263_BASE_US1
+#define AT91_USART2	AT91SAM9263_BASE_US2
+
 #define AT91_SMC	AT91_SMC0
 
 /*
diff --git a/include/asm-arm/arch-at91/at91sam9rl.h b/include/asm-arm/arch-at91/at91sam9rl.h
index 8a9708a..16d2832 100644
--- a/include/asm-arm/arch-at91/at91sam9rl.h
+++ b/include/asm-arm/arch-at91/at91sam9rl.h
@@ -94,6 +94,11 @@
 #define AT91_GPBR	(0xfffffd60 - AT91_BASE_SYS)
 #define AT91_RTC	(0xfffffe00 - AT91_BASE_SYS)
 
+#define AT91_USART0	AT91SAM9RL_BASE_US0
+#define AT91_USART1	AT91SAM9RL_BASE_US1
+#define AT91_USART2	AT91SAM9RL_BASE_US2
+#define AT91_USART3	AT91SAM9RL_BASE_US3
+
 
 /*
  * Internal Memory.
diff --git a/include/asm-arm/arch-at91/uncompress.h b/include/asm-arm/arch-at91/uncompress.h
index 272a7e0..f5636a8 100644
--- a/include/asm-arm/arch-at91/uncompress.h
+++ b/include/asm-arm/arch-at91/uncompress.h
@@ -22,7 +22,23 @@
 #define __ASM_ARCH_UNCOMPRESS_H
 
 #include <asm/io.h>
-#include <asm/arch/at91_dbgu.h>
+#include <linux/atmel_serial.h>
+
+#if defined(CONFIG_AT91_EARLY_DBGU)
+#define UART_OFFSET (AT91_DBGU + AT91_BASE_SYS)
+#elif defined(CONFIG_AT91_EARLY_USART0)
+#define UART_OFFSET AT91_USART0
+#elif defined(CONFIG_AT91_EARLY_USART1)
+#define UART_OFFSET AT91_USART1
+#elif defined(CONFIG_AT91_EARLY_USART2)
+#define UART_OFFSET AT91_USART2
+#elif defined(CONFIG_AT91_EARLY_USART3)
+#define UART_OFFSET AT91_USART3
+#elif defined(CONFIG_AT91_EARLY_USART4)
+#define UART_OFFSET AT91_USART4
+#elif defined(CONFIG_AT91_EARLY_USART5)
+#define UART_OFFSET AT91_USART5
+#endif
 
 /*
  * The following code assumes the serial port has already been
@@ -33,22 +49,22 @@
  */
 static void putc(int c)
 {
-#ifdef AT91_DBGU
-	void __iomem *sys = (void __iomem *) AT91_BASE_SYS;	/* physical address */
+#ifdef UART_OFFSET
+	void __iomem *sys = (void __iomem *) UART_OFFSET;	/* physical address */
 
-	while (!(__raw_readl(sys + AT91_DBGU_SR) & AT91_DBGU_TXRDY))
+	while (!(__raw_readl(sys + ATMEL_US_CSR) & ATMEL_US_TXRDY))
 		barrier();
-	__raw_writel(c, sys + AT91_DBGU_THR);
+	__raw_writel(c, sys + ATMEL_US_THR);
 #endif
 }
 
 static inline void flush(void)
 {
-#ifdef AT91_DBGU
-	void __iomem *sys = (void __iomem *) AT91_BASE_SYS;	/* physical address */
+#ifdef UART_OFFSET
+	void __iomem *sys = (void __iomem *) UART_OFFSET;	/* physical address */
 
 	/* wait for transmission to complete */
-	while (!(__raw_readl(sys + AT91_DBGU_SR) & AT91_DBGU_TXEMPTY))
+	while (!(__raw_readl(sys + ATMEL_US_CSR) & ATMEL_US_TXEMPTY))
 		barrier();
 #endif
 }
diff --git a/include/asm-arm/arch-ixp4xx/cpu.h b/include/asm-arm/arch-ixp4xx/cpu.h
index d2523b3..2fa3d6b 100644
--- a/include/asm-arm/arch-ixp4xx/cpu.h
+++ b/include/asm-arm/arch-ixp4xx/cpu.h
@@ -28,4 +28,19 @@
 #define cpu_is_ixp46x()	((processor_id & IXP4XX_PROCESSOR_ID_MASK) == \
 			  IXP465_PROCESSOR_ID_VALUE)
 
+static inline u32 ixp4xx_read_feature_bits(void)
+{
+	unsigned int val = ~*IXP4XX_EXP_CFG2;
+	val &= ~IXP4XX_FEATURE_RESERVED;
+	if (!cpu_is_ixp46x())
+		val &= ~IXP4XX_FEATURE_IXP46X_ONLY;
+
+	return val;
+}
+
+static inline void ixp4xx_write_feature_bits(u32 value)
+{
+	*IXP4XX_EXP_CFG2 = ~value;
+}
+
 #endif  /* _ASM_ARCH_CPU_H */
diff --git a/include/asm-arm/arch-ixp4xx/dsmg600.h b/include/asm-arm/arch-ixp4xx/dsmg600.h
index a19605a..b7673e1 100644
--- a/include/asm-arm/arch-ixp4xx/dsmg600.h
+++ b/include/asm-arm/arch-ixp4xx/dsmg600.h
@@ -40,18 +40,13 @@
 /* Buttons */
 
 #define DSMG600_PB_GPIO		15	/* power button */
-#define DSMG600_PB_BM		(1L << DSMG600_PB_GPIO)
-
 #define DSMG600_RB_GPIO		3	/* reset button */
 
-#define DSMG600_RB_IRQ		IRQ_IXP4XX_GPIO3
+/* Power control */
 
 #define DSMG600_PO_GPIO		2	/* power off */
 
 /* LEDs */
 
 #define DSMG600_LED_PWR_GPIO	0
-#define DSMG600_LED_PWR_BM	(1L << DSMG600_LED_PWR_GPIO)
-
 #define DSMG600_LED_WLAN_GPIO	14
-#define DSMG600_LED_WLAN_BM	(1L << DSMG600_LED_WLAN_GPIO)
diff --git a/include/asm-arm/arch-ixp4xx/hardware.h b/include/asm-arm/arch-ixp4xx/hardware.h
index 297ceda..73e8dc3 100644
--- a/include/asm-arm/arch-ixp4xx/hardware.h
+++ b/include/asm-arm/arch-ixp4xx/hardware.h
@@ -27,13 +27,13 @@
 
 #define pcibios_assign_all_busses()	1
 
+/* Register locations and bits */
+#include "ixp4xx-regs.h"
+
 #ifndef __ASSEMBLER__
 #include <asm/arch/cpu.h>
 #endif
 
-/* Register locations and bits */
-#include "ixp4xx-regs.h"
-
 /* Platform helper functions and definitions */
 #include "platform.h"
 
diff --git a/include/asm-arm/arch-ixp4xx/io.h b/include/asm-arm/arch-ixp4xx/io.h
index 9c5d235..de181ce 100644
--- a/include/asm-arm/arch-ixp4xx/io.h
+++ b/include/asm-arm/arch-ixp4xx/io.h
@@ -13,6 +13,8 @@
 #ifndef __ASM_ARM_ARCH_IO_H
 #define __ASM_ARM_ARCH_IO_H
 
+#include <linux/bitops.h>
+
 #include <asm/hardware.h>
 
 #define IO_SPACE_LIMIT 0xffff0000
diff --git a/include/asm-arm/arch-ixp4xx/ixp4xx-regs.h b/include/asm-arm/arch-ixp4xx/ixp4xx-regs.h
index 1205c28..68aca85 100644
--- a/include/asm-arm/arch-ixp4xx/ixp4xx-regs.h
+++ b/include/asm-arm/arch-ixp4xx/ixp4xx-regs.h
@@ -15,10 +15,6 @@
  *
  */
 
-#ifndef __ASM_ARCH_HARDWARE_H__
-#error "Do not include this directly, instead #include <asm/hardware.h>"
-#endif
-
 #ifndef _ASM_ARM_IXP4XX_H_
 #define _ASM_ARM_IXP4XX_H_
 
@@ -607,4 +603,36 @@
 
 #define DCMD_LENGTH	0x01fff		/* length mask (max = 8K - 1) */
 
+/* "fuse" bits of IXP_EXP_CFG2 */
+#define IXP4XX_FEATURE_RCOMP		(1 << 0)
+#define IXP4XX_FEATURE_USB_DEVICE	(1 << 1)
+#define IXP4XX_FEATURE_HASH		(1 << 2)
+#define IXP4XX_FEATURE_AES		(1 << 3)
+#define IXP4XX_FEATURE_DES		(1 << 4)
+#define IXP4XX_FEATURE_HDLC		(1 << 5)
+#define IXP4XX_FEATURE_AAL		(1 << 6)
+#define IXP4XX_FEATURE_HSS		(1 << 7)
+#define IXP4XX_FEATURE_UTOPIA		(1 << 8)
+#define IXP4XX_FEATURE_NPEB_ETH0	(1 << 9)
+#define IXP4XX_FEATURE_NPEC_ETH		(1 << 10)
+#define IXP4XX_FEATURE_RESET_NPEA	(1 << 11)
+#define IXP4XX_FEATURE_RESET_NPEB	(1 << 12)
+#define IXP4XX_FEATURE_RESET_NPEC	(1 << 13)
+#define IXP4XX_FEATURE_PCI		(1 << 14)
+#define IXP4XX_FEATURE_ECC_TIMESYNC	(1 << 15)
+#define IXP4XX_FEATURE_UTOPIA_PHY_LIMIT	(3 << 16)
+#define IXP4XX_FEATURE_USB_HOST		(1 << 18)
+#define IXP4XX_FEATURE_NPEA_ETH		(1 << 19)
+#define IXP4XX_FEATURE_NPEB_ETH_1_TO_3	(1 << 20)
+#define IXP4XX_FEATURE_RSA		(1 << 21)
+#define IXP4XX_FEATURE_XSCALE_MAX_FREQ	(3 << 22)
+#define IXP4XX_FEATURE_RESERVED		(0xFF << 24)
+
+#define IXP4XX_FEATURE_IXP46X_ONLY (IXP4XX_FEATURE_ECC_TIMESYNC |	\
+				    IXP4XX_FEATURE_USB_HOST |		\
+				    IXP4XX_FEATURE_NPEA_ETH |		\
+				    IXP4XX_FEATURE_NPEB_ETH_1_TO_3 |	\
+				    IXP4XX_FEATURE_RSA |		\
+				    IXP4XX_FEATURE_XSCALE_MAX_FREQ)
+
 #endif
diff --git a/include/asm-arm/arch-ixp4xx/nas100d.h b/include/asm-arm/arch-ixp4xx/nas100d.h
index 131e0a1..98d9378 100644
--- a/include/asm-arm/arch-ixp4xx/nas100d.h
+++ b/include/asm-arm/arch-ixp4xx/nas100d.h
@@ -38,15 +38,15 @@
 
 /* Buttons */
 
-#define NAS100D_PB_GPIO         14
-#define NAS100D_RB_GPIO         4
+#define NAS100D_PB_GPIO         14   /* power button */
+#define NAS100D_RB_GPIO         4    /* reset button */
+
+/* Power control */
+
 #define NAS100D_PO_GPIO         12   /* power off */
 
-#define NAS100D_PB_IRQ          IRQ_IXP4XX_GPIO14
-#define NAS100D_RB_IRQ          IRQ_IXP4XX_GPIO4
+/* LEDs */
 
-/*
-#define NAS100D_PB_BM           (1L << NAS100D_PB_GPIO)
-#define NAS100D_PO_BM           (1L << NAS100D_PO_GPIO)
-#define NAS100D_RB_BM           (1L << NAS100D_RB_GPIO)
-*/
+#define NAS100D_LED_WLAN_GPIO	0
+#define NAS100D_LED_DISK_GPIO	3
+#define NAS100D_LED_PWR_GPIO	15
diff --git a/include/asm-arm/arch-ixp4xx/npe.h b/include/asm-arm/arch-ixp4xx/npe.h
new file mode 100644
index 0000000..37d0511
--- /dev/null
+++ b/include/asm-arm/arch-ixp4xx/npe.h
@@ -0,0 +1,39 @@
+#ifndef __IXP4XX_NPE_H
+#define __IXP4XX_NPE_H
+
+#include <linux/kernel.h>
+
+extern const char *npe_names[];
+
+struct npe_regs {
+	u32 exec_addr, exec_data, exec_status_cmd, exec_count;
+	u32 action_points[4];
+	u32 watchpoint_fifo, watch_count;
+	u32 profile_count;
+	u32 messaging_status, messaging_control;
+	u32 mailbox_status, /*messaging_*/ in_out_fifo;
+};
+
+struct npe {
+	struct resource *mem_res;
+	struct npe_regs __iomem *regs;
+	u32 regs_phys;
+	int id;
+	int valid;
+};
+
+
+static inline const char *npe_name(struct npe *npe)
+{
+	return npe_names[npe->id];
+}
+
+int npe_running(struct npe *npe);
+int npe_send_message(struct npe *npe, const void *msg, const char *what);
+int npe_recv_message(struct npe *npe, void *msg, const char *what);
+int npe_send_recv_message(struct npe *npe, void *msg, const char *what);
+int npe_load_firmware(struct npe *npe, const char *name, struct device *dev);
+struct npe *npe_request(int id);
+void npe_release(struct npe *npe);
+
+#endif /* __IXP4XX_NPE_H */
diff --git a/include/asm-arm/arch-ixp4xx/nslu2.h b/include/asm-arm/arch-ixp4xx/nslu2.h
index 850fdc5..714bbc6 100644
--- a/include/asm-arm/arch-ixp4xx/nslu2.h
+++ b/include/asm-arm/arch-ixp4xx/nslu2.h
@@ -39,34 +39,17 @@
 
 /* Buttons */
 
-#define NSLU2_PB_GPIO		5
+#define NSLU2_PB_GPIO		5	/* power button */
 #define NSLU2_PO_GPIO		8	/* power off */
-#define NSLU2_RB_GPIO		12
-
-#define NSLU2_PB_IRQ		IRQ_IXP4XX_GPIO5
-#define NSLU2_RB_IRQ		IRQ_IXP4XX_GPIO12
-
-#define NSLU2_PB_BM		(1L << NSLU2_PB_GPIO)
-#define NSLU2_PO_BM		(1L << NSLU2_PO_GPIO)
-#define NSLU2_RB_BM		(1L << NSLU2_RB_GPIO)
+#define NSLU2_RB_GPIO		12	/* reset button */
 
 /* Buzzer */
 
 #define NSLU2_GPIO_BUZZ		4
-#define NSLU2_BZ_BM		(1L << NSLU2_GPIO_BUZZ)
 
 /* LEDs */
 
 #define NSLU2_LED_RED_GPIO	0
 #define NSLU2_LED_GRN_GPIO	1
-
-#define NSLU2_LED_RED_BM	(1L << NSLU2_LED_RED_GPIO)
-#define NSLU2_LED_GRN_BM	(1L << NSLU2_LED_GRN_GPIO)
-
 #define NSLU2_LED_DISK1_GPIO	3
 #define NSLU2_LED_DISK2_GPIO	2
-
-#define NSLU2_LED_DISK1_BM	(1L << NSLU2_LED_DISK1_GPIO)
-#define NSLU2_LED_DISK2_BM	(1L << NSLU2_LED_DISK2_GPIO)
-
-
diff --git a/include/asm-arm/arch-ixp4xx/platform.h b/include/asm-arm/arch-ixp4xx/platform.h
index 2ce28e3..a1f2b54 100644
--- a/include/asm-arm/arch-ixp4xx/platform.h
+++ b/include/asm-arm/arch-ixp4xx/platform.h
@@ -91,6 +91,27 @@
 
 struct sys_timer;
 
+#define IXP4XX_ETH_NPEA		0x00
+#define IXP4XX_ETH_NPEB		0x10
+#define IXP4XX_ETH_NPEC		0x20
+
+/* Information about built-in Ethernet MAC interfaces */
+struct eth_plat_info {
+	u8 phy;		/* MII PHY ID, 0 - 31 */
+	u8 rxq;		/* configurable, currently 0 - 31 only */
+	u8 txreadyq;
+	u8 hwaddr[6];
+};
+
+/* Information about built-in HSS (synchronous serial) interfaces */
+struct hss_plat_info {
+	int (*set_clock)(int port, unsigned int clock_type);
+	int (*open)(int port, void *pdev,
+		    void (*set_carrier_cb)(void *pdev, int carrier));
+	void (*close)(int port, void *pdev);
+	u8 txreadyq;
+};
+
 /*
  * Frequency of clock used for primary clocksource
  */
diff --git a/include/asm-arm/arch-ixp4xx/qmgr.h b/include/asm-arm/arch-ixp4xx/qmgr.h
new file mode 100644
index 0000000..1e52b95
--- /dev/null
+++ b/include/asm-arm/arch-ixp4xx/qmgr.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2007 Krzysztof Halasa <khc@pm.waw.pl>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef IXP4XX_QMGR_H
+#define IXP4XX_QMGR_H
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+
+#define HALF_QUEUES	32
+#define QUEUES		64	/* only 32 lower queues currently supported */
+#define MAX_QUEUE_LENGTH 4	/* in dwords */
+
+#define QUEUE_STAT1_EMPTY		1 /* queue status bits */
+#define QUEUE_STAT1_NEARLY_EMPTY	2
+#define QUEUE_STAT1_NEARLY_FULL		4
+#define QUEUE_STAT1_FULL		8
+#define QUEUE_STAT2_UNDERFLOW		1
+#define QUEUE_STAT2_OVERFLOW		2
+
+#define QUEUE_WATERMARK_0_ENTRIES	0
+#define QUEUE_WATERMARK_1_ENTRY		1
+#define QUEUE_WATERMARK_2_ENTRIES	2
+#define QUEUE_WATERMARK_4_ENTRIES	3
+#define QUEUE_WATERMARK_8_ENTRIES	4
+#define QUEUE_WATERMARK_16_ENTRIES	5
+#define QUEUE_WATERMARK_32_ENTRIES	6
+#define QUEUE_WATERMARK_64_ENTRIES	7
+
+/* queue interrupt request conditions */
+#define QUEUE_IRQ_SRC_EMPTY		0
+#define QUEUE_IRQ_SRC_NEARLY_EMPTY	1
+#define QUEUE_IRQ_SRC_NEARLY_FULL	2
+#define QUEUE_IRQ_SRC_FULL		3
+#define QUEUE_IRQ_SRC_NOT_EMPTY		4
+#define QUEUE_IRQ_SRC_NOT_NEARLY_EMPTY	5
+#define QUEUE_IRQ_SRC_NOT_NEARLY_FULL	6
+#define QUEUE_IRQ_SRC_NOT_FULL		7
+
+struct qmgr_regs {
+	u32 acc[QUEUES][MAX_QUEUE_LENGTH]; /* 0x000 - 0x3FF */
+	u32 stat1[4];		/* 0x400 - 0x40F */
+	u32 stat2[2];		/* 0x410 - 0x417 */
+	u32 statne_h;		/* 0x418 - queue nearly empty */
+	u32 statf_h;		/* 0x41C - queue full */
+	u32 irqsrc[4];		/* 0x420 - 0x42F IRQ source */
+	u32 irqen[2];		/* 0x430 - 0x437 IRQ enabled */
+	u32 irqstat[2];		/* 0x438 - 0x43F - IRQ access only */
+	u32 reserved[1776];
+	u32 sram[2048];		/* 0x2000 - 0x3FFF - config and buffer */
+};
+
+void qmgr_set_irq(unsigned int queue, int src,
+		  void (*handler)(void *pdev), void *pdev);
+void qmgr_enable_irq(unsigned int queue);
+void qmgr_disable_irq(unsigned int queue);
+
+/* request_ and release_queue() must be called from non-IRQ context */
+int qmgr_request_queue(unsigned int queue, unsigned int len /* dwords */,
+		       unsigned int nearly_empty_watermark,
+		       unsigned int nearly_full_watermark);
+void qmgr_release_queue(unsigned int queue);
+
+
+static inline void qmgr_put_entry(unsigned int queue, u32 val)
+{
+	extern struct qmgr_regs __iomem *qmgr_regs;
+	__raw_writel(val, &qmgr_regs->acc[queue][0]);
+}
+
+static inline u32 qmgr_get_entry(unsigned int queue)
+{
+	extern struct qmgr_regs __iomem *qmgr_regs;
+	return __raw_readl(&qmgr_regs->acc[queue][0]);
+}
+
+static inline int qmgr_get_stat1(unsigned int queue)
+{
+	extern struct qmgr_regs __iomem *qmgr_regs;
+	return (__raw_readl(&qmgr_regs->stat1[queue >> 3])
+		>> ((queue & 7) << 2)) & 0xF;
+}
+
+static inline int qmgr_get_stat2(unsigned int queue)
+{
+	extern struct qmgr_regs __iomem *qmgr_regs;
+	return (__raw_readl(&qmgr_regs->stat2[queue >> 4])
+		>> ((queue & 0xF) << 1)) & 0x3;
+}
+
+static inline int qmgr_stat_empty(unsigned int queue)
+{
+	return !!(qmgr_get_stat1(queue) & QUEUE_STAT1_EMPTY);
+}
+
+static inline int qmgr_stat_nearly_empty(unsigned int queue)
+{
+	return !!(qmgr_get_stat1(queue) & QUEUE_STAT1_NEARLY_EMPTY);
+}
+
+static inline int qmgr_stat_nearly_full(unsigned int queue)
+{
+	return !!(qmgr_get_stat1(queue) & QUEUE_STAT1_NEARLY_FULL);
+}
+
+static inline int qmgr_stat_full(unsigned int queue)
+{
+	return !!(qmgr_get_stat1(queue) & QUEUE_STAT1_FULL);
+}
+
+static inline int qmgr_stat_underflow(unsigned int queue)
+{
+	return !!(qmgr_get_stat2(queue) & QUEUE_STAT2_UNDERFLOW);
+}
+
+static inline int qmgr_stat_overflow(unsigned int queue)
+{
+	return !!(qmgr_get_stat2(queue) & QUEUE_STAT2_OVERFLOW);
+}
+
+#endif
diff --git a/include/asm-arm/arch-ixp4xx/uncompress.h b/include/asm-arm/arch-ixp4xx/uncompress.h
index f7a35b7..34ef48f 100644
--- a/include/asm-arm/arch-ixp4xx/uncompress.h
+++ b/include/asm-arm/arch-ixp4xx/uncompress.h
@@ -13,7 +13,7 @@
 #ifndef _ARCH_UNCOMPRESS_H_
 #define _ARCH_UNCOMPRESS_H_
 
-#include <asm/hardware.h>
+#include "ixp4xx-regs.h"
 #include <asm/mach-types.h>
 #include <linux/serial_reg.h>
 
diff --git a/include/asm-arm/arch-pxa/gpio.h b/include/asm-arm/arch-pxa/gpio.h
index 9dbc2dc..bdbf5f9 100644
--- a/include/asm-arm/arch-pxa/gpio.h
+++ b/include/asm-arm/arch-pxa/gpio.h
@@ -28,43 +28,35 @@
 #include <asm/irq.h>
 #include <asm/hardware.h>
 
-static inline int gpio_request(unsigned gpio, const char *label)
+#include <asm-generic/gpio.h>
+
+
+/* NOTE: some PXAs have fewer on-chip GPIOs (like PXA255, with 85).
+ * Those cases currently cause holes in the GPIO number space.
+ */
+#define NR_BUILTIN_GPIO 128
+
+static inline int gpio_get_value(unsigned gpio)
 {
-	return 0;
-}
-
-static inline void gpio_free(unsigned gpio)
-{
-	return;
-}
-
-extern int gpio_direction_input(unsigned gpio);
-extern int gpio_direction_output(unsigned gpio, int value);
-
-static inline int __gpio_get_value(unsigned gpio)
-{
-	return GPLR(gpio) & GPIO_bit(gpio);
-}
-
-#define gpio_get_value(gpio)			\
-	(__builtin_constant_p(gpio) ?		\
-	 __gpio_get_value(gpio) :		\
-	 pxa_gpio_get_value(gpio))
-
-static inline void __gpio_set_value(unsigned gpio, int value)
-{
-	if (value)
-		GPSR(gpio) = GPIO_bit(gpio);
+	if (__builtin_constant_p(gpio) && (gpio < NR_BUILTIN_GPIO))
+		return GPLR(gpio) & GPIO_bit(gpio);
 	else
-		GPCR(gpio) = GPIO_bit(gpio);
+		return __gpio_get_value(gpio);
 }
 
-#define gpio_set_value(gpio,value)		\
-	(__builtin_constant_p(gpio) ?		\
-	 __gpio_set_value(gpio, value) :	\
-	 pxa_gpio_set_value(gpio, value))
+static inline void gpio_set_value(unsigned gpio, int value)
+{
+	if (__builtin_constant_p(gpio) && (gpio < NR_BUILTIN_GPIO)) {
+		if (value)
+			GPSR(gpio) = GPIO_bit(gpio);
+		else
+			GPCR(gpio) = GPIO_bit(gpio);
+	} else {
+		__gpio_set_value(gpio, value);
+	}
+}
 
-#include <asm-generic/gpio.h>			/* cansleep wrappers */
+#define gpio_cansleep __gpio_cansleep
 
 #define gpio_to_irq(gpio)	IRQ_GPIO(gpio)
 #define irq_to_gpio(irq)	IRQ_TO_GPIO(irq)
diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h
index 16ed24d..ac175b4 100644
--- a/include/asm-arm/arch-pxa/pxa-regs.h
+++ b/include/asm-arm/arch-pxa/pxa-regs.h
@@ -1131,6 +1131,19 @@
  * General Purpose I/O
  */
 
+#define GPIO0_BASE	((void __iomem *)io_p2v(0x40E00000))
+#define GPIO1_BASE	((void __iomem *)io_p2v(0x40E00004))
+#define GPIO2_BASE	((void __iomem *)io_p2v(0x40E00008))
+#define GPIO3_BASE	((void __iomem *)io_p2v(0x40E00100))
+
+#define GPLR_OFFSET	0x00
+#define GPDR_OFFSET	0x0C
+#define GPSR_OFFSET	0x18
+#define GPCR_OFFSET	0x24
+#define GRER_OFFSET	0x30
+#define GFER_OFFSET	0x3C
+#define GEDR_OFFSET	0x48
+
 #define GPLR0		__REG(0x40E00000)  /* GPIO Pin-Level Register GPIO<31:0> */
 #define GPLR1		__REG(0x40E00004)  /* GPIO Pin-Level Register GPIO<63:32> */
 #define GPLR2		__REG(0x40E00008)  /* GPIO Pin-Level Register GPIO<80:64> */
diff --git a/include/asm-arm/arch-pxa/pxa3xx-regs.h b/include/asm-arm/arch-pxa/pxa3xx-regs.h
index 66d5411..8e1b3ea 100644
--- a/include/asm-arm/arch-pxa/pxa3xx-regs.h
+++ b/include/asm-arm/arch-pxa/pxa3xx-regs.h
@@ -12,6 +12,19 @@
 
 #ifndef __ASM_ARCH_PXA3XX_REGS_H
 #define __ASM_ARCH_PXA3XX_REGS_H
+/*
+ * Service Power Management Unit (MPMU)
+ */
+#define PMCR		__REG(0x40F50000)	/* Power Manager Control Register */
+#define PSR		__REG(0x40F50004)	/* Power Manager S2 Status Register */
+#define PSPR		__REG(0x40F50008)	/* Power Manager Scratch Pad Register */
+#define PCFR		__REG(0x40F5000C)	/* Power Manager General Configuration Register */
+#define PWER		__REG(0x40F50010)	/* Power Manager Wake-up Enable Register */
+#define PWSR		__REG(0x40F50014)	/* Power Manager Wake-up Status Register */
+#define PECR		__REG(0x40F50018)	/* Power Manager EXT_WAKEUP[1:0] Control Register */
+#define DCDCSR		__REG(0x40F50080)	/* DC-DC Controller Status Register */
+#define PVCR		__REG(0x40F50100)	/* Power Manager Voltage Change Control Register */
+#define PCMD(x)		__REG(0x40F50110 + ((x) << 2))
 
 /*
  * Slave Power Managment Unit
diff --git a/include/asm-arm/arch-realview/board-eb.h b/include/asm-arm/arch-realview/board-eb.h
new file mode 100644
index 0000000..3e437b7
--- /dev/null
+++ b/include/asm-arm/arch-realview/board-eb.h
@@ -0,0 +1,171 @@
+/*
+ * include/asm-arm/arch-realview/board-eb.h
+ *
+ * Copyright (C) 2007 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef __ASM_ARCH_BOARD_EB_H
+#define __ASM_ARCH_BOARD_EB_H
+
+#include <asm/arch/platform.h>
+
+/*
+ * RealView EB + ARM11MPCore peripheral addresses
+ */
+#ifdef CONFIG_REALVIEW_EB_ARM11MP_REVB
+#define REALVIEW_EB11MP_SCU_BASE	0x10100000	/* SCU registers */
+#define REALVIEW_EB11MP_GIC_CPU_BASE	0x10100100	/* Generic interrupt controller CPU interface */
+#define REALVIEW_EB11MP_TWD_BASE	0x10100700
+#define REALVIEW_EB11MP_TWD_SIZE	0x00000100
+#define REALVIEW_EB11MP_GIC_DIST_BASE	0x10101000	/* Generic interrupt controller distributor */
+#define REALVIEW_EB11MP_L220_BASE	0x10102000	/* L220 registers */
+#define REALVIEW_EB11MP_SYS_PLD_CTRL1	0xD8		/* Register offset for MPCore sysctl */
+#else
+#define REALVIEW_EB11MP_SCU_BASE	0x1F000000	/* SCU registers */
+#define REALVIEW_EB11MP_GIC_CPU_BASE	0x1F000100	/* Generic interrupt controller CPU interface */
+#define REALVIEW_EB11MP_TWD_BASE	0x1F000700
+#define REALVIEW_EB11MP_TWD_SIZE	0x00000100
+#define REALVIEW_EB11MP_GIC_DIST_BASE	0x1F001000	/* Generic interrupt controller distributor */
+#define REALVIEW_EB11MP_L220_BASE	0x1F002000	/* L220 registers */
+#define REALVIEW_EB11MP_SYS_PLD_CTRL1	0x74		/* Register offset for MPCore sysctl */
+#endif
+
+#define IRQ_EB_GIC_START	32
+
+/*
+ * RealView EB interrupt sources
+ */
+#define IRQ_EB_WDOG		(IRQ_EB_GIC_START + 0)		/* Watchdog timer */
+#define IRQ_EB_SOFT		(IRQ_EB_GIC_START + 1)		/* Software interrupt */
+#define IRQ_EB_COMMRx		(IRQ_EB_GIC_START + 2)		/* Debug Comm Rx interrupt */
+#define IRQ_EB_COMMTx		(IRQ_EB_GIC_START + 3)		/* Debug Comm Tx interrupt */
+#define IRQ_EB_TIMER0_1		(IRQ_EB_GIC_START + 4)		/* Timer 0 and 1 */
+#define IRQ_EB_TIMER2_3		(IRQ_EB_GIC_START + 5)		/* Timer 2 and 3 */
+#define IRQ_EB_GPIO0		(IRQ_EB_GIC_START + 6)		/* GPIO 0 */
+#define IRQ_EB_GPIO1		(IRQ_EB_GIC_START + 7)		/* GPIO 1 */
+#define IRQ_EB_GPIO2		(IRQ_EB_GIC_START + 8)		/* GPIO 2 */
+								/* 9 reserved */
+#define IRQ_EB_RTC		(IRQ_EB_GIC_START + 10)		/* Real Time Clock */
+#define IRQ_EB_SSP		(IRQ_EB_GIC_START + 11)		/* Synchronous Serial Port */
+#define IRQ_EB_UART0		(IRQ_EB_GIC_START + 12)		/* UART 0 on development chip */
+#define IRQ_EB_UART1		(IRQ_EB_GIC_START + 13)		/* UART 1 on development chip */
+#define IRQ_EB_UART2		(IRQ_EB_GIC_START + 14)		/* UART 2 on development chip */
+#define IRQ_EB_UART3		(IRQ_EB_GIC_START + 15)		/* UART 3 on development chip */
+#define IRQ_EB_SCI		(IRQ_EB_GIC_START + 16)		/* Smart Card Interface */
+#define IRQ_EB_MMCI0A		(IRQ_EB_GIC_START + 17)		/* Multimedia Card 0A */
+#define IRQ_EB_MMCI0B		(IRQ_EB_GIC_START + 18)		/* Multimedia Card 0B */
+#define IRQ_EB_AACI		(IRQ_EB_GIC_START + 19)		/* Audio Codec */
+#define IRQ_EB_KMI0		(IRQ_EB_GIC_START + 20)		/* Keyboard/Mouse port 0 */
+#define IRQ_EB_KMI1		(IRQ_EB_GIC_START + 21)		/* Keyboard/Mouse port 1 */
+#define IRQ_EB_CHARLCD		(IRQ_EB_GIC_START + 22)		/* Character LCD */
+#define IRQ_EB_CLCD		(IRQ_EB_GIC_START + 23)		/* CLCD controller */
+#define IRQ_EB_DMA		(IRQ_EB_GIC_START + 24)		/* DMA controller */
+#define IRQ_EB_PWRFAIL		(IRQ_EB_GIC_START + 25)		/* Power failure */
+#define IRQ_EB_PISMO		(IRQ_EB_GIC_START + 26)		/* PISMO interface */
+#define IRQ_EB_DoC		(IRQ_EB_GIC_START + 27)		/* Disk on Chip memory controller */
+#define IRQ_EB_ETH		(IRQ_EB_GIC_START + 28)		/* Ethernet controller */
+#define IRQ_EB_USB		(IRQ_EB_GIC_START + 29)		/* USB controller */
+#define IRQ_EB_TSPEN		(IRQ_EB_GIC_START + 30)		/* Touchscreen pen */
+#define IRQ_EB_TSKPAD		(IRQ_EB_GIC_START + 31)		/* Touchscreen keypad */
+
+/*
+ * RealView EB + ARM11MPCore interrupt sources (primary GIC on the core tile)
+ */
+#define IRQ_EB11MP_AACI		(IRQ_EB_GIC_START + 0)
+#define IRQ_EB11MP_TIMER0_1	(IRQ_EB_GIC_START + 1)
+#define IRQ_EB11MP_TIMER2_3	(IRQ_EB_GIC_START + 2)
+#define IRQ_EB11MP_USB		(IRQ_EB_GIC_START + 3)
+#define IRQ_EB11MP_UART0	(IRQ_EB_GIC_START + 4)
+#define IRQ_EB11MP_UART1	(IRQ_EB_GIC_START + 5)
+#define IRQ_EB11MP_RTC		(IRQ_EB_GIC_START + 6)
+#define IRQ_EB11MP_KMI0		(IRQ_EB_GIC_START + 7)
+#define IRQ_EB11MP_KMI1		(IRQ_EB_GIC_START + 8)
+#define IRQ_EB11MP_ETH		(IRQ_EB_GIC_START + 9)
+#define IRQ_EB11MP_EB_IRQ1	(IRQ_EB_GIC_START + 10)		/* main GIC */
+#define IRQ_EB11MP_EB_IRQ2	(IRQ_EB_GIC_START + 11)		/* tile GIC */
+#define IRQ_EB11MP_EB_FIQ1	(IRQ_EB_GIC_START + 12)		/* main GIC */
+#define IRQ_EB11MP_EB_FIQ2	(IRQ_EB_GIC_START + 13)		/* tile GIC */
+#define IRQ_EB11MP_MMCI0A	(IRQ_EB_GIC_START + 14)
+#define IRQ_EB11MP_MMCI0B	(IRQ_EB_GIC_START + 15)
+
+#define IRQ_EB11MP_PMU_CPU0	(IRQ_EB_GIC_START + 17)
+#define IRQ_EB11MP_PMU_CPU1	(IRQ_EB_GIC_START + 18)
+#define IRQ_EB11MP_PMU_CPU2	(IRQ_EB_GIC_START + 19)
+#define IRQ_EB11MP_PMU_CPU3	(IRQ_EB_GIC_START + 20)
+#define IRQ_EB11MP_PMU_SCU0	(IRQ_EB_GIC_START + 21)
+#define IRQ_EB11MP_PMU_SCU1	(IRQ_EB_GIC_START + 22)
+#define IRQ_EB11MP_PMU_SCU2	(IRQ_EB_GIC_START + 23)
+#define IRQ_EB11MP_PMU_SCU3	(IRQ_EB_GIC_START + 24)
+#define IRQ_EB11MP_PMU_SCU4	(IRQ_EB_GIC_START + 25)
+#define IRQ_EB11MP_PMU_SCU5	(IRQ_EB_GIC_START + 26)
+#define IRQ_EB11MP_PMU_SCU6	(IRQ_EB_GIC_START + 27)
+#define IRQ_EB11MP_PMU_SCU7	(IRQ_EB_GIC_START + 28)
+
+#define IRQ_EB11MP_L220_EVENT	(IRQ_EB_GIC_START + 29)
+#define IRQ_EB11MP_L220_SLAVE	(IRQ_EB_GIC_START + 30)
+#define IRQ_EB11MP_L220_DECODE	(IRQ_EB_GIC_START + 31)
+
+#define IRQ_EB11MP_UART2	-1
+#define IRQ_EB11MP_UART3	-1
+#define IRQ_EB11MP_CLCD		-1
+#define IRQ_EB11MP_DMA		-1
+#define IRQ_EB11MP_WDOG		-1
+#define IRQ_EB11MP_GPIO0	-1
+#define IRQ_EB11MP_GPIO1	-1
+#define IRQ_EB11MP_GPIO2	-1
+#define IRQ_EB11MP_SCI		-1
+#define IRQ_EB11MP_SSP		-1
+
+#define NR_GIC_EB11MP		2
+
+/*
+ * Only (re)define NR_IRQS if it is undefined or less than NR_IRQS_EB
+ */
+#define NR_IRQS_EB		(IRQ_EB_GIC_START + 96)
+
+#if defined(CONFIG_MACH_REALVIEW_EB) \
+	&& (!defined(NR_IRQS) || (NR_IRQS < NR_IRQS_EB))
+#undef NR_IRQS
+#define NR_IRQS			NR_IRQS_EB
+#endif
+
+#if defined(CONFIG_REALVIEW_EB_ARM11MP) \
+	&& (!defined(MAX_GIC_NR) || (MAX_GIC_NR < NR_GIC_EB11MP))
+#undef MAX_GIC_NR
+#define MAX_GIC_NR		NR_GIC_EB11MP
+#endif
+
+/*
+ * Core tile identification (REALVIEW_SYS_PROCID)
+ */
+#define REALVIEW_EB_PROC_MASK		0xFF000000
+#define REALVIEW_EB_PROC_ARM7TDMI	0x00000000
+#define REALVIEW_EB_PROC_ARM9		0x02000000
+#define REALVIEW_EB_PROC_ARM11		0x04000000
+#define REALVIEW_EB_PROC_ARM11MP	0x06000000
+
+#define check_eb_proc(proc_type)						\
+	((readl(__io_address(REALVIEW_SYS_PROCID)) & REALVIEW_EB_PROC_MASK)	\
+	 == (proc_type))
+
+#ifdef CONFIG_REALVIEW_EB_ARM11MP
+#define core_tile_eb11mp()	check_eb_proc(REALVIEW_EB_PROC_ARM11MP)
+#else
+#define core_tile_eb11mp()	0
+#endif
+
+#endif	/* __ASM_ARCH_BOARD_EB_H */
diff --git a/include/asm-arm/arch-realview/entry-macro.S b/include/asm-arm/arch-realview/entry-macro.S
index 3b4e207..cd26306 100644
--- a/include/asm-arm/arch-realview/entry-macro.S
+++ b/include/asm-arm/arch-realview/entry-macro.S
@@ -14,7 +14,8 @@
 		.endm
 
 		.macro  get_irqnr_preamble, base, tmp
-		ldr     \base, =IO_ADDRESS(REALVIEW_GIC_CPU_BASE)
+		ldr	\base, =gic_cpu_base_addr
+		ldr	\base, [\base]
 		.endm
 
 		.macro  arch_ret_to_user, tmp1, tmp2
diff --git a/include/asm-arm/arch-realview/hardware.h b/include/asm-arm/arch-realview/hardware.h
index aa78fe0..bad8d7c 100644
--- a/include/asm-arm/arch-realview/hardware.h
+++ b/include/asm-arm/arch-realview/hardware.h
@@ -23,7 +23,6 @@
 #define __ASM_ARCH_HARDWARE_H
 
 #include <asm/sizes.h>
-#include <asm/arch/platform.h>
 
 /* macro to get at IO space when running virtually */
 #define IO_ADDRESS(x)		((((x) & 0x0effffff) | (((x) >> 4) & 0x0f000000)) + 0xf0000000)
diff --git a/include/asm-arm/arch-realview/irqs.h b/include/asm-arm/arch-realview/irqs.h
index 5a5db56..ad0c911 100644
--- a/include/asm-arm/arch-realview/irqs.h
+++ b/include/asm-arm/arch-realview/irqs.h
@@ -19,103 +19,18 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#include <asm/arch/platform.h>
+#ifndef __ASM_ARCH_IRQS_H
+#define __ASM_ARCH_IRQS_H
 
-#define IRQ_LOCALTIMER			29
-#define IRQ_LOCALWDOG			30
+#include <asm/arch/board-eb.h>
 
-/* 
- *  IRQ interrupts definitions are the same the INT definitions
- *  held within platform.h
- */
+#define IRQ_LOCALTIMER		29
+#define IRQ_LOCALWDOG		30
+
 #define IRQ_GIC_START		32
-#define IRQ_WDOGINT		(IRQ_GIC_START + INT_WDOGINT)
-#define IRQ_SOFTINT		(IRQ_GIC_START + INT_SOFTINT)
-#define IRQ_COMMRx		(IRQ_GIC_START + INT_COMMRx)
-#define IRQ_COMMTx		(IRQ_GIC_START + INT_COMMTx)
-#define IRQ_TIMERINT0_1		(IRQ_GIC_START + INT_TIMERINT0_1)
-#define IRQ_TIMERINT2_3		(IRQ_GIC_START + INT_TIMERINT2_3)
-#define IRQ_GPIOINT0		(IRQ_GIC_START + INT_GPIOINT0)
-#define IRQ_GPIOINT1		(IRQ_GIC_START + INT_GPIOINT1)
-#define IRQ_GPIOINT2		(IRQ_GIC_START + INT_GPIOINT2)
-#define IRQ_GPIOINT3		(IRQ_GIC_START + INT_GPIOINT3)
-#define IRQ_RTCINT		(IRQ_GIC_START + INT_RTCINT)
-#define IRQ_SSPINT		(IRQ_GIC_START + INT_SSPINT)
-#define IRQ_UARTINT0		(IRQ_GIC_START + INT_UARTINT0)
-#define IRQ_UARTINT1		(IRQ_GIC_START + INT_UARTINT1)
-#define IRQ_UARTINT2		(IRQ_GIC_START + INT_UARTINT2)
-#define IRQ_UART3		(IRQ_GIC_START + INT_UARTINT3)
-#define IRQ_SCIINT		(IRQ_GIC_START + INT_SCIINT)
-#define IRQ_CLCDINT		(IRQ_GIC_START + INT_CLCDINT)
-#define IRQ_DMAINT		(IRQ_GIC_START + INT_DMAINT)
-#define IRQ_PWRFAILINT 		(IRQ_GIC_START + INT_PWRFAILINT)
-#define IRQ_MBXINT		(IRQ_GIC_START + INT_MBXINT)
-#define IRQ_GNDINT		(IRQ_GIC_START + INT_GNDINT)
-#define IRQ_MMCI0B 		(IRQ_GIC_START + INT_MMCI0B)
-#define IRQ_MMCI1B 		(IRQ_GIC_START + INT_MMCI1B)
-#define IRQ_KMI0		(IRQ_GIC_START + INT_KMI0)
-#define IRQ_KMI1		(IRQ_GIC_START + INT_KMI1)
-#define IRQ_SCI3		(IRQ_GIC_START + INT_SCI3)
-#define IRQ_CLCD		(IRQ_GIC_START + INT_CLCD)
-#define IRQ_TOUCH		(IRQ_GIC_START + INT_TOUCH)
-#define IRQ_KEYPAD 		(IRQ_GIC_START + INT_KEYPAD)
-#define IRQ_DoC			(IRQ_GIC_START + INT_DoC)
-#define IRQ_MMCI0A 		(IRQ_GIC_START + INT_MMCI0A)
-#define IRQ_MMCI1A 		(IRQ_GIC_START + INT_MMCI1A)
-#define IRQ_AACI		(IRQ_GIC_START + INT_AACI)
-#define IRQ_ETH			(IRQ_GIC_START + INT_ETH)
-#define IRQ_USB			(IRQ_GIC_START + INT_USB)
-#define IRQ_PMU_CPU0		(IRQ_GIC_START + INT_PMU_CPU0)
-#define IRQ_PMU_CPU1		(IRQ_GIC_START + INT_PMU_CPU1)
-#define IRQ_PMU_CPU2		(IRQ_GIC_START + INT_PMU_CPU2)
-#define IRQ_PMU_CPU3		(IRQ_GIC_START + INT_PMU_CPU3)
-#define IRQ_PMU_SCU0		(IRQ_GIC_START + INT_PMU_SCU0)
-#define IRQ_PMU_SCU1		(IRQ_GIC_START + INT_PMU_SCU1)
-#define IRQ_PMU_SCU2		(IRQ_GIC_START + INT_PMU_SCU2)
-#define IRQ_PMU_SCU3		(IRQ_GIC_START + INT_PMU_SCU3)
-#define IRQ_PMU_SCU4		(IRQ_GIC_START + INT_PMU_SCU4)
-#define IRQ_PMU_SCU5		(IRQ_GIC_START + INT_PMU_SCU5)
-#define IRQ_PMU_SCU6		(IRQ_GIC_START + INT_PMU_SCU6)
-#define IRQ_PMU_SCU7		(IRQ_GIC_START + INT_PMU_SCU7)
 
-#define IRQ_EB_IRQ1		(IRQ_GIC_START + INT_EB_IRQ1)
-#define IRQ_EB_IRQ2		(IRQ_GIC_START + INT_EB_IRQ2)
+#ifndef NR_IRQS
+#error "NR_IRQS not defined by the board-specific files"
+#endif
 
-#define IRQMASK_WDOGINT		INTMASK_WDOGINT
-#define IRQMASK_SOFTINT		INTMASK_SOFTINT
-#define IRQMASK_COMMRx 		INTMASK_COMMRx
-#define IRQMASK_COMMTx 		INTMASK_COMMTx
-#define IRQMASK_TIMERINT0_1	INTMASK_TIMERINT0_1
-#define IRQMASK_TIMERINT2_3	INTMASK_TIMERINT2_3
-#define IRQMASK_GPIOINT0	INTMASK_GPIOINT0
-#define IRQMASK_GPIOINT1	INTMASK_GPIOINT1
-#define IRQMASK_GPIOINT2	INTMASK_GPIOINT2
-#define IRQMASK_GPIOINT3	INTMASK_GPIOINT3
-#define IRQMASK_RTCINT 		INTMASK_RTCINT
-#define IRQMASK_SSPINT 		INTMASK_SSPINT
-#define IRQMASK_UARTINT0	INTMASK_UARTINT0
-#define IRQMASK_UARTINT1	INTMASK_UARTINT1
-#define IRQMASK_UARTINT2	INTMASK_UARTINT2
-#define IRQMASK_SCIINT 		INTMASK_SCIINT
-#define IRQMASK_CLCDINT		INTMASK_CLCDINT
-#define IRQMASK_DMAINT 		INTMASK_DMAINT
-#define IRQMASK_PWRFAILINT	INTMASK_PWRFAILINT
-#define IRQMASK_MBXINT 		INTMASK_MBXINT
-#define IRQMASK_GNDINT 		INTMASK_GNDINT
-#define IRQMASK_MMCI0B		INTMASK_MMCI0B
-#define IRQMASK_MMCI1B		INTMASK_MMCI1B
-#define IRQMASK_KMI0		INTMASK_KMI0
-#define IRQMASK_KMI1		INTMASK_KMI1
-#define IRQMASK_SCI3		INTMASK_SCI3
-#define IRQMASK_UART3		INTMASK_UART3
-#define IRQMASK_CLCD		INTMASK_CLCD
-#define IRQMASK_TOUCH		INTMASK_TOUCH
-#define IRQMASK_KEYPAD		INTMASK_KEYPAD
-#define IRQMASK_DoC		INTMASK_DoC
-#define IRQMASK_MMCI0A		INTMASK_MMCI0A
-#define IRQMASK_MMCI1A		INTMASK_MMCI1A
-#define IRQMASK_AACI		INTMASK_AACI
-#define IRQMASK_ETH		INTMASK_ETH
-#define IRQMASK_USB		INTMASK_USB
-
-#define NR_IRQS			(IRQ_GIC_START + 96)
+#endif
diff --git a/include/asm-arm/arch-realview/platform.h b/include/asm-arm/arch-realview/platform.h
index 6e0eab9..4fd351b 100644
--- a/include/asm-arm/arch-realview/platform.h
+++ b/include/asm-arm/arch-realview/platform.h
@@ -18,8 +18,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#ifndef __address_h
-#define __address_h                     1
+#ifndef __ASM_ARCH_PLATFORM_H
+#define __ASM_ARCH_PLATFORM_H
 
 /*
  * Memory definitions
@@ -81,11 +81,12 @@
 #define REALVIEW_SYS_24MHz_OFFSET            0x5C
 #define REALVIEW_SYS_MISC_OFFSET             0x60
 #define REALVIEW_SYS_IOSEL_OFFSET            0x70
-#define REALVIEW_SYS_TEST_OSC0_OFFSET        0x80
-#define REALVIEW_SYS_TEST_OSC1_OFFSET        0x84
-#define REALVIEW_SYS_TEST_OSC2_OFFSET        0x88
-#define REALVIEW_SYS_TEST_OSC3_OFFSET        0x8C
-#define REALVIEW_SYS_TEST_OSC4_OFFSET        0x90
+#define REALVIEW_SYS_PROCID_OFFSET           0x84
+#define REALVIEW_SYS_TEST_OSC0_OFFSET        0xC0
+#define REALVIEW_SYS_TEST_OSC1_OFFSET        0xC4
+#define REALVIEW_SYS_TEST_OSC2_OFFSET        0xC8
+#define REALVIEW_SYS_TEST_OSC3_OFFSET        0xCC
+#define REALVIEW_SYS_TEST_OSC4_OFFSET        0xD0
 
 #define REALVIEW_SYS_BASE                    0x10000000
 #define REALVIEW_SYS_ID                      (REALVIEW_SYS_BASE + REALVIEW_SYS_ID_OFFSET)
@@ -114,6 +115,7 @@
 #define REALVIEW_SYS_24MHz                   (REALVIEW_SYS_BASE + REALVIEW_SYS_24MHz_OFFSET)
 #define REALVIEW_SYS_MISC                    (REALVIEW_SYS_BASE + REALVIEW_SYS_MISC_OFFSET)
 #define REALVIEW_SYS_IOSEL                   (REALVIEW_SYS_BASE + REALVIEW_SYS_IOSEL_OFFSET)
+#define REALVIEW_SYS_PROCID                  (REALVIEW_SYS_BASE + REALVIEW_SYS_PROCID_OFFSET)
 #define REALVIEW_SYS_TEST_OSC0               (REALVIEW_SYS_BASE + REALVIEW_SYS_TEST_OSC0_OFFSET)
 #define REALVIEW_SYS_TEST_OSC1               (REALVIEW_SYS_BASE + REALVIEW_SYS_TEST_OSC1_OFFSET)
 #define REALVIEW_SYS_TEST_OSC2               (REALVIEW_SYS_BASE + REALVIEW_SYS_TEST_OSC2_OFFSET)
@@ -203,30 +205,8 @@
 	/* Reserved 0x1001A000 - 0x1001FFFF */
 #define REALVIEW_CLCD_BASE            0x10020000	/* CLCD */
 #define REALVIEW_DMAC_BASE            0x10030000	/* DMA controller */
-#ifndef CONFIG_REALVIEW_MPCORE
 #define REALVIEW_GIC_CPU_BASE         0x10040000	/* Generic interrupt controller CPU interface */
 #define REALVIEW_GIC_DIST_BASE        0x10041000	/* Generic interrupt controller distributor */
-#else
-#ifdef CONFIG_REALVIEW_MPCORE_REVB
-#define REALVIEW_MPCORE_SCU_BASE	0x10100000	/*  SCU registers */
-#define REALVIEW_GIC_CPU_BASE		0x10100100	/* Generic interrupt controller CPU interface */
-#define REALVIEW_TWD_BASE		0x10100700
-#define REALVIEW_TWD_SIZE		0x00000100
-#define REALVIEW_GIC_DIST_BASE		0x10101000	/* Generic interrupt controller distributor */
-#define REALVIEW_MPCORE_L220_BASE	0x10102000	/* L220 registers */
-#define REALVIEW_MPCORE_SYS_PLD_CTRL1 0xD8		/*  Register offset for MPCore sysctl */
-#else
-#define REALVIEW_MPCORE_SCU_BASE      0x1F000000	/*  SCU registers */
-#define REALVIEW_GIC_CPU_BASE         0x1F000100	/* Generic interrupt controller CPU interface */
-#define REALVIEW_TWD_BASE             0x1F000700
-#define REALVIEW_TWD_SIZE             0x00000100
-#define REALVIEW_GIC_DIST_BASE        0x1F001000	/* Generic interrupt controller distributor */
-#define REALVIEW_MPCORE_L220_BASE     0x1F002000	/* L220 registers */
-#define REALVIEW_MPCORE_SYS_PLD_CTRL1 0x74		/*  Register offset for MPCore sysctl */
-#endif
-#define REALVIEW_GIC1_CPU_BASE        0x10040000	/* Generic interrupt controller CPU interface */
-#define REALVIEW_GIC1_DIST_BASE       0x10041000	/* Generic interrupt controller distributor */
-#endif
 #define REALVIEW_SMC_BASE             0x10080000	/* SMC */
 	/* Reserved 0x10090000 - 0x100EFFFF */
 
@@ -283,134 +263,6 @@
 #define REALVIEW_INTREG_OFFSET		0x8	/* Interrupt control */
 #define REALVIEW_DECODE_OFFSET		0xC	/* Fitted logic modules */
 
-/* ------------------------------------------------------------------------
- *  Interrupts - bit assignment (primary)
- * ------------------------------------------------------------------------
- */
-#ifndef CONFIG_REALVIEW_MPCORE
-#define INT_WDOGINT			0	/* Watchdog timer */
-#define INT_SOFTINT			1	/* Software interrupt */
-#define INT_COMMRx			2	/* Debug Comm Rx interrupt */
-#define INT_COMMTx			3	/* Debug Comm Tx interrupt */
-#define INT_TIMERINT0_1			4	/* Timer 0 and 1 */
-#define INT_TIMERINT2_3			5	/* Timer 2 and 3 */
-#define INT_GPIOINT0			6	/* GPIO 0 */
-#define INT_GPIOINT1			7	/* GPIO 1 */
-#define INT_GPIOINT2			8	/* GPIO 2 */
-/* 9 reserved */
-#define INT_RTCINT			10	/* Real Time Clock */
-#define INT_SSPINT			11	/* Synchronous Serial Port */
-#define INT_UARTINT0			12	/* UART 0 on development chip */
-#define INT_UARTINT1			13	/* UART 1 on development chip */
-#define INT_UARTINT2			14	/* UART 2 on development chip */
-#define INT_UARTINT3			15	/* UART 3 on development chip */
-#define INT_SCIINT			16	/* Smart Card Interface */
-#define INT_MMCI0A			17	/* Multimedia Card 0A */
-#define INT_MMCI0B			18	/* Multimedia Card 0B */
-#define INT_AACI			19	/* Audio Codec */
-#define INT_KMI0			20	/* Keyboard/Mouse port 0 */
-#define INT_KMI1			21	/* Keyboard/Mouse port 1 */
-#define INT_CHARLCD			22	/* Character LCD */
-#define INT_CLCDINT			23	/* CLCD controller */
-#define INT_DMAINT			24	/* DMA controller */
-#define INT_PWRFAILINT			25	/* Power failure */
-#define INT_PISMO			26
-#define INT_DoC				27	/* Disk on Chip memory controller */
-#define INT_ETH				28	/* Ethernet controller */
-#define INT_USB				29	/* USB controller */
-#define INT_TSPENINT			30	/* Touchscreen pen */
-#define INT_TSKPADINT			31	/* Touchscreen keypad */
-
-#else
-
-#define MAX_GIC_NR			2
-
-#define INT_AACI			0
-#define INT_TIMERINT0_1			1
-#define INT_TIMERINT2_3			2
-#define INT_USB				3
-#define INT_UARTINT0			4
-#define INT_UARTINT1			5
-#define INT_RTCINT			6
-#define INT_KMI0			7
-#define INT_KMI1			8
-#define INT_ETH				9
-#define INT_EB_IRQ1			10	/* main GIC */
-#define INT_EB_IRQ2			11	/* tile GIC */
-#define INT_EB_FIQ1			12	/* main GIC */
-#define INT_EB_FIQ2			13	/* tile GIC */
-#define INT_MMCI0A			14
-#define INT_MMCI0B			15
-
-#define INT_PMU_CPU0			17
-#define INT_PMU_CPU1			18
-#define INT_PMU_CPU2			19
-#define INT_PMU_CPU3			20
-#define INT_PMU_SCU0			21
-#define INT_PMU_SCU1			22
-#define INT_PMU_SCU2			23
-#define INT_PMU_SCU3			24
-#define INT_PMU_SCU4			25
-#define INT_PMU_SCU5			26
-#define INT_PMU_SCU6			27
-#define INT_PMU_SCU7			28
-
-#define INT_L220_EVENT			29
-#define INT_L220_SLAVE			30
-#define INT_L220_DECODE			31
-
-#define INT_UARTINT2			-1
-#define INT_UARTINT3			-1
-#define INT_CLCDINT			-1
-#define INT_DMAINT			-1
-#define INT_WDOGINT			-1
-#define INT_GPIOINT0			-1
-#define INT_GPIOINT1			-1
-#define INT_GPIOINT2			-1
-#define INT_SCIINT			-1
-#define INT_SSPINT			-1
-#endif
-
-/* 
- *  Interrupt bit positions
- * 
- */
-#define INTMASK_WDOGINT			(1 << INT_WDOGINT)
-#define INTMASK_SOFTINT			(1 << INT_SOFTINT)
-#define INTMASK_COMMRx			(1 << INT_COMMRx)
-#define INTMASK_COMMTx			(1 << INT_COMMTx)
-#define INTMASK_TIMERINT0_1		(1 << INT_TIMERINT0_1)
-#define INTMASK_TIMERINT2_3		(1 << INT_TIMERINT2_3)
-#define INTMASK_GPIOINT0		(1 << INT_GPIOINT0)
-#define INTMASK_GPIOINT1		(1 << INT_GPIOINT1)
-#define INTMASK_GPIOINT2		(1 << INT_GPIOINT2)
-#define INTMASK_RTCINT			(1 << INT_RTCINT)
-#define INTMASK_SSPINT			(1 << INT_SSPINT)
-#define INTMASK_UARTINT0		(1 << INT_UARTINT0)
-#define INTMASK_UARTINT1		(1 << INT_UARTINT1)
-#define INTMASK_UARTINT2		(1 << INT_UARTINT2)
-#define INTMASK_UARTINT3		(1 << INT_UARTINT3)
-#define INTMASK_SCIINT			(1 << INT_SCIINT)
-#define INTMASK_MMCI0A			(1 << INT_MMCI0A)
-#define INTMASK_MMCI0B			(1 << INT_MMCI0B)
-#define INTMASK_AACI			(1 << INT_AACI)
-#define INTMASK_KMI0			(1 << INT_KMI0)
-#define INTMASK_KMI1			(1 << INT_KMI1)
-#define INTMASK_CHARLCD			(1 << INT_CHARLCD)
-#define INTMASK_CLCDINT			(1 << INT_CLCDINT)
-#define INTMASK_DMAINT			(1 << INT_DMAINT)
-#define INTMASK_PWRFAILINT		(1 << INT_PWRFAILINT)
-#define INTMASK_PISMO			(1 << INT_PISMO)
-#define INTMASK_DoC			(1 << INT_DoC)
-#define INTMASK_ETH			(1 << INT_ETH)
-#define INTMASK_USB			(1 << INT_USB)
-#define INTMASK_TSPENINT		(1 << INT_TSPENINT)
-#define INTMASK_TSKPADINT		(1 << INT_TSKPADINT)
-
-#define MAXIRQNUM                       31
-#define MAXFIQNUM                       31
-#define MAXSWINUM                       31
-
 /* 
  *  Application Flash
  * 
@@ -463,6 +315,4 @@
 #define REALVIEW_CSR_BASE             0x10000000
 #define REALVIEW_CSR_SIZE             0x10000000
 
-#endif
-
-/* 	END */
+#endif	/* __ASM_ARCH_PLATFORM_H */
diff --git a/include/asm-arm/arch-realview/scu.h b/include/asm-arm/arch-realview/scu.h
index cc29364..08b3db8 100644
--- a/include/asm-arm/arch-realview/scu.h
+++ b/include/asm-arm/arch-realview/scu.h
@@ -1,8 +1,8 @@
 #ifndef __ASMARM_ARCH_SCU_H
 #define __ASMARM_ARCH_SCU_H
 
-#include <asm/arch/platform.h>
+#include <asm/arch/board-eb.h>
 
-#define SCU_BASE	REALVIEW_MPCORE_SCU_BASE
+#define SCU_BASE	REALVIEW_EB11MP_SCU_BASE
 
 #endif
diff --git a/include/asm-arm/arch-realview/uncompress.h b/include/asm-arm/arch-realview/uncompress.h
index f05631d..3d5c2db 100644
--- a/include/asm-arm/arch-realview/uncompress.h
+++ b/include/asm-arm/arch-realview/uncompress.h
@@ -19,6 +19,8 @@
  */
 #include <asm/hardware.h>
 
+#include <asm/arch/platform.h>
+
 #define AMBA_UART_DR	(*(volatile unsigned char *) (REALVIEW_UART0_BASE + 0x00))
 #define AMBA_UART_LCRH	(*(volatile unsigned char *) (REALVIEW_UART0_BASE + 0x2c))
 #define AMBA_UART_CR	(*(volatile unsigned char *) (REALVIEW_UART0_BASE + 0x30))
diff --git a/include/asm-arm/hardware/arm_twd.h b/include/asm-arm/hardware/arm_twd.h
index 131d5b4..e521b70 100644
--- a/include/asm-arm/hardware/arm_twd.h
+++ b/include/asm-arm/hardware/arm_twd.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_HARDWARE_TWD_H
 #define __ASM_HARDWARE_TWD_H
 
-#define TWD_TIMER_LOAD 		0x00
+#define TWD_TIMER_LOAD 			0x00
 #define TWD_TIMER_COUNTER		0x04
 #define TWD_TIMER_CONTROL		0x08
 #define TWD_TIMER_INTSTAT		0x0C
@@ -13,4 +13,9 @@
 #define TWD_WDOG_RESETSTAT		0x30
 #define TWD_WDOG_DISABLE		0x34
 
+#define TWD_TIMER_CONTROL_ENABLE	(1 << 0)
+#define TWD_TIMER_CONTROL_ONESHOT	(0 << 1)
+#define TWD_TIMER_CONTROL_PERIODIC	(1 << 1)
+#define TWD_TIMER_CONTROL_IT_ENABLE	(1 << 2)
+
 #endif
diff --git a/include/asm-arm/kexec.h b/include/asm-arm/kexec.h
index 46dcc4d..1ee17b6 100644
--- a/include/asm-arm/kexec.h
+++ b/include/asm-arm/kexec.h
@@ -16,6 +16,9 @@
 
 #define KEXEC_BOOT_PARAMS_SIZE 1536
 
+#define KEXEC_ARM_ATAGS_OFFSET  0x1000
+#define KEXEC_ARM_ZIMAGE_OFFSET 0x8000
+
 #ifndef __ASSEMBLY__
 
 struct kimage;
diff --git a/include/asm-arm/pgalloc.h b/include/asm-arm/pgalloc.h
index 4d43945..fb6c6e3 100644
--- a/include/asm-arm/pgalloc.h
+++ b/include/asm-arm/pgalloc.h
@@ -27,14 +27,14 @@
  * Since we have only two-level page tables, these are trivial
  */
 #define pmd_alloc_one(mm,addr)		({ BUG(); ((pmd_t *)2); })
-#define pmd_free(pmd)			do { } while (0)
+#define pmd_free(mm, pmd)		do { } while (0)
 #define pgd_populate(mm,pmd,pte)	BUG()
 
 extern pgd_t *get_pgd_slow(struct mm_struct *mm);
-extern void free_pgd_slow(pgd_t *pgd);
+extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
 
 #define pgd_alloc(mm)			get_pgd_slow(mm)
-#define pgd_free(pgd)			free_pgd_slow(pgd)
+#define pgd_free(mm, pgd)		free_pgd_slow(mm, pgd)
 
 /*
  * Allocate one PTE table.
@@ -83,7 +83,7 @@
 /*
  * Free one PTE table.
  */
-static inline void pte_free_kernel(pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	if (pte) {
 		pte -= PTRS_PER_PTE;
@@ -91,7 +91,7 @@
 	}
 }
 
-static inline void pte_free(struct page *pte)
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
 {
 	__free_page(pte);
 }
diff --git a/include/asm-arm/smp.h b/include/asm-arm/smp.h
index f67acce..af99636 100644
--- a/include/asm-arm/smp.h
+++ b/include/asm-arm/smp.h
@@ -61,6 +61,11 @@
 extern void smp_send_timer(void);
 
 /*
+ * Broadcast a clock event to other CPUs.
+ */
+extern void smp_timer_broadcast(cpumask_t mask);
+
+/*
  * Boot a secondary CPU, and assign it the specified idle task.
  * This also gives us the initial stack to use for this CPU.
  */
@@ -96,11 +101,12 @@
 extern int platform_cpu_kill(unsigned int cpu);
 extern void platform_cpu_enable(unsigned int cpu);
 
-#ifdef CONFIG_LOCAL_TIMERS
 /*
- * Setup a local timer interrupt for a CPU.
+ * Local timer interrupt handling function (can be IPI'ed).
  */
-extern void local_timer_setup(unsigned int cpu);
+extern void local_timer_interrupt(void);
+
+#ifdef CONFIG_LOCAL_TIMERS
 
 /*
  * Stop a local timer interrupt.
@@ -114,10 +120,6 @@
 
 #else
 
-static inline void local_timer_setup(unsigned int cpu)
-{
-}
-
 static inline void local_timer_stop(unsigned int cpu)
 {
 }
@@ -125,6 +127,11 @@
 #endif
 
 /*
+ * Setup a local timer interrupt for a CPU.
+ */
+extern void local_timer_setup(unsigned int cpu);
+
+/*
  * show local interrupt info
  */
 extern void show_local_irqs(struct seq_file *);
diff --git a/include/asm-arm/tlb.h b/include/asm-arm/tlb.h
index cb74002..36bd402 100644
--- a/include/asm-arm/tlb.h
+++ b/include/asm-arm/tlb.h
@@ -85,8 +85,8 @@
 }
 
 #define tlb_remove_page(tlb,page)	free_page_and_swap_cache(page)
-#define pte_free_tlb(tlb,ptep)		pte_free(ptep)
-#define pmd_free_tlb(tlb,pmdp)		pmd_free(pmdp)
+#define pte_free_tlb(tlb, ptep)		pte_free((tlb)->mm, ptep)
+#define pmd_free_tlb(tlb, pmdp)		pmd_free((tlb)->mm, pmdp)
 
 #define tlb_migrate_finish(mm)		do { } while (0)
 
diff --git a/include/asm-avr32/arch-at32ap/at32ap700x.h b/include/asm-avr32/arch-at32ap/at32ap700x.h
index 99684d6..31e48b0 100644
--- a/include/asm-avr32/arch-at32ap/at32ap700x.h
+++ b/include/asm-avr32/arch-at32ap/at32ap700x.h
@@ -13,8 +13,6 @@
 #define GPIO_PERIPH_A	0
 #define GPIO_PERIPH_B	1
 
-#define NR_GPIO_CONTROLLERS	4
-
 /*
  * Pin numbers identifying specific GPIO pins on the chip. They can
  * also be converted to IRQ numbers by passing them through
diff --git a/include/asm-avr32/arch-at32ap/gpio.h b/include/asm-avr32/arch-at32ap/gpio.h
index af7f953..0180f58 100644
--- a/include/asm-avr32/arch-at32ap/gpio.h
+++ b/include/asm-avr32/arch-at32ap/gpio.h
@@ -5,20 +5,36 @@
 #include <asm/irq.h>
 
 
-/* Arch-neutral GPIO API */
-int __must_check gpio_request(unsigned int gpio, const char *label);
-void gpio_free(unsigned int gpio);
+/* Some GPIO chips can manage IRQs; some can't.  The exact numbers can
+ * be changed if needed, but for the moment they're not configurable.
+ */
+#define ARCH_NR_GPIOS	(NR_GPIO_IRQS + 2 * 32)
 
-int gpio_direction_input(unsigned int gpio);
-int gpio_direction_output(unsigned int gpio, int value);
-int gpio_get_value(unsigned int gpio);
-void gpio_set_value(unsigned int gpio, int value);
 
-#include <asm-generic/gpio.h>		/* cansleep wrappers */
+/* Arch-neutral GPIO API, supporting both "native" and external GPIOs. */
+#include <asm-generic/gpio.h>
+
+static inline int gpio_get_value(unsigned int gpio)
+{
+	return __gpio_get_value(gpio);
+}
+
+static inline void gpio_set_value(unsigned int gpio, int value)
+{
+	__gpio_set_value(gpio, value);
+}
+
+static inline int gpio_cansleep(unsigned int gpio)
+{
+	return __gpio_cansleep(gpio);
+}
+
 
 static inline int gpio_to_irq(unsigned int gpio)
 {
-	return gpio + GPIO_IRQ_BASE;
+	if (gpio < NR_GPIO_IRQS)
+		return gpio + GPIO_IRQ_BASE;
+	return -EINVAL;
 }
 
 static inline int irq_to_gpio(unsigned int irq)
diff --git a/include/asm-avr32/arch-at32ap/irq.h b/include/asm-avr32/arch-at32ap/irq.h
index 5adffab..608e350 100644
--- a/include/asm-avr32/arch-at32ap/irq.h
+++ b/include/asm-avr32/arch-at32ap/irq.h
@@ -3,11 +3,11 @@
 
 #define EIM_IRQ_BASE	NR_INTERNAL_IRQS
 #define NR_EIM_IRQS	32
-
 #define AT32_EXTINT(n)	(EIM_IRQ_BASE + (n))
 
 #define GPIO_IRQ_BASE	(EIM_IRQ_BASE + NR_EIM_IRQS)
-#define NR_GPIO_IRQS	(5 * 32)
+#define NR_GPIO_CTLR	(5 /*internal*/ + 1 /*external*/)
+#define NR_GPIO_IRQS	(NR_GPIO_CTLR * 32)
 
 #define NR_IRQS		(GPIO_IRQ_BASE + NR_GPIO_IRQS)
 
diff --git a/include/asm-avr32/pgalloc.h b/include/asm-avr32/pgalloc.h
index 0e680f4..b77e364 100644
--- a/include/asm-avr32/pgalloc.h
+++ b/include/asm-avr32/pgalloc.h
@@ -30,7 +30,7 @@
 	return kcalloc(USER_PTRS_PER_PGD, sizeof(pgd_t), GFP_KERNEL);
 }
 
-static inline void pgd_free(pgd_t *pgd)
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	kfree(pgd);
 }
@@ -55,12 +55,12 @@
 	return pte;
 }
 
-static inline void pte_free_kernel(pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	free_page((unsigned long)pte);
 }
 
-static inline void pte_free(struct page *pte)
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
 {
 	__free_page(pte);
 }
diff --git a/include/asm-cris/bitops.h b/include/asm-cris/bitops.h
index e2f49c2..75ea6e0 100644
--- a/include/asm-cris/bitops.h
+++ b/include/asm-cris/bitops.h
@@ -24,13 +24,6 @@
 #include <linux/compiler.h>
 
 /*
- * Some hacks to defeat gcc over-optimizations..
- */
-struct __dummy { unsigned long a[100]; };
-#define ADDR (*(struct __dummy *) addr)
-#define CONST_ADDR (*(const struct __dummy *) addr)
-
-/*
  * set_bit - Atomically set a bit in memory
  * @nr: the bit to set
  * @addr: the address to start counting from
diff --git a/include/asm-cris/pgalloc.h b/include/asm-cris/pgalloc.h
index deaddfe..8ddd66f 100644
--- a/include/asm-cris/pgalloc.h
+++ b/include/asm-cris/pgalloc.h
@@ -16,7 +16,7 @@
 	return (pgd_t *)get_zeroed_page(GFP_KERNEL);
 }
 
-static inline void pgd_free (pgd_t *pgd)
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	free_page((unsigned long)pgd);
 }
@@ -34,12 +34,12 @@
 	return pte;
 }
 
-static inline void pte_free_kernel(pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	free_page((unsigned long)pte);
 }
 
-static inline void pte_free(struct page *pte)
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
 {
 	__free_page(pte);
 }
diff --git a/include/asm-frv/dma-mapping.h b/include/asm-frv/dma-mapping.h
index bcb2df6..2e8966c 100644
--- a/include/asm-frv/dma-mapping.h
+++ b/include/asm-frv/dma-mapping.h
@@ -17,16 +17,6 @@
 void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle);
 
 /*
- * These macros should be used after a pci_map_sg call has been done
- * to get bus addresses of each of the SG entries and their lengths.
- * You should only work with the number of sg entries pci_map_sg
- * returns, or alternatively stop on the first sg_dma_len(sg) which
- * is 0.
- */
-#define sg_dma_address(sg)	((sg)->dma_address)
-#define sg_dma_len(sg)		((sg)->length)
-
-/*
  * Map a single buffer of the indicated size for DMA in streaming mode.
  * The 32-bit bus address to use is returned.
  *
diff --git a/include/asm-frv/page.h b/include/asm-frv/page.h
index 213d92f..bd9bd2d 100644
--- a/include/asm-frv/page.h
+++ b/include/asm-frv/page.h
@@ -76,10 +76,6 @@
 
 #endif /* __ASSEMBLY__ */
 
-#ifdef CONFIG_CONTIGUOUS_PAGE_ALLOC
-#define WANT_PAGE_VIRTUAL	1
-#endif
-
 #include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
diff --git a/include/asm-frv/pgalloc.h b/include/asm-frv/pgalloc.h
index ce982a6..e89620e 100644
--- a/include/asm-frv/pgalloc.h
+++ b/include/asm-frv/pgalloc.h
@@ -31,18 +31,18 @@
  */
 
 extern pgd_t *pgd_alloc(struct mm_struct *);
-extern void pgd_free(pgd_t *);
+extern void pgd_free(struct mm_struct *mm, pgd_t *);
 
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
 
 extern struct page *pte_alloc_one(struct mm_struct *, unsigned long);
 
-static inline void pte_free_kernel(pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	free_page((unsigned long)pte);
 }
 
-static inline void pte_free(struct page *pte)
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
 {
 	__free_page(pte);
 }
@@ -55,7 +55,7 @@
  * (In the PAE case we free the pmds as part of the pgd.)
  */
 #define pmd_alloc_one(mm, addr)		({ BUG(); ((pmd_t *) 2); })
-#define pmd_free(x)			do { } while (0)
+#define pmd_free(mm, x)			do { } while (0)
 #define __pmd_free_tlb(tlb,x)		do { } while (0)
 
 #endif /* CONFIG_MMU */
diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h
index 3c402af..6c0682e 100644
--- a/include/asm-frv/pgtable.h
+++ b/include/asm-frv/pgtable.h
@@ -226,7 +226,7 @@
  * inside the pgd, so has no extra memory associated with it.
  */
 #define pud_alloc_one(mm, address)		NULL
-#define pud_free(x)				do { } while (0)
+#define pud_free(mm, x)				do { } while (0)
 #define __pud_free_tlb(tlb, x)			do { } while (0)
 
 /*
diff --git a/include/asm-frv/scatterlist.h b/include/asm-frv/scatterlist.h
index 2e7143b..4bca8a2 100644
--- a/include/asm-frv/scatterlist.h
+++ b/include/asm-frv/scatterlist.h
@@ -31,6 +31,16 @@
 	unsigned int	length;
 };
 
+/*
+ * These macros should be used after a pci_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns, or alternatively stop on the first sg_dma_len(sg) which
+ * is 0.
+ */
+#define sg_dma_address(sg)	((sg)->dma_address)
+#define sg_dma_len(sg)		((sg)->length)
+
 #define ISA_DMA_THRESHOLD (0xffffffffUL)
 
 #endif /* !_ASM_SCATTERLIST_H */
diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h
index 7b88d39..9d40e87 100644
--- a/include/asm-generic/4level-fixup.h
+++ b/include/asm-generic/4level-fixup.h
@@ -28,7 +28,7 @@
 
 #undef pud_free_tlb
 #define pud_free_tlb(tlb, x)            do { } while (0)
-#define pud_free(x)			do { } while (0)
+#define pud_free(mm, x)			do { } while (0)
 #define __pud_free_tlb(tlb, x)		do { } while (0)
 
 #undef  pud_addr_end
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 2d0aab1..f29a502 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -1,6 +1,102 @@
 #ifndef _ASM_GENERIC_GPIO_H
 #define _ASM_GENERIC_GPIO_H
 
+#ifdef CONFIG_HAVE_GPIO_LIB
+
+/* Platforms may implement their GPIO interface with library code,
+ * at a small performance cost for non-inlined operations and some
+ * extra memory (for code and for per-GPIO table entries).
+ *
+ * While the GPIO programming interface defines valid GPIO numbers
+ * to be in the range 0..INT_MAX, this library restricts them to the
+ * smaller range 0..ARCH_NR_GPIOS.
+ */
+
+#ifndef ARCH_NR_GPIOS
+#define ARCH_NR_GPIOS		256
+#endif
+
+struct seq_file;
+
+/**
+ * struct gpio_chip - abstract a GPIO controller
+ * @label: for diagnostics
+ * @direction_input: configures signal "offset" as input, or returns error
+ * @get: returns value for signal "offset"; for output signals this
+ *	returns either the value actually sensed, or zero
+ * @direction_output: configures signal "offset" as output, or returns error
+ * @set: assigns output value for signal "offset"
+ * @dbg_show: optional routine to show contents in debugfs; default code
+ *	will be used when this is omitted, but custom code can show extra
+ *	state (such as pullup/pulldown configuration).
+ * @base: identifies the first GPIO number handled by this chip; or, if
+ *	negative during registration, requests dynamic ID allocation.
+ * @ngpio: the number of GPIOs handled by this controller; the last GPIO
+ *	handled is (base + ngpio - 1).
+ * @can_sleep: flag must be set iff get()/set() methods sleep, as they
+ *	must while accessing GPIO expander chips over I2C or SPI
+ *
+ * A gpio_chip can help platforms abstract various sources of GPIOs so
+ * they can all be accessed through a common programming interface.
+ * Example sources would be SOC controllers, FPGAs, multifunction
+ * chips, dedicated GPIO expanders, and so on.
+ *
+ * Each chip controls a number of signals, identified in method calls
+ * by "offset" values in the range 0..(@ngpio - 1).  When those signals
+ * are referenced through calls like gpio_get_value(gpio), the offset
+ * is calculated by subtracting @base from the gpio number.
+ */
+struct gpio_chip {
+	char			*label;
+
+	int			(*direction_input)(struct gpio_chip *chip,
+						unsigned offset);
+	int			(*get)(struct gpio_chip *chip,
+						unsigned offset);
+	int			(*direction_output)(struct gpio_chip *chip,
+						unsigned offset, int value);
+	void			(*set)(struct gpio_chip *chip,
+						unsigned offset, int value);
+	void			(*dbg_show)(struct seq_file *s,
+						struct gpio_chip *chip);
+	int			base;
+	u16			ngpio;
+	unsigned		can_sleep:1;
+};
+
+extern const char *gpiochip_is_requested(struct gpio_chip *chip,
+			unsigned offset);
+
+/* add/remove chips */
+extern int gpiochip_add(struct gpio_chip *chip);
+extern int __must_check gpiochip_remove(struct gpio_chip *chip);
+
+
+/* Always use the library code for GPIO management calls,
+ * or when sleeping may be involved.
+ */
+extern int gpio_request(unsigned gpio, const char *label);
+extern void gpio_free(unsigned gpio);
+
+extern int gpio_direction_input(unsigned gpio);
+extern int gpio_direction_output(unsigned gpio, int value);
+
+extern int gpio_get_value_cansleep(unsigned gpio);
+extern void gpio_set_value_cansleep(unsigned gpio, int value);
+
+
+/* A platform's <asm/gpio.h> code may want to inline the I/O calls when
+ * the GPIO is constant and refers to some always-present controller,
+ * giving direct access to chip registers and tight bitbanging loops.
+ */
+extern int __gpio_get_value(unsigned gpio);
+extern void __gpio_set_value(unsigned gpio, int value);
+
+extern int __gpio_cansleep(unsigned gpio);
+
+
+#else
+
 /* platforms that don't directly support access to GPIOs through I2C, SPI,
  * or other blocking infrastructure can use these wrappers.
  */
@@ -22,4 +118,6 @@
 	gpio_set_value(gpio, value);
 }
 
+#endif
+
 #endif /* _ASM_GENERIC_GPIO_H */
diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h
index 29ff5d8..087325e 100644
--- a/include/asm-generic/pgtable-nopmd.h
+++ b/include/asm-generic/pgtable-nopmd.h
@@ -54,7 +54,7 @@
  * inside the pud, so has no extra memory associated with it.
  */
 #define pmd_alloc_one(mm, address)		NULL
-#define pmd_free(x)				do { } while (0)
+#define pmd_free(mm, x)				do { } while (0)
 #define __pmd_free_tlb(tlb, x)			do { } while (0)
 
 #undef  pmd_addr_end
diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h
index 5664645..87cf449 100644
--- a/include/asm-generic/pgtable-nopud.h
+++ b/include/asm-generic/pgtable-nopud.h
@@ -51,7 +51,7 @@
  * inside the pgd, so has no extra memory associated with it.
  */
 #define pud_alloc_one(mm, address)		NULL
-#define pud_free(x)				do { } while (0)
+#define pud_free(mm, x)				do { } while (0)
 #define __pud_free_tlb(tlb, x)			do { } while (0)
 
 #undef  pud_addr_end
diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h
index 67552ca..556d988 100644
--- a/include/asm-ia64/pgalloc.h
+++ b/include/asm-ia64/pgalloc.h
@@ -27,7 +27,7 @@
 	return quicklist_alloc(0, GFP_KERNEL, NULL);
 }
 
-static inline void pgd_free(pgd_t * pgd)
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	quicklist_free(0, NULL, pgd);
 }
@@ -44,11 +44,11 @@
 	return quicklist_alloc(0, GFP_KERNEL, NULL);
 }
 
-static inline void pud_free(pud_t * pud)
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
 	quicklist_free(0, NULL, pud);
 }
-#define __pud_free_tlb(tlb, pud)	pud_free(pud)
+#define __pud_free_tlb(tlb, pud)	pud_free((tlb)->mm, pud)
 #endif /* CONFIG_PGTABLE_4 */
 
 static inline void
@@ -62,12 +62,12 @@
 	return quicklist_alloc(0, GFP_KERNEL, NULL);
 }
 
-static inline void pmd_free(pmd_t * pmd)
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
 	quicklist_free(0, NULL, pmd);
 }
 
-#define __pmd_free_tlb(tlb, pmd)	pmd_free(pmd)
+#define __pmd_free_tlb(tlb, pmd)	pmd_free((tlb)->mm, pmd)
 
 static inline void
 pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, struct page *pte)
@@ -94,12 +94,12 @@
 	return quicklist_alloc(0, GFP_KERNEL, NULL);
 }
 
-static inline void pte_free(struct page *pte)
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
 {
 	quicklist_free_page(0, NULL, pte);
 }
 
-static inline void pte_free_kernel(pte_t * pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	quicklist_free(0, NULL, pte);
 }
@@ -109,6 +109,6 @@
 	quicklist_trim(0, NULL, 25, 16);
 }
 
-#define __pte_free_tlb(tlb, pte)	pte_free(pte)
+#define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, pte)
 
 #endif				/* _ASM_IA64_PGALLOC_H */
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
index 038642f..741f7ec 100644
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -31,7 +31,8 @@
  * each (assuming 8KB page size), for a total of 8TB of user virtual
  * address space.
  */
-#define TASK_SIZE		(current->thread.task_size)
+#define TASK_SIZE_OF(tsk)	((tsk)->thread.task_size)
+#define TASK_SIZE       	TASK_SIZE_OF(current)
 
 /*
  * This decides where the kernel will search for a free chunk of vm
diff --git a/include/asm-m32r/irq.h b/include/asm-m32r/irq.h
index 2f93f47..242028b 100644
--- a/include/asm-m32r/irq.h
+++ b/include/asm-m32r/irq.h
@@ -3,7 +3,7 @@
 #define _ASM_M32R_IRQ_H
 
 
-#if defined(CONFIG_PLAT_M32700UT_Alpha) || defined(CONFIG_PLAT_USRV)
+#if defined(CONFIG_PLAT_USRV)
 /*
  * IRQ definitions for M32700UT
  *  M32700 Chip: 64 interrupts
diff --git a/include/asm-m32r/m32700ut/m32700ut_pld.h b/include/asm-m32r/m32700ut/m32700ut_pld.h
index d391212..57623be 100644
--- a/include/asm-m32r/m32700ut/m32700ut_pld.h
+++ b/include/asm-m32r/m32700ut/m32700ut_pld.h
@@ -13,9 +13,7 @@
  * this archive for more details.
  */
 
-#if defined(CONFIG_PLAT_M32700UT_Alpha)
-#define PLD_PLAT_BASE		0x08c00000
-#elif defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_USRV)
+#if defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_USRV)
 #define PLD_PLAT_BASE		0x04c00000
 #else
 #error "no platform configuration"
diff --git a/include/asm-m32r/pgalloc.h b/include/asm-m32r/pgalloc.h
index 943ba63..e5921ad 100644
--- a/include/asm-m32r/pgalloc.h
+++ b/include/asm-m32r/pgalloc.h
@@ -24,7 +24,7 @@
 	return pgd;
 }
 
-static __inline__ void pgd_free(pgd_t *pgd)
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	free_page((unsigned long)pgd);
 }
@@ -46,17 +46,17 @@
 	return pte;
 }
 
-static __inline__ void pte_free_kernel(pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	free_page((unsigned long)pte);
 }
 
-static __inline__ void pte_free(struct page *pte)
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
 {
 	__free_page(pte);
 }
 
-#define __pte_free_tlb(tlb, pte)	pte_free((pte))
+#define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, (pte))
 
 /*
  * allocating and freeing a pmd is trivial: the 1-entry pmd is
@@ -65,7 +65,7 @@
  */
 
 #define pmd_alloc_one(mm, addr)		({ BUG(); ((pmd_t *)2); })
-#define pmd_free(x)			do { } while (0)
+#define pmd_free(mm, x)			do { } while (0)
 #define __pmd_free_tlb(tlb, x)		do { } while (0)
 #define pgd_populate(mm, pmd, pte)	BUG()
 
diff --git a/include/asm-m68k/macintosh.h b/include/asm-m68k/macintosh.h
index 27d11da..28b0f49 100644
--- a/include/asm-m68k/macintosh.h
+++ b/include/asm-m68k/macintosh.h
@@ -14,8 +14,6 @@
 extern int mac_irq_pending(unsigned int);
 extern void mac_identify(void);
 extern void mac_report_hardware(void);
-extern void mac_debugging_penguin(int);
-extern void mac_boom(int);
 
 /*
  *	Floppy driver magic hook - probably shouldnt be here
diff --git a/include/asm-m68k/motorola_pgalloc.h b/include/asm-m68k/motorola_pgalloc.h
index 5158412..500ec9b 100644
--- a/include/asm-m68k/motorola_pgalloc.h
+++ b/include/asm-m68k/motorola_pgalloc.h
@@ -22,7 +22,7 @@
 	return pte;
 }
 
-static inline void pte_free_kernel(pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	cache_page(pte);
 	free_page((unsigned long) pte);
@@ -47,7 +47,7 @@
 	return page;
 }
 
-static inline void pte_free(struct page *page)
+static inline void pte_free(struct mm_struct *mm, struct page *page)
 {
 	cache_page(kmap(page));
 	kunmap(page);
@@ -67,7 +67,7 @@
 	return get_pointer_table();
 }
 
-static inline int pmd_free(pmd_t *pmd)
+static inline int pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
 	return free_pointer_table(pmd);
 }
@@ -78,9 +78,9 @@
 }
 
 
-static inline void pgd_free(pgd_t *pgd)
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-	pmd_free((pmd_t *)pgd);
+	pmd_free(mm, (pmd_t *)pgd);
 }
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/include/asm-m68k/sun3_pgalloc.h b/include/asm-m68k/sun3_pgalloc.h
index fd82411..a5a91e7 100644
--- a/include/asm-m68k/sun3_pgalloc.h
+++ b/include/asm-m68k/sun3_pgalloc.h
@@ -21,12 +21,12 @@
 #define pmd_alloc_one(mm,address)       ({ BUG(); ((pmd_t *)2); })
 
 
-static inline void pte_free_kernel(pte_t * pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
         free_page((unsigned long) pte);
 }
 
-static inline void pte_free(struct page *page)
+static inline void pte_free(struct mm_struct *mm, struct page *page)
 {
         __free_page(page);
 }
@@ -72,10 +72,10 @@
  * allocating and freeing a pmd is trivial: the 1-entry pmd is
  * inside the pgd, so has no extra memory associated with it.
  */
-#define pmd_free(x)			do { } while (0)
+#define pmd_free(mm, x)			do { } while (0)
 #define __pmd_free_tlb(tlb, x)		do { } while (0)
 
-static inline void pgd_free(pgd_t * pgd)
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
         free_page((unsigned long) pgd);
 }
diff --git a/include/asm-m68knommu/mcfne.h b/include/asm-m68knommu/mcfne.h
index c920ccd..431f63a 100644
--- a/include/asm-m68knommu/mcfne.h
+++ b/include/asm-m68knommu/mcfne.h
@@ -60,17 +60,6 @@
 #define	NE2000_BYTE		volatile unsigned char
 #endif
 
-#if defined(CONFIG_CFV240)
-#define NE2000_ADDR             0x40010000
-#define NE2000_ADDR1            0x40010001
-#define NE2000_ODDOFFSET        0x00000000
-#define NE2000_IRQ              1
-#define NE2000_IRQ_VECTOR       0x19
-#define NE2000_IRQ_PRIORITY     2
-#define NE2000_IRQ_LEVEL        1
-#define	NE2000_BYTE		volatile unsigned char
-#endif
-
 #if defined(CONFIG_M5307C3)
 #define NE2000_ADDR		0x40000300
 #define NE2000_ODDOFFSET	0x00010000
@@ -173,13 +162,8 @@
  *	On most NE2000 implementations on ColdFire boards the chip is
  *	mapped in kinda funny, due to its ISA heritage.
  */
-#ifdef CONFIG_CFV240
-#define NE2000_PTR(addr)	(NE2000_ADDR + ((addr & 0x3f) << 1) + 1)
-#define NE2000_DATA_PTR(addr)	(NE2000_ADDR + ((addr & 0x3f) << 1))
-#else
 #define	NE2000_PTR(addr)	((addr&0x1)?(NE2000_ODDOFFSET+addr-1):(addr))
 #define	NE2000_DATA_PTR(addr)	(addr)
-#endif
 
 
 void ne2000_outb(unsigned int val, unsigned int addr)
@@ -285,17 +269,6 @@
 }
 #endif
 
-#if defined(CONFIG_CFV240)
-void ne2000_irqsetup(int irq)
-{
-	volatile unsigned char  *icrp;
-
-	icrp = (volatile unsigned char *) (MCF_MBAR + MCFSIM_ICR1);
-	*icrp = MCFSIM_ICR_LEVEL1 | MCFSIM_ICR_PRI2 | MCFSIM_ICR_AUTOVEC;
-	mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_EINT1);
-}
-#endif
-
 #if defined(CONFIG_M5206e) && defined(CONFIG_NETtel)
 void ne2000_irqsetup(int irq)
 {
diff --git a/include/asm-m68knommu/mcfsim.h b/include/asm-m68knommu/mcfsim.h
index 1074ae7..da3f2ce 100644
--- a/include/asm-m68knommu/mcfsim.h
+++ b/include/asm-m68knommu/mcfsim.h
@@ -17,9 +17,7 @@
  *	Include 5204, 5206/e, 5235, 5249, 5270/5271, 5272, 5280/5282,
  *	5307 or 5407 specific addresses.
  */
-#if defined(CONFIG_M5204)
-#include <asm/m5204sim.h>
-#elif defined(CONFIG_M5206) || defined(CONFIG_M5206e)
+#if defined(CONFIG_M5206) || defined(CONFIG_M5206e)
 #include <asm/m5206sim.h>
 #elif defined(CONFIG_M520x)
 #include <asm/m520xsim.h>
diff --git a/include/asm-m68knommu/mcftimer.h b/include/asm-m68knommu/mcftimer.h
index 6f4d796..0f90f6d 100644
--- a/include/asm-m68knommu/mcftimer.h
+++ b/include/asm-m68knommu/mcftimer.h
@@ -16,7 +16,7 @@
 /*
  *	Get address specific defines for this ColdFire member.
  */
-#if defined(CONFIG_M5204) || defined(CONFIG_M5206) || defined(CONFIG_M5206e)
+#if defined(CONFIG_M5206) || defined(CONFIG_M5206e)
 #define	MCFTIMER_BASE1		0x100		/* Base address of TIMER1 */
 #define	MCFTIMER_BASE2		0x120		/* Base address of TIMER2 */
 #elif defined(CONFIG_M5272)
diff --git a/include/asm-m68knommu/mcfuart.h b/include/asm-m68knommu/mcfuart.h
index 8a7a677..ef22938 100644
--- a/include/asm-m68knommu/mcfuart.h
+++ b/include/asm-m68knommu/mcfuart.h
@@ -19,7 +19,7 @@
 #if defined(CONFIG_M5272)
 #define	MCFUART_BASE1		0x100		/* Base address of UART1 */
 #define	MCFUART_BASE2		0x140		/* Base address of UART2 */
-#elif defined(CONFIG_M5204) || defined(CONFIG_M5206) || defined(CONFIG_M5206e)
+#elif defined(CONFIG_M5206) || defined(CONFIG_M5206e)
 #if defined(CONFIG_NETtel)
 #define	MCFUART_BASE1		0x180		/* Base address of UART1 */
 #define	MCFUART_BASE2		0x140		/* Base address of UART2 */
diff --git a/include/asm-m68knommu/system.h b/include/asm-m68knommu/system.h
index 15b4c7d..ee2dc07 100644
--- a/include/asm-m68knommu/system.h
+++ b/include/asm-m68knommu/system.h
@@ -207,23 +207,6 @@
 }
 
 
-#ifdef CONFIG_M68332
-#define HARD_RESET_NOW() ({		\
-        local_irq_disable();		\
-        asm("				\
-	movew   #0x0000, 0xfffa6a;	\
-        reset;				\
-        /*movew #0x1557, 0xfffa44;*/	\
-        /*movew #0x0155, 0xfffa46;*/	\
-        moveal #0, %a0;			\
-        movec %a0, %vbr;		\
-        moveal 0, %sp;			\
-        moveal 4, %a0;			\
-        jmp (%a0);			\
-        ");				\
-})
-#endif
-
 #if defined( CONFIG_M68328 ) || defined( CONFIG_M68EZ328 ) || \
 	defined (CONFIG_M68360) || defined( CONFIG_M68VZ328 )
 #define HARD_RESET_NOW() ({		\
diff --git a/include/asm-mips/pgalloc.h b/include/asm-mips/pgalloc.h
index 81b7212..c4efece 100644
--- a/include/asm-mips/pgalloc.h
+++ b/include/asm-mips/pgalloc.h
@@ -58,7 +58,7 @@
 	return ret;
 }
 
-static inline void pgd_free(pgd_t *pgd)
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	free_pages((unsigned long)pgd, PGD_ORDER);
 }
@@ -85,12 +85,12 @@
 	return pte;
 }
 
-static inline void pte_free_kernel(pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	free_pages((unsigned long)pte, PTE_ORDER);
 }
 
-static inline void pte_free(struct page *pte)
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
 {
 	__free_pages(pte, PTE_ORDER);
 }
@@ -103,7 +103,7 @@
  * allocating and freeing a pmd is trivial: the 1-entry pmd is
  * inside the pgd, so has no extra memory associated with it.
  */
-#define pmd_free(x)			do { } while (0)
+#define pmd_free(mm, x)			do { } while (0)
 #define __pmd_free_tlb(tlb, x)		do { } while (0)
 
 #endif
@@ -120,12 +120,12 @@
 	return pmd;
 }
 
-static inline void pmd_free(pmd_t *pmd)
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
 	free_pages((unsigned long)pmd, PMD_ORDER);
 }
 
-#define __pmd_free_tlb(tlb, x)	pmd_free(x)
+#define __pmd_free_tlb(tlb, x)	pmd_free((tlb)->mm, x)
 
 #endif
 
diff --git a/include/asm-mips/processor.h b/include/asm-mips/processor.h
index 83bc945..36f42de 100644
--- a/include/asm-mips/processor.h
+++ b/include/asm-mips/processor.h
@@ -65,6 +65,8 @@
 #define TASK_UNMAPPED_BASE						\
 	(test_thread_flag(TIF_32BIT_ADDR) ?				\
 		PAGE_ALIGN(TASK_SIZE32 / 3) : PAGE_ALIGN(TASK_SIZE / 3))
+#define TASK_SIZE_OF(tsk)						\
+	(test_tsk_thread_flag(tsk, TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE)
 #endif
 
 #define NUM_FPU_REGS	32
diff --git a/include/asm-parisc/pgalloc.h b/include/asm-parisc/pgalloc.h
index 1af1a41..aab66f1 100644
--- a/include/asm-parisc/pgalloc.h
+++ b/include/asm-parisc/pgalloc.h
@@ -43,7 +43,7 @@
 	return actual_pgd;
 }
 
-static inline void pgd_free(pgd_t *pgd)
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 #ifdef CONFIG_64BIT
 	pgd -= PTRS_PER_PGD;
@@ -70,7 +70,7 @@
 	return pmd;
 }
 
-static inline void pmd_free(pmd_t *pmd)
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
 #ifdef CONFIG_64BIT
 	if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
@@ -91,7 +91,7 @@
  */
 
 #define pmd_alloc_one(mm, addr)		({ BUG(); ((pmd_t *)2); })
-#define pmd_free(x)			do { } while (0)
+#define pmd_free(mm, x)			do { } while (0)
 #define pgd_populate(mm, pmd, pte)	BUG()
 
 #endif
@@ -130,12 +130,12 @@
 	return pte;
 }
 
-static inline void pte_free_kernel(pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	free_page((unsigned long)pte);
 }
 
-#define pte_free(page)	pte_free_kernel(page_address(page))
+#define pte_free(mm, page) pte_free_kernel(mm, page_address(page))
 
 #define check_pgt_cache()	do { } while (0)
 
diff --git a/include/asm-parisc/processor.h b/include/asm-parisc/processor.h
index 6b294fb..3bb06e8 100644
--- a/include/asm-parisc/processor.h
+++ b/include/asm-parisc/processor.h
@@ -32,7 +32,8 @@
 #endif
 #define current_text_addr() ({ void *pc; current_ia(pc); pc; })
 
-#define TASK_SIZE               (current->thread.task_size)
+#define TASK_SIZE_OF(tsk)       ((tsk)->thread.task_size)
+#define TASK_SIZE	        TASK_SIZE_OF(current)
 #define TASK_UNMAPPED_BASE      (current->thread.map_base)
 
 #define DEFAULT_TASK_SIZE32	(0xFFF00000UL)
diff --git a/include/asm-parisc/tlb.h b/include/asm-parisc/tlb.h
index 33107a2..383b1db 100644
--- a/include/asm-parisc/tlb.h
+++ b/include/asm-parisc/tlb.h
@@ -21,7 +21,7 @@
 
 #include <asm-generic/tlb.h>
 
-#define __pmd_free_tlb(tlb, pmd)	pmd_free(pmd)
-#define __pte_free_tlb(tlb, pte)	pte_free(pte)
+#define __pmd_free_tlb(tlb, pmd)	pmd_free((tlb)->mm, pmd)
+#define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, pte)
 
 #endif
diff --git a/include/asm-powerpc/iommu.h b/include/asm-powerpc/iommu.h
index 7a3cef7..852e15f 100644
--- a/include/asm-powerpc/iommu.h
+++ b/include/asm-powerpc/iommu.h
@@ -79,19 +79,19 @@
 extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
 					    int nid);
 
-extern int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist,
+extern int iommu_map_sg(struct device *dev, struct scatterlist *sglist,
 			int nelems, unsigned long mask,
 			enum dma_data_direction direction);
 extern void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 			   int nelems, enum dma_data_direction direction);
 
-extern void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size,
-				  dma_addr_t *dma_handle, unsigned long mask,
-				  gfp_t flag, int node);
+extern void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
+				  size_t size, dma_addr_t *dma_handle,
+				  unsigned long mask, gfp_t flag, int node);
 extern void iommu_free_coherent(struct iommu_table *tbl, size_t size,
 				void *vaddr, dma_addr_t dma_handle);
-extern dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr,
-				   size_t size, unsigned long mask,
+extern dma_addr_t iommu_map_single(struct device *dev, struct iommu_table *tbl,
+				   void *vaddr, size_t size, unsigned long mask,
 				   enum dma_data_direction direction);
 extern void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle,
 			       size_t size, enum dma_data_direction direction);
diff --git a/include/asm-powerpc/nvram.h b/include/asm-powerpc/nvram.h
index 4e7059c..efde5ac 100644
--- a/include/asm-powerpc/nvram.h
+++ b/include/asm-powerpc/nvram.h
@@ -58,6 +58,9 @@
 };
 
 #ifdef __KERNEL__
+
+#include <linux/list.h>
+
 struct nvram_partition {
 	struct list_head partition;
 	struct nvram_header header;
diff --git a/include/asm-powerpc/pgalloc-32.h b/include/asm-powerpc/pgalloc-32.h
index e130743..c162a4c 100644
--- a/include/asm-powerpc/pgalloc-32.h
+++ b/include/asm-powerpc/pgalloc-32.h
@@ -6,14 +6,14 @@
 extern void __bad_pte(pmd_t *pmd);
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(pgd_t *pgd);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
 /*
  * We don't have any real pmd's, and this code never triggers because
  * the pgd will always be present..
  */
 /* #define pmd_alloc_one(mm,address)       ({ BUG(); ((pmd_t *)2); }) */
-#define pmd_free(x)                     do { } while (0)
+#define pmd_free(mm, x) 		do { } while (0)
 #define __pmd_free_tlb(tlb,x)		do { } while (0)
 /* #define pgd_populate(mm, pmd, pte)      BUG() */
 
@@ -31,10 +31,10 @@
 
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
 extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr);
-extern void pte_free_kernel(pte_t *pte);
-extern void pte_free(struct page *pte);
+extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
+extern void pte_free(struct mm_struct *mm, struct page *pte);
 
-#define __pte_free_tlb(tlb, pte)	pte_free((pte))
+#define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, (pte))
 
 #define check_pgt_cache()	do { } while (0)
 
diff --git a/include/asm-powerpc/pgalloc-64.h b/include/asm-powerpc/pgalloc-64.h
index 43214c8..5afae85 100644
--- a/include/asm-powerpc/pgalloc-64.h
+++ b/include/asm-powerpc/pgalloc-64.h
@@ -29,7 +29,7 @@
 	return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL);
 }
 
-static inline void pgd_free(pgd_t *pgd)
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	subpage_prot_free(pgd);
 	kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd);
@@ -45,7 +45,7 @@
 				GFP_KERNEL|__GFP_REPEAT);
 }
 
-static inline void pud_free(pud_t *pud)
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
 	kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud);
 }
@@ -81,7 +81,7 @@
 				GFP_KERNEL|__GFP_REPEAT);
 }
 
-static inline void pmd_free(pmd_t *pmd)
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
 	kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
 }
@@ -99,12 +99,12 @@
 	return pte ? virt_to_page(pte) : NULL;
 }
 
-static inline void pte_free_kernel(pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	free_page((unsigned long)pte);
 }
 
-static inline void pte_free(struct page *ptepage)
+static inline void pte_free(struct mm_struct *mm, struct page *ptepage)
 {
 	__free_page(ptepage);
 }
diff --git a/include/asm-powerpc/processor.h b/include/asm-powerpc/processor.h
index dba7c94..1f4765d 100644
--- a/include/asm-powerpc/processor.h
+++ b/include/asm-powerpc/processor.h
@@ -99,8 +99,9 @@
  */
 #define TASK_SIZE_USER32 (0x0000000100000000UL - (1*PAGE_SIZE))
 
-#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
+#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
 		TASK_SIZE_USER32 : TASK_SIZE_USER64)
+#define TASK_SIZE	  TASK_SIZE_OF(current)
 
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h
index 0c8b0d6..e996521 100644
--- a/include/asm-powerpc/systbl.h
+++ b/include/asm-powerpc/systbl.h
@@ -309,7 +309,7 @@
 COMPAT_SYS(epoll_pwait)
 COMPAT_SYS_SPU(utimensat)
 COMPAT_SYS_SPU(signalfd)
-COMPAT_SYS_SPU(timerfd)
+SYSCALL(ni_syscall)
 SYSCALL_SPU(eventfd)
 COMPAT_SYS_SPU(sync_file_range2)
 COMPAT_SYS(fallocate)
diff --git a/include/asm-ppc/pgalloc.h b/include/asm-ppc/pgalloc.h
index 44d88a9..7c39a95 100644
--- a/include/asm-ppc/pgalloc.h
+++ b/include/asm-ppc/pgalloc.h
@@ -7,14 +7,14 @@
 extern void __bad_pte(pmd_t *pmd);
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(pgd_t *pgd);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
 /*
  * We don't have any real pmd's, and this code never triggers because
  * the pgd will always be present..
  */
 #define pmd_alloc_one(mm,address)       ({ BUG(); ((pmd_t *)2); })
-#define pmd_free(x)                     do { } while (0)
+#define pmd_free(mm, x) 		do { } while (0)
 #define __pmd_free_tlb(tlb,x)		do { } while (0)
 #define pgd_populate(mm, pmd, pte)      BUG()
 
@@ -32,10 +32,10 @@
 
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
 extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr);
-extern void pte_free_kernel(pte_t *pte);
-extern void pte_free(struct page *pte);
+extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
+extern void pte_free(struct mm_struct *mm, struct page *pte);
 
-#define __pte_free_tlb(tlb, pte)	pte_free((pte))
+#define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, (pte))
 
 #define check_pgt_cache()	do { } while (0)
 
diff --git a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h
index dba6fec..882db05 100644
--- a/include/asm-s390/bitops.h
+++ b/include/asm-s390/bitops.h
@@ -440,242 +440,256 @@
  __test_bit((nr),(addr)) )
 
 /*
- * ffz = Find First Zero in word. Undefined if no zero exists,
- * so code should check against ~0UL first..
+ * Optimized find bit helper functions.
  */
-static inline unsigned long ffz(unsigned long word)
-{
-        unsigned long bit = 0;
 
+/**
+ * __ffz_word_loop - byte offset of first long != -1UL, or of the last long
+ * @addr: pointer to array of unsigned long
+ * @size: size of the array in bits, callers pass a nonzero value
+ */
+static inline unsigned long __ffz_word_loop(const unsigned long *addr,
+					    unsigned long size)
+{
+	typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
+	unsigned long bytes = 0;
+
+	asm volatile(
+#ifndef __s390x__
+		"	ahi	%1,-1\n"
+		"	sra	%1,5\n"
+		"	jz	1f\n"
+		"0:	c	%2,0(%0,%3)\n"
+		"	jne	1f\n"
+		"	la	%0,4(%0)\n"
+		"	brct	%1,0b\n"
+#else
+		"	aghi	%1,-1\n"
+		"	srag	%1,%1,6\n"
+		"	jz	1f\n"
+		"0:	cg	%2,0(%0,%3)\n"
+		"	jne	1f\n"
+		"	la	%0,8(%0)\n"
+		"	brct	%1,0b\n"
+#endif
+		"1:\n"
+		: "+a" (bytes), "+d" (size)
+		: "d" (-1UL), "a" (addr), "m" (*(addrtype *) addr) : "cc");
+	return bytes;
+}
+
+/**
+ * __ffs_word_loop - byte offset of first long != 0UL, or of the last long
+ * @addr: pointer to array of unsigned long
+ * @size: size of the array in bits, callers pass a nonzero value
+ */
+static inline unsigned long __ffs_word_loop(const unsigned long *addr,
+					    unsigned long size)
+{
+	typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
+	unsigned long bytes = 0;
+
+	asm volatile(
+#ifndef __s390x__
+		"	ahi	%1,-1\n"
+		"	sra	%1,5\n"
+		"	jz	1f\n"
+		"0:	c	%2,0(%0,%3)\n"
+		"	jne	1f\n"
+		"	la	%0,4(%0)\n"
+		"	brct	%1,0b\n"
+#else
+		"	aghi	%1,-1\n"
+		"	srag	%1,%1,6\n"
+		"	jz	1f\n"
+		"0:	cg	%2,0(%0,%3)\n"
+		"	jne	1f\n"
+		"	la	%0,8(%0)\n"
+		"	brct	%1,0b\n"
+#endif
+		"1:\n"
+		: "+a" (bytes), "+a" (size)
+		: "d" (0UL), "a" (addr), "m" (*(addrtype *) addr) : "cc");
+	return bytes;
+}
+
+/**
+ * __ffz_word - add number of the first unset bit
+ * @nr: base value the bit number is added to
+ * @word: the word that is searched for unset bits
+ */
+static inline unsigned long __ffz_word(unsigned long nr, unsigned long word)
+{
 #ifdef __s390x__
 	if (likely((word & 0xffffffff) == 0xffffffff)) {
 		word >>= 32;
-		bit += 32;
+		nr += 32;
 	}
 #endif
 	if (likely((word & 0xffff) == 0xffff)) {
 		word >>= 16;
-		bit += 16;
+		nr += 16;
 	}
 	if (likely((word & 0xff) == 0xff)) {
 		word >>= 8;
-		bit += 8;
+		nr += 8;
 	}
-	return bit + _zb_findmap[word & 0xff];
+	return nr + _zb_findmap[(unsigned char) word];
 }
 
-/*
- * __ffs = find first bit in word. Undefined if no bit exists,
- * so code should check against 0UL first..
+/**
+ * __ffs_word - add number of the first set bit
+ * @nr: base value the bit number is added to
+ * @word: the word that is searched for set bits
  */
-static inline unsigned long __ffs (unsigned long word)
+static inline unsigned long __ffs_word(unsigned long nr, unsigned long word)
 {
-	unsigned long bit = 0;
-
 #ifdef __s390x__
 	if (likely((word & 0xffffffff) == 0)) {
 		word >>= 32;
-		bit += 32;
+		nr += 32;
 	}
 #endif
 	if (likely((word & 0xffff) == 0)) {
 		word >>= 16;
-		bit += 16;
+		nr += 16;
 	}
 	if (likely((word & 0xff) == 0)) {
 		word >>= 8;
-		bit += 8;
+		nr += 8;
 	}
-	return bit + _sb_findmap[word & 0xff];
+	return nr + _sb_findmap[(unsigned char) word];
+}
+
+
+/**
+ * __load_ulong_be - read an unsigned long as stored (big endian)
+ * @p: start of the unsigned long array
+ * @offset: distance in bytes from @p to the value to read
+ */
+static inline unsigned long __load_ulong_be(const unsigned long *p,
+					    unsigned long offset)
+{
+	p = (const unsigned long *)((const char *) p + offset);
+	return *p;
+}
+
+/**
+ * __load_ulong_le - load little endian unsigned long
+ * @p: pointer to array of unsigned long
+ * @offset: byte offset of source value in the array
+ */
+static inline unsigned long __load_ulong_le(const unsigned long *p,
+					    unsigned long offset)
+{
+	unsigned long word;
+
+	p = (unsigned long *)((unsigned long) p + offset);
+#ifndef __s390x__
+	asm volatile(
+		"	ic	%0,0(%1)\n"
+		"	icm	%0,2,1(%1)\n"
+		"	icm	%0,4,2(%1)\n"
+		"	icm	%0,8,3(%1)"
+		: "=&d" (word) : "a" (p), "m" (*p) : "cc");
+#else
+	asm volatile(
+		"	lrvg	%0,%1"
+		: "=d" (word) : "m" (*p) );
+#endif
+	return word;
 }
 
 /*
- * Find-bit routines..
+ * The various find bit functions.
  */
 
-#ifndef __s390x__
-
-static inline int
-find_first_zero_bit(const unsigned long * addr, unsigned long size)
+/**
+ * ffz - find first zero in word.
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+static inline unsigned long ffz(unsigned long word)
 {
-	typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
-	unsigned long cmp, count;
-        unsigned int res;
+	return __ffz_word(0, word);
+}
+
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs (unsigned long word)
+{
+	return __ffs_word(0, word);
+}
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * Follows the convention of the libc and compiler builtin ffs
+ * routines (man ffs): bit positions are counted starting at 1
+ * and the result is 0 if no bit is set in @x. This differs from
+ * ffz above, which counts bit positions starting at 0 and is
+ * undefined if the word has no zero bit.
+ */
+static inline int ffs(int x)
+{
+	return x ? __ffs_word(1, x) : 0;
+}
+
+/**
+ * find_first_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit-number of the first zero bit, not the number of the byte
+ * containing a bit.
+ */
+static inline unsigned long find_first_zero_bit(const unsigned long *addr,
+						unsigned long size)
+{
+	unsigned long bytes, bits;
 
         if (!size)
                 return 0;
-	asm volatile(
-		"	lhi	%1,-1\n"
-		"	lr	%2,%3\n"
-		"	slr	%0,%0\n"
-		"	ahi	%2,31\n"
-		"	srl	%2,5\n"
-		"0:	c	%1,0(%0,%4)\n"
-		"	jne	1f\n"
-		"	la	%0,4(%0)\n"
-		"	brct	%2,0b\n"
-		"	lr	%0,%3\n"
-		"	j	4f\n"
-		"1:	l	%2,0(%0,%4)\n"
-		"	sll	%0,3\n"
-		"	lhi	%1,0xff\n"
-		"	tml	%2,0xffff\n"
-		"	jno	2f\n"
-		"	ahi	%0,16\n"
-		"	srl	%2,16\n"
-		"2:	tml	%2,0x00ff\n"
-		"	jno	3f\n"
-		"	ahi	%0,8\n"
-		"	srl	%2,8\n"
-		"3:	nr	%2,%1\n"
-		"	ic	%2,0(%2,%5)\n"
-		"	alr	%0,%2\n"
-		"4:"
-		: "=&a" (res), "=&d" (cmp), "=&a" (count)
-		: "a" (size), "a" (addr), "a" (&_zb_findmap),
-		  "m" (*(addrtype *) addr) : "cc");
-        return (res < size) ? res : size;
+	bytes = __ffz_word_loop(addr, size);
+	bits = __ffz_word(bytes*8, __load_ulong_be(addr, bytes));
+	return (bits < size) ? bits : size;
 }
 
-static inline int
-find_first_bit(const unsigned long * addr, unsigned long size)
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit-number of the first set bit, not the number of the byte
+ * containing a bit.
+ */
+static inline unsigned long find_first_bit(const unsigned long * addr,
+					   unsigned long size)
 {
-	typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
-	unsigned long cmp, count;
-        unsigned int res;
+	unsigned long bytes, bits;
 
         if (!size)
                 return 0;
-	asm volatile(
-		"	slr	%1,%1\n"
-		"	lr	%2,%3\n"
-		"	slr	%0,%0\n"
-		"	ahi	%2,31\n"
-		"	srl	%2,5\n"
-		"0:	c	%1,0(%0,%4)\n"
-		"	jne	1f\n"
-		"	la	%0,4(%0)\n"
-		"	brct	%2,0b\n"
-		"	lr	%0,%3\n"
-		"	j	4f\n"
-		"1:	l	%2,0(%0,%4)\n"
-		"	sll	%0,3\n"
-		"	lhi	%1,0xff\n"
-		"	tml	%2,0xffff\n"
-		"	jnz	2f\n"
-		"	ahi	%0,16\n"
-		"	srl	%2,16\n"
-		"2:	tml	%2,0x00ff\n"
-		"	jnz	3f\n"
-		"	ahi	%0,8\n"
-		"	srl	%2,8\n"
-		"3:	nr	%2,%1\n"
-		"	ic	%2,0(%2,%5)\n"
-		"	alr	%0,%2\n"
-		"4:"
-		: "=&a" (res), "=&d" (cmp), "=&a" (count)
-		: "a" (size), "a" (addr), "a" (&_sb_findmap),
-		  "m" (*(addrtype *) addr) : "cc");
-        return (res < size) ? res : size;
+	bytes = __ffs_word_loop(addr, size);
+	bits = __ffs_word(bytes*8, __load_ulong_be(addr, bytes));
+	return (bits < size) ? bits : size;
 }
 
-#else /* __s390x__ */
-
-static inline unsigned long
-find_first_zero_bit(const unsigned long * addr, unsigned long size)
-{
-	typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
-        unsigned long res, cmp, count;
-
-        if (!size)
-                return 0;
-	asm volatile(
-		"	lghi	%1,-1\n"
-		"	lgr	%2,%3\n"
-		"	slgr	%0,%0\n"
-		"	aghi	%2,63\n"
-		"	srlg	%2,%2,6\n"
-		"0:	cg	%1,0(%0,%4)\n"
-		"	jne	1f\n"
-		"	la	%0,8(%0)\n"
-		"	brct	%2,0b\n"
-		"	lgr	%0,%3\n"
-		"	j	5f\n"
-		"1:	lg	%2,0(%0,%4)\n"
-		"	sllg	%0,%0,3\n"
-		"	clr	%2,%1\n"
-		"	jne	2f\n"
-		"	aghi	%0,32\n"
-		"	srlg	%2,%2,32\n"
-		"2:	lghi	%1,0xff\n"
-		"	tmll	%2,0xffff\n"
-		"	jno	3f\n"
-		"	aghi	%0,16\n"
-		"	srl	%2,16\n"
-		"3:	tmll	%2,0x00ff\n"
-		"	jno	4f\n"
-		"	aghi	%0,8\n"
-		"	srl	%2,8\n"
-		"4:	ngr	%2,%1\n"
-		"	ic	%2,0(%2,%5)\n"
-		"	algr	%0,%2\n"
-		"5:"
-		: "=&a" (res), "=&d" (cmp), "=&a" (count)
-		: "a" (size), "a" (addr), "a" (&_zb_findmap),
-		  "m" (*(addrtype *) addr) : "cc");
-        return (res < size) ? res : size;
-}
-
-static inline unsigned long
-find_first_bit(const unsigned long * addr, unsigned long size)
-{
-	typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
-        unsigned long res, cmp, count;
-
-        if (!size)
-                return 0;
-	asm volatile(
-		"	slgr	%1,%1\n"
-		"	lgr	%2,%3\n"
-		"	slgr	%0,%0\n"
-		"	aghi	%2,63\n"
-		"	srlg	%2,%2,6\n"
-		"0:	cg	%1,0(%0,%4)\n"
-		"	jne	1f\n"
-		"	aghi	%0,8\n"
-		"	brct	%2,0b\n"
-		"	lgr	%0,%3\n"
-		"	j	5f\n"
-		"1:	lg	%2,0(%0,%4)\n"
-		"	sllg	%0,%0,3\n"
-		"	clr	%2,%1\n"
-		"	jne	2f\n"
-		"	aghi	%0,32\n"
-		"	srlg	%2,%2,32\n"
-		"2:	lghi	%1,0xff\n"
-		"	tmll	%2,0xffff\n"
-		"	jnz	3f\n"
-		"	aghi	%0,16\n"
-		"	srl	%2,16\n"
-		"3:	tmll	%2,0x00ff\n"
-		"	jnz	4f\n"
-		"	aghi	%0,8\n"
-		"	srl	%2,8\n"
-		"4:	ngr	%2,%1\n"
-		"	ic	%2,0(%2,%5)\n"
-		"	algr	%0,%2\n"
-		"5:"
-		: "=&a" (res), "=&d" (cmp), "=&a" (count)
-		: "a" (size), "a" (addr), "a" (&_sb_findmap),
-		  "m" (*(addrtype *) addr) : "cc");
-        return (res < size) ? res : size;
-}
-
-#endif /* __s390x__ */
-
-static inline int
-find_next_zero_bit (const unsigned long * addr, unsigned long size,
-		    unsigned long offset)
+/**
+ * find_next_zero_bit - find the next zero bit in a memory region
+ * @addr: The address to base the search on
+ * @size: The maximum size to search
+ * @offset: The bitnumber to start searching at
+ */
+static inline int find_next_zero_bit (const unsigned long * addr,
+				      unsigned long size,
+				      unsigned long offset)
 {
         const unsigned long *p;
 	unsigned long bit, set;
@@ -688,10 +702,10 @@
 	p = addr + offset / __BITOPS_WORDSIZE;
 	if (bit) {
 		/*
-		 * s390 version of ffz returns __BITOPS_WORDSIZE
+		 * __ffz_word returns __BITOPS_WORDSIZE
 		 * if no zero bit is present in the word.
 		 */
-		set = ffz(*p >> bit) + bit;
+		set = __ffz_word(0, *p >> bit) + bit;
 		if (set >= size)
 			return size + offset;
 		if (set < __BITOPS_WORDSIZE)
@@ -703,9 +717,15 @@
 	return offset + find_first_zero_bit(p, size);
 }
 
-static inline int
-find_next_bit (const unsigned long * addr, unsigned long size,
-	       unsigned long offset)
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @size: The maximum size to search
+ * @offset: The bitnumber to start searching at
+ */
+static inline int find_next_bit (const unsigned long * addr,
+				 unsigned long size,
+				 unsigned long offset)
 {
         const unsigned long *p;
 	unsigned long bit, set;
@@ -718,10 +738,10 @@
 	p = addr + offset / __BITOPS_WORDSIZE;
 	if (bit) {
 		/*
-		 * s390 version of __ffs returns __BITOPS_WORDSIZE
+		 * __ffs_word returns __BITOPS_WORDSIZE
 		 * if no one bit is present in the word.
 		 */
-		set = __ffs(*p & (~0UL << bit));
+		set = __ffs_word(0, *p & (~0UL << bit));
 		if (set >= size)
 			return size + offset;
 		if (set < __BITOPS_WORDSIZE)
@@ -744,8 +764,6 @@
 	return find_first_bit(b, 140);
 }
 
-#include <asm-generic/bitops/ffs.h>
-
 #include <asm-generic/bitops/fls.h>
 #include <asm-generic/bitops/fls64.h>
 
@@ -772,108 +790,23 @@
 	test_and_clear_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr)
 #define ext2_test_bit(nr, addr)      \
 	test_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr)
-#define ext2_find_next_bit(addr, size, off) \
-	generic_find_next_le_bit((unsigned long *)(addr), (size), (off))
 
-#ifndef __s390x__
-
-static inline int 
-ext2_find_first_zero_bit(void *vaddr, unsigned int size)
+static inline int ext2_find_first_zero_bit(void *vaddr, unsigned int size)
 {
-	typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
-	unsigned long cmp, count;
-        unsigned int res;
+	unsigned long bytes, bits;
 
         if (!size)
                 return 0;
-	asm volatile(
-		"	lhi	%1,-1\n"
-		"	lr	%2,%3\n"
-		"	ahi	%2,31\n"
-		"	srl	%2,5\n"
-		"	slr	%0,%0\n"
-		"0:	cl	%1,0(%0,%4)\n"
-		"	jne	1f\n"
-		"	ahi	%0,4\n"
-		"	brct	%2,0b\n"
-		"	lr	%0,%3\n"
-		"	j	4f\n"
-		"1:	l	%2,0(%0,%4)\n"
-		"	sll	%0,3\n"
-		"	ahi	%0,24\n"
-		"	lhi	%1,0xff\n"
-		"	tmh	%2,0xffff\n"
-		"	jo	2f\n"
-		"	ahi	%0,-16\n"
-		"	srl	%2,16\n"
-		"2:	tml	%2,0xff00\n"
-		"	jo	3f\n"
-		"	ahi	%0,-8\n"
-		"	srl	%2,8\n"
-		"3:	nr	%2,%1\n"
-		"	ic	%2,0(%2,%5)\n"
-		"	alr	%0,%2\n"
-		"4:"
-		: "=&a" (res), "=&d" (cmp), "=&a" (count)
-		: "a" (size), "a" (vaddr), "a" (&_zb_findmap),
-		  "m" (*(addrtype *) vaddr) : "cc");
-        return (res < size) ? res : size;
+	bytes = __ffz_word_loop(vaddr, size);
+	bits = __ffz_word(bytes*8, __load_ulong_le(vaddr, bytes));
+	return (bits < size) ? bits : size;
 }
 
-#else /* __s390x__ */
-
-static inline unsigned long
-ext2_find_first_zero_bit(void *vaddr, unsigned long size)
-{
-	typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
-        unsigned long res, cmp, count;
-
-        if (!size)
-                return 0;
-	asm volatile(
-		"	lghi	%1,-1\n"
-		"	lgr	%2,%3\n"
-		"	aghi	%2,63\n"
-		"	srlg	%2,%2,6\n"
-		"	slgr	%0,%0\n"
-		"0:	clg	%1,0(%0,%4)\n"
-		"	jne	1f\n"
-		"	aghi	%0,8\n"
-		"	brct	%2,0b\n"
-		"	lgr	%0,%3\n"
-		"	j	5f\n"
-		"1:	cl	%1,0(%0,%4)\n"
-		"	jne	2f\n"
-		"	aghi	%0,4\n"
-		"2:	l	%2,0(%0,%4)\n"
-		"	sllg	%0,%0,3\n"
-		"	aghi	%0,24\n"
-		"	lghi	%1,0xff\n"
-		"	tmlh	%2,0xffff\n"
-		"	jo	3f\n"
-		"	aghi	%0,-16\n"
-		"	srl	%2,16\n"
-		"3:	tmll	%2,0xff00\n"
-		"	jo	4f\n"
-		"	aghi	%0,-8\n"
-		"	srl	%2,8\n"
-		"4:	ngr	%2,%1\n"
-		"	ic	%2,0(%2,%5)\n"
-		"	algr	%0,%2\n"
-		"5:"
-		: "=&a" (res), "=&d" (cmp), "=&a" (count)
-		: "a" (size), "a" (vaddr), "a" (&_zb_findmap),
-		  "m" (*(addrtype *) vaddr) : "cc");
-        return (res < size) ? res : size;
-}
-
-#endif /* __s390x__ */
-
-static inline int
-ext2_find_next_zero_bit(void *vaddr, unsigned long size, unsigned long offset)
+static inline int ext2_find_next_zero_bit(void *vaddr, unsigned long size,
+					  unsigned long offset)
 {
         unsigned long *addr = vaddr, *p;
-	unsigned long word, bit, set;
+	unsigned long bit, set;
 
         if (offset >= size)
                 return size;
@@ -882,23 +815,11 @@
 	size -= offset;
 	p = addr + offset / __BITOPS_WORDSIZE;
         if (bit) {
-#ifndef __s390x__
-		asm volatile(
-			"	ic	%0,0(%1)\n"
-			"	icm	%0,2,1(%1)\n"
-			"	icm	%0,4,2(%1)\n"
-			"	icm	%0,8,3(%1)"
-			: "=&a" (word) : "a" (p), "m" (*p) : "cc");
-#else
-		asm volatile(
-			"	lrvg	%0,%1"
-			: "=a" (word) : "m" (*p) );
-#endif
 		/*
 		 * s390 version of ffz returns __BITOPS_WORDSIZE
 		 * if no zero bit is present in the word.
 		 */
-		set = ffz(word >> bit) + bit;
+		set = ffz(__load_ulong_le(p, 0) >> bit) + bit;
 		if (set >= size)
 			return size + offset;
 		if (set < __BITOPS_WORDSIZE)
@@ -910,6 +831,47 @@
 	return offset + ext2_find_first_zero_bit(p, size);
 }
 
+/* Find the first set bit in a little endian bitmap, or @size if none. */
+static inline unsigned long ext2_find_first_bit(void *vaddr, unsigned long size)
+{
+	unsigned long off, nr;
+
+	if (size == 0)
+		return 0;
+	off = __ffs_word_loop(vaddr, size);
+	nr = __ffs_word(off * 8, __load_ulong_le(vaddr, off));
+	return (nr >= size) ? size : nr;
+}
+
+/*
+ * Find the next set bit >= @offset in a little endian bitmap, else @size.
+ */
+static inline int ext2_find_next_bit(void *vaddr, unsigned long size,
+				     unsigned long offset)
+{
+	unsigned long *addr = vaddr, *p;
+	unsigned long bit, set;
+
+	if (offset >= size)
+		return size;
+	bit = offset & (__BITOPS_WORDSIZE - 1);
+	offset -= bit;
+	size -= offset;
+	p = addr + offset / __BITOPS_WORDSIZE;
+	if (bit) {
+		/* __ffs_word returns __BITOPS_WORDSIZE if no bit is set */
+		set = __ffs_word(0, __load_ulong_le(p, 0) & (~0UL << bit));
+		if (set >= size)
+			return size + offset;
+		if (set < __BITOPS_WORDSIZE)
+			return set + offset;
+		offset += __BITOPS_WORDSIZE;
+		size -= __BITOPS_WORDSIZE;
+		p++;
+	}
+	return offset + ext2_find_first_bit(p, size);
+}
+
 #include <asm-generic/bitops/minix.h>
 
 #endif /* __KERNEL__ */
diff --git a/include/asm-s390/cacheflush.h b/include/asm-s390/cacheflush.h
index f7cade8..49d5af9 100644
--- a/include/asm-s390/cacheflush.h
+++ b/include/asm-s390/cacheflush.h
@@ -24,4 +24,8 @@
 #define copy_from_user_page(vma, page, vaddr, dst, src, len) \
 	memcpy(dst, src, len)
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void kernel_map_pages(struct page *page, int numpages, int enable);
+#endif
+
 #endif /* _S390_CACHEFLUSH_H */
diff --git a/include/asm-s390/ccwgroup.h b/include/asm-s390/ccwgroup.h
index 7109c7c..289053e 100644
--- a/include/asm-s390/ccwgroup.h
+++ b/include/asm-s390/ccwgroup.h
@@ -37,6 +37,7 @@
  * @remove: function called on remove
  * @set_online: function called when device is set online
  * @set_offline: function called when device is set offline
+ * @shutdown: function called when device is shut down
  * @driver: embedded driver structure
  */
 struct ccwgroup_driver {
@@ -49,6 +50,7 @@
 	void (*remove) (struct ccwgroup_device *);
 	int (*set_online) (struct ccwgroup_device *);
 	int (*set_offline) (struct ccwgroup_device *);
+	void (*shutdown)(struct ccwgroup_device *);
 
 	struct device_driver driver;
 };
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h
index 709dd17..6f6619b 100644
--- a/include/asm-s390/pgalloc.h
+++ b/include/asm-s390/pgalloc.h
@@ -57,10 +57,10 @@
 }
 
 #define pud_alloc_one(mm,address)		({ BUG(); ((pud_t *)2); })
-#define pud_free(x)				do { } while (0)
+#define pud_free(mm, x)				do { } while (0)
 
 #define pmd_alloc_one(mm,address)		({ BUG(); ((pmd_t *)2); })
-#define pmd_free(x)				do { } while (0)
+#define pmd_free(mm, x)				do { } while (0)
 
 #define pgd_populate(mm, pgd, pud)		BUG()
 #define pgd_populate_kernel(mm, pgd, pud)	BUG()
@@ -76,7 +76,7 @@
 }
 
 #define pud_alloc_one(mm,address)		({ BUG(); ((pud_t *)2); })
-#define pud_free(x)				do { } while (0)
+#define pud_free(mm, x)				do { } while (0)
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
 {
@@ -85,7 +85,7 @@
 		crst_table_init(crst, _SEGMENT_ENTRY_EMPTY);
 	return (pmd_t *) crst;
 }
-#define pmd_free(pmd) crst_table_free((unsigned long *) pmd)
+#define pmd_free(mm, pmd) crst_table_free((unsigned long *)pmd)
 
 #define pgd_populate(mm, pgd, pud)		BUG()
 #define pgd_populate_kernel(mm, pgd, pud)	BUG()
@@ -115,7 +115,7 @@
 		crst_table_init(crst, pgd_entry_type(mm));
 	return (pgd_t *) crst;
 }
-#define pgd_free(pgd) crst_table_free((unsigned long *) pgd)
+#define pgd_free(mm, pgd) crst_table_free((unsigned long *) pgd)
 
 static inline void 
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
@@ -151,9 +151,9 @@
 #define pte_alloc_one(mm, vmaddr) \
 	virt_to_page(page_table_alloc(s390_noexec))
 
-#define pte_free_kernel(pte) \
+#define pte_free_kernel(mm, pte) \
 	page_table_free((unsigned long *) pte)
-#define pte_free(pte) \
+#define pte_free(mm, pte) \
 	page_table_free((unsigned long *) page_to_phys((struct page *) pte))
 
 #endif /* _S390_PGALLOC_H */
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index 79b9eab..3f52075 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -115,15 +115,21 @@
 #ifndef __s390x__
 #define VMALLOC_START	0x78000000UL
 #define VMALLOC_END	0x7e000000UL
-#define VMEM_MAP_MAX	0x80000000UL
+#define VMEM_MAP_END	0x80000000UL
 #else /* __s390x__ */
 #define VMALLOC_START	0x3e000000000UL
 #define VMALLOC_END	0x3e040000000UL
-#define VMEM_MAP_MAX	0x40000000000UL
+#define VMEM_MAP_END	0x40000000000UL
 #endif /* __s390x__ */
 
+/*
+ * VMEM_MAX_PHYS is the highest physical address that can be added to the 1:1
+ * mapping. This needs to be calculated at compile time since the size of the
+ * VMEM_MAP is static but the size of struct page can change.
+ */
+#define VMEM_MAX_PHYS	min(VMALLOC_START, ((VMEM_MAP_END - VMALLOC_END) / \
+			  sizeof(struct page) * PAGE_SIZE) & ~((16 << 20) - 1))
 #define VMEM_MAP	((struct page *) VMALLOC_END)
-#define VMEM_MAP_SIZE	((VMALLOC_START / PAGE_SIZE) * sizeof(struct page))
 
 /*
  * A 31 bit pagetable entry of S390 has following format:
diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h
index c86b982..4f74460 100644
--- a/include/asm-s390/processor.h
+++ b/include/asm-s390/processor.h
@@ -70,8 +70,9 @@
 
 #else /* __s390x__ */
 
-# define TASK_SIZE		(test_thread_flag(TIF_31BIT) ? \
+# define TASK_SIZE_OF(tsk)	(test_tsk_thread_flag(tsk, TIF_31BIT) ? \
 					(0x80000000UL) : (0x40000000000UL))
+# define TASK_SIZE		TASK_SIZE_OF(current)
 # define TASK_UNMAPPED_BASE	(TASK_SIZE / 2)
 # define DEFAULT_TASK_SIZE	(0x40000000000UL)
 
diff --git a/include/asm-s390/tlb.h b/include/asm-s390/tlb.h
index 618693cf..985de2b 100644
--- a/include/asm-s390/tlb.h
+++ b/include/asm-s390/tlb.h
@@ -65,9 +65,9 @@
 	if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pmds < TLB_NR_PTRS))
 		__tlb_flush_mm(tlb->mm);
 	while (tlb->nr_ptes > 0)
-		pte_free(tlb->array[--tlb->nr_ptes]);
+		pte_free(tlb->mm, tlb->array[--tlb->nr_ptes]);
 	while (tlb->nr_pmds < TLB_NR_PTRS)
-		pmd_free((pmd_t *) tlb->array[tlb->nr_pmds++]);
+		pmd_free(tlb->mm, (pmd_t *) tlb->array[tlb->nr_pmds++]);
 }
 
 static inline void tlb_finish_mmu(struct mmu_gather *tlb,
@@ -102,7 +102,7 @@
 		if (tlb->nr_ptes >= tlb->nr_pmds)
 			tlb_flush_mmu(tlb, 0, 0);
 	} else
-		pte_free(page);
+		pte_free(tlb->mm, page);
 }
 
 /*
@@ -117,7 +117,7 @@
 		if (tlb->nr_ptes >= tlb->nr_pmds)
 			tlb_flush_mmu(tlb, 0, 0);
 	} else
-		pmd_free(pmd);
+		pmd_free(tlb->mm, pmd);
 #endif
 }
 
diff --git a/include/asm-sh/pgalloc.h b/include/asm-sh/pgalloc.h
index 18b613c..59ca16d 100644
--- a/include/asm-sh/pgalloc.h
+++ b/include/asm-sh/pgalloc.h
@@ -36,7 +36,7 @@
 	return quicklist_alloc(QUICK_PGD, GFP_KERNEL | __GFP_REPEAT, pgd_ctor);
 }
 
-static inline void pgd_free(pgd_t *pgd)
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	quicklist_free(QUICK_PGD, NULL, pgd);
 }
@@ -54,12 +54,12 @@
 	return pg ? virt_to_page(pg) : NULL;
 }
 
-static inline void pte_free_kernel(pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	quicklist_free(QUICK_PT, NULL, pte);
 }
 
-static inline void pte_free(struct page *pte)
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
 {
 	quicklist_free_page(QUICK_PT, NULL, pte);
 }
@@ -71,7 +71,7 @@
  * inside the pgd, so has no extra memory associated with it.
  */
 
-#define pmd_free(x)			do { } while (0)
+#define pmd_free(mm, x)			do { } while (0)
 #define __pmd_free_tlb(tlb,x)		do { } while (0)
 
 static inline void check_pgt_cache(void)
diff --git a/include/asm-sparc/pgalloc.h b/include/asm-sparc/pgalloc.h
index a449cd4..b5fbdd3 100644
--- a/include/asm-sparc/pgalloc.h
+++ b/include/asm-sparc/pgalloc.h
@@ -32,7 +32,7 @@
 BTFIXUPDEF_CALL(void, free_pgd_fast, pgd_t *)
 #define free_pgd_fast(pgd)	BTFIXUP_CALL(free_pgd_fast)(pgd)
 
-#define pgd_free(pgd)	free_pgd_fast(pgd)
+#define pgd_free(mm, pgd)	free_pgd_fast(pgd)
 #define pgd_alloc(mm)	get_pgd_fast()
 
 BTFIXUPDEF_CALL(void, pgd_set, pgd_t *, pmd_t *)
@@ -45,8 +45,8 @@
 BTFIXUPDEF_CALL(void, free_pmd_fast, pmd_t *)
 #define free_pmd_fast(pmd)	BTFIXUP_CALL(free_pmd_fast)(pmd)
 
-#define pmd_free(pmd)           free_pmd_fast(pmd)
-#define __pmd_free_tlb(tlb, pmd) pmd_free(pmd)
+#define pmd_free(mm, pmd)	free_pmd_fast(pmd)
+#define __pmd_free_tlb(tlb, pmd) pmd_free((tlb)->mm, pmd)
 
 BTFIXUPDEF_CALL(void, pmd_populate, pmd_t *, struct page *)
 #define pmd_populate(MM, PMD, PTE)        BTFIXUP_CALL(pmd_populate)(PMD, PTE)
@@ -59,10 +59,10 @@
 #define pte_alloc_one_kernel(mm, addr)	BTFIXUP_CALL(pte_alloc_one_kernel)(mm, addr)
 
 BTFIXUPDEF_CALL(void, free_pte_fast, pte_t *)
-#define pte_free_kernel(pte)	BTFIXUP_CALL(free_pte_fast)(pte)
+#define pte_free_kernel(mm, pte)	BTFIXUP_CALL(free_pte_fast)(pte)
 
 BTFIXUPDEF_CALL(void, pte_free, struct page *)
-#define pte_free(pte)		BTFIXUP_CALL(pte_free)(pte)
-#define __pte_free_tlb(tlb, pte)	pte_free(pte)
+#define pte_free(mm, pte)	BTFIXUP_CALL(pte_free)(pte)
+#define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, pte)
 
 #endif /* _SPARC_PGALLOC_H */
diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h
index 5d66b85..b48f73c 100644
--- a/include/asm-sparc64/pgalloc.h
+++ b/include/asm-sparc64/pgalloc.h
@@ -20,7 +20,7 @@
 	return quicklist_alloc(0, GFP_KERNEL, NULL);
 }
 
-static inline void pgd_free(pgd_t *pgd)
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	quicklist_free(0, NULL, pgd);
 }
@@ -32,7 +32,7 @@
 	return quicklist_alloc(0, GFP_KERNEL, NULL);
 }
 
-static inline void pmd_free(pmd_t *pmd)
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
 	quicklist_free(0, NULL, pmd);
 }
@@ -50,12 +50,12 @@
 	return pg ? virt_to_page(pg) : NULL;
 }
 		
-static inline void pte_free_kernel(pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	quicklist_free(0, NULL, pte);
 }
 
-static inline void pte_free(struct page *ptepage)
+static inline void pte_free(struct mm_struct *mm, struct page *ptepage)
 {
 	quicklist_free_page(0, NULL, ptepage);
 }
diff --git a/include/asm-sparc64/tlb.h b/include/asm-sparc64/tlb.h
index 349d1d3..ec81cde 100644
--- a/include/asm-sparc64/tlb.h
+++ b/include/asm-sparc64/tlb.h
@@ -100,8 +100,8 @@
 }
 
 #define tlb_remove_tlb_entry(mp,ptep,addr) do { } while (0)
-#define pte_free_tlb(mp,ptepage) pte_free(ptepage)
-#define pmd_free_tlb(mp,pmdp) pmd_free(pmdp)
+#define pte_free_tlb(mp, ptepage) pte_free((mp)->mm, ptepage)
+#define pmd_free_tlb(mp, pmdp) pmd_free((mp)->mm, pmdp)
 #define pud_free_tlb(tlb,pudp) __pud_free_tlb(tlb,pudp)
 
 #define tlb_migrate_finish(mm)	do { } while (0)
diff --git a/include/asm-um/a.out.h b/include/asm-um/a.out.h
index 9281dd8..f42ff145 100644
--- a/include/asm-um/a.out.h
+++ b/include/asm-um/a.out.h
@@ -13,11 +13,9 @@
 
 extern unsigned long stacksizelim;
 
-extern unsigned long host_task_size;
-
 #define STACK_ROOM (stacksizelim)
 
-#define STACK_TOP task_size
+#define STACK_TOP (TASK_SIZE - 2 * PAGE_SIZE)
 
 #define STACK_TOP_MAX STACK_TOP
 
diff --git a/include/asm-um/current.h b/include/asm-um/current.h
index 8fd72f6..c2191d9 100644
--- a/include/asm-um/current.h
+++ b/include/asm-um/current.h
@@ -1,32 +1,13 @@
-/* 
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #ifndef __UM_CURRENT_H
 #define __UM_CURRENT_H
 
-#ifndef __ASSEMBLY__
-
-#include "asm/page.h"
 #include "linux/thread_info.h"
 
 #define current (current_thread_info()->task)
 
-/*Backward compatibility - it's used inside arch/um.*/
-#define current_thread current_thread_info()
-
-#endif /* __ASSEMBLY__ */
-
 #endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/include/asm-um/elf-i386.h b/include/asm-um/elf-i386.h
index ca94a13..23d6893 100644
--- a/include/asm-um/elf-i386.h
+++ b/include/asm-um/elf-i386.h
@@ -1,11 +1,11 @@
 /*
- * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 #ifndef __UM_ELF_I386_H
 #define __UM_ELF_I386_H
 
-#include <linux/sched.h>
+#include <asm/user.h>
 #include "skas.h"
 
 #define R_386_NONE	0
@@ -46,7 +46,7 @@
 	PT_REGS_EDI(regs) = 0; \
 	PT_REGS_EBP(regs) = 0; \
 	PT_REGS_EAX(regs) = 0; \
-} while(0)
+} while (0)
 
 #define USE_ELF_CORE_DUMP
 #define ELF_EXEC_PAGESIZE 4096
@@ -74,14 +74,9 @@
 	pr_reg[14] = PT_REGS_EFLAGS(regs);	\
 	pr_reg[15] = PT_REGS_SP(regs);		\
 	pr_reg[16] = PT_REGS_SS(regs);		\
-} while(0);
+} while (0);
 
-static inline int elf_core_copy_fpregs(struct task_struct *t,
-				       elf_fpregset_t *fpu)
-{
-	int cpu = ((struct thread_info *) t->stack)->cpu;
-	return save_fp_registers(userspace_pid[cpu], (unsigned long *) fpu);
-}
+extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu);
 
 #define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu)
 
@@ -91,7 +86,7 @@
 extern char * elf_aux_platform;
 #define ELF_PLATFORM (elf_aux_platform)
 
-#define SET_PERSONALITY(ex, ibcs2) do ; while(0)
+#define SET_PERSONALITY(ex, ibcs2) do { } while (0)
 
 extern unsigned long vsyscall_ehdr;
 extern unsigned long vsyscall_end;
@@ -166,14 +161,3 @@
 }
 
 #endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/include/asm-um/elf-x86_64.h b/include/asm-um/elf-x86_64.h
index 3c9d543..3b2d522 100644
--- a/include/asm-um/elf-x86_64.h
+++ b/include/asm-um/elf-x86_64.h
@@ -7,7 +7,6 @@
 #ifndef __UM_ELF_X86_64_H
 #define __UM_ELF_X86_64_H
 
-#include <linux/sched.h>
 #include <asm/user.h>
 #include "skas.h"
 
@@ -96,12 +95,7 @@
 	(pr_reg)[25] = 0;					\
 	(pr_reg)[26] = 0;
 
-static inline int elf_core_copy_fpregs(struct task_struct *t,
-				       elf_fpregset_t *fpu)
-{
-	int cpu = current_thread->cpu;
-	return save_fp_registers(userspace_pid[cpu], (unsigned long *) fpu);
-}
+extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu);
 
 #define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu)
 
diff --git a/include/asm-um/fixmap.h b/include/asm-um/fixmap.h
index d352a35..89a87c1 100644
--- a/include/asm-um/fixmap.h
+++ b/include/asm-um/fixmap.h
@@ -1,9 +1,10 @@
 #ifndef __UM_FIXMAP_H
 #define __UM_FIXMAP_H
 
+#include <asm/system.h>
 #include <asm/kmap_types.h>
 #include <asm/archparam.h>
-#include <asm/elf.h>
+#include <asm/page.h>
 
 /*
  * Here we define all the compile-time 'special' virtual
@@ -55,9 +56,8 @@
  * the start of the fixmap, and leave one page empty
  * at the top of mem..
  */
-extern unsigned long get_kmem_end(void);
 
-#define FIXADDR_TOP	(get_kmem_end() - 0x2000)
+#define FIXADDR_TOP	(CONFIG_TOP_ADDR - 2 * PAGE_SIZE)
 #define FIXADDR_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
 
diff --git a/include/asm-um/ldt.h b/include/asm-um/ldt.h
index b2553f3..52af512 100644
--- a/include/asm-um/ldt.h
+++ b/include/asm-um/ldt.h
@@ -8,7 +8,7 @@
 #ifndef __ASM_LDT_H
 #define __ASM_LDT_H
 
-#include "asm/semaphore.h"
+#include <linux/mutex.h>
 #include "asm/host_ldt.h"
 
 extern void ldt_host_info(void);
@@ -27,7 +27,7 @@
 
 typedef struct uml_ldt {
 	int entry_count;
-	struct semaphore semaphore;
+	struct mutex lock;
 	union {
 		struct ldt_entry * pages[LDT_PAGES_MAX];
 		struct ldt_entry entries[LDT_DIRECT_ENTRIES];
diff --git a/include/asm-um/linkage.h b/include/asm-um/linkage.h
index cdb3024..7dfce37 100644
--- a/include/asm-um/linkage.h
+++ b/include/asm-um/linkage.h
@@ -3,10 +3,4 @@
 
 #include "asm/arch/linkage.h"
 
-
-/* <linux/linkage.h> will pick sane defaults */
-#ifdef CONFIG_GPROF
-#undef fastcall
-#endif
-
 #endif
diff --git a/include/asm-um/mmu_context.h b/include/asm-um/mmu_context.h
index 5f3b863..6686fc5 100644
--- a/include/asm-um/mmu_context.h
+++ b/include/asm-um/mmu_context.h
@@ -6,11 +6,12 @@
 #ifndef __UM_MMU_CONTEXT_H
 #define __UM_MMU_CONTEXT_H
 
-#include <asm-generic/mm_hooks.h>
-
 #include "linux/sched.h"
 #include "um_mmu.h"
 
+extern void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
+extern void arch_exit_mmap(struct mm_struct *mm);
+
 #define get_mmu_context(task) do ; while(0)
 #define activate_context(tsk) do ; while(0)
 
@@ -30,6 +31,8 @@
 	 */
 	if (old != new && (current->flags & PF_BORROWED_MM))
 		__switch_mm(&new->context.id);
+
+	arch_dup_mmap(old, new);
 }
 
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, 
diff --git a/include/asm-um/page.h b/include/asm-um/page.h
index 4b424c7..fe2374d 100644
--- a/include/asm-um/page.h
+++ b/include/asm-um/page.h
@@ -30,7 +30,7 @@
 #if defined(CONFIG_3_LEVEL_PGTABLES) && !defined(CONFIG_64BIT)
 
 typedef struct { unsigned long pte_low, pte_high; } pte_t;
-typedef struct { unsigned long long pmd; } pmd_t;
+typedef struct { unsigned long pmd; } pmd_t;
 typedef struct { unsigned long pgd; } pgd_t;
 #define pte_val(x) ((x).pte_low | ((unsigned long long) (x).pte_high << 32))
 
@@ -106,8 +106,8 @@
 #define __pa(virt) to_phys((void *) (unsigned long) (virt))
 #define __va(phys) to_virt((unsigned long) (phys))
 
-#define phys_to_pfn(p) ((p) >> PAGE_SHIFT)
-#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT)
+#define phys_to_pfn(p) ((pfn_t) ((p) >> PAGE_SHIFT))
+#define pfn_to_phys(pfn) ((phys_t) ((pfn) << PAGE_SHIFT))
 
 #define pfn_valid(pfn) ((pfn) < max_mapnr)
 #define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v)))
diff --git a/include/asm-um/param.h b/include/asm-um/param.h
index f914e7d..4cd4a22 100644
--- a/include/asm-um/param.h
+++ b/include/asm-um/param.h
@@ -10,7 +10,7 @@
 #define MAXHOSTNAMELEN  64      /* max length of hostname */
 
 #ifdef __KERNEL__
-#define HZ 100
+#define HZ CONFIG_HZ
 #define USER_HZ	100	   /* .. some user interfaces are in "ticks" */
 #define CLOCKS_PER_SEC (USER_HZ)  /* frequency at which times() counts */
 #endif
diff --git a/include/asm-um/pgalloc.h b/include/asm-um/pgalloc.h
index 1490487..4f3e62b 100644
--- a/include/asm-um/pgalloc.h
+++ b/include/asm-um/pgalloc.h
@@ -23,17 +23,17 @@
  * Allocate and free page tables.
  */
 extern pgd_t *pgd_alloc(struct mm_struct *);
-extern void pgd_free(pgd_t *pgd);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
 extern struct page *pte_alloc_one(struct mm_struct *, unsigned long);
 
-static inline void pte_free_kernel(pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	free_page((unsigned long) pte);
 }
 
-static inline void pte_free(struct page *pte)
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
 {
 	__free_page(pte);
 }
@@ -42,7 +42,7 @@
 
 #ifdef CONFIG_3_LEVEL_PGTABLES
 
-static inline void pmd_free(pmd_t *pmd)
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
 	free_page((unsigned long)pmd);
 }
diff --git a/include/asm-um/pgtable-2level.h b/include/asm-um/pgtable-2level.h
index 172a75f..f534b73 100644
--- a/include/asm-um/pgtable-2level.h
+++ b/include/asm-um/pgtable-2level.h
@@ -41,9 +41,6 @@
 #define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot))
 #define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot))
 
-#define pmd_page_vaddr(pmd) \
-	((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-
 /*
  * Bits 0 through 4 are taken
  */
diff --git a/include/asm-um/pgtable-3level.h b/include/asm-um/pgtable-3level.h
index 3ebafba..0446f45 100644
--- a/include/asm-um/pgtable-3level.h
+++ b/include/asm-um/pgtable-3level.h
@@ -11,7 +11,11 @@
 
 /* PGDIR_SHIFT determines what a third-level page table entry can map */
 
+#ifdef CONFIG_64BIT
 #define PGDIR_SHIFT	30
+#else
+#define PGDIR_SHIFT	31
+#endif
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
@@ -28,9 +32,15 @@
  */
 
 #define PTRS_PER_PTE 512
+#ifdef CONFIG_64BIT
 #define PTRS_PER_PMD 512
-#define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE)
 #define PTRS_PER_PGD 512
+#else
+#define PTRS_PER_PMD 1024
+#define PTRS_PER_PGD 1024
+#endif
+
+#define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE)
 #define FIRST_USER_ADDRESS	0
 
 #define pte_ERROR(e) \
@@ -49,7 +59,12 @@
 #define pud_populate(mm, pud, pmd) \
 	set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd)))
 
+#ifdef CONFIG_64BIT
 #define set_pud(pudptr, pudval) set_64bit((phys_t *) (pudptr), pud_val(pudval))
+#else
+#define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
+#endif
+
 static inline int pgd_newpage(pgd_t pgd)
 {
 	return(pgd_val(pgd) & _PAGE_NEWPAGE);
@@ -57,17 +72,14 @@
 
 static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; }
 
+#ifdef CONFIG_64BIT
 #define set_pmd(pmdptr, pmdval) set_64bit((phys_t *) (pmdptr), pmd_val(pmdval))
+#else
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+#endif
 
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
-{
-        pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL);
-
-        if(pmd)
-                memset(pmd, 0, PAGE_SIZE);
-
-        return pmd;
-}
+struct mm_struct;
+extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address);
 
 static inline void pud_clear (pud_t *pud)
 {
@@ -75,8 +87,7 @@
 }
 
 #define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK)
-#define pud_page_vaddr(pud) \
-	((struct page *) __va(pud_val(pud) & PAGE_MASK))
+#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PAGE_MASK))
 
 /* Find an entry in the second-level page table.. */
 #define pmd_offset(pud, address) ((pmd_t *) pud_page_vaddr(*(pud)) + \
diff --git a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h
index 830fc6e..4102b44 100644
--- a/include/asm-um/pgtable.h
+++ b/include/asm-um/pgtable.h
@@ -1,5 +1,5 @@
 /* 
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright 2003 PathScale, Inc.
  * Derived from include/asm-i386/pgtable.h
  * Licensed under the GPL
@@ -8,11 +8,7 @@
 #ifndef __UM_PGTABLE_H
 #define __UM_PGTABLE_H
 
-#include "linux/sched.h"
-#include "linux/linkage.h"
-#include "asm/processor.h"
-#include "asm/page.h"
-#include "asm/fixmap.h"
+#include <asm/fixmap.h>
 
 #define _PAGE_PRESENT	0x001
 #define _PAGE_NEWPAGE	0x002
@@ -34,22 +30,11 @@
 
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
-extern void *um_virt_to_phys(struct task_struct *task, unsigned long virt,
-			     pte_t *pte_out);
-
 /* zero page used for uninitialized stuff */
 extern unsigned long *empty_zero_page;
 
 #define pgtable_cache_init() do ; while (0)
 
-/*
- * pgd entries used up by user/kernel:
- */
-
-#define USER_PGD_PTRS (TASK_SIZE >> PGDIR_SHIFT)
-#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
-
-#ifndef __ASSEMBLY__
 /* Just any arbitrary offset to the start of the vmalloc VM area: the
  * current 8MB value just means that there will be a 8MB "hole" after the
  * physical memory until the kernel virtual memory starts.  That means that
@@ -62,16 +47,12 @@
 
 #define VMALLOC_OFFSET	(__va_space)
 #define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
-
 #ifdef CONFIG_HIGHMEM
 # define VMALLOC_END	(PKMAP_BASE-2*PAGE_SIZE)
 #else
 # define VMALLOC_END	(FIXADDR_START-2*PAGE_SIZE)
 #endif
 
-#define REGION_SHIFT	(sizeof(pte_t) * 8 - 4)
-#define REGION_MASK	(((unsigned long) 0xf) << REGION_SHIFT)
-
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
@@ -81,11 +62,12 @@
 #define PAGE_COPY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 #define PAGE_READONLY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 #define PAGE_KERNEL	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
-#define PAGE_KERNEL_RO	__pgprot(_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
 
 /*
- * The i386 can't do page protection for execute, and considers that the same are read.
- * Also, write permissions imply read permissions. This is the closest we can get..
+ * The i386 can't do page protection for execute, and considers that the same
+ * as read.
+ * Also, write permissions imply read permissions. This is the closest we can
+ * get.
  */
 #define __P000	PAGE_NONE
 #define __P001	PAGE_READONLY
@@ -106,40 +88,16 @@
 #define __S111	PAGE_SHARED
 
 /*
- * Define this if things work differently on an i386 and an i486:
- * it will (on an i486) warn about kernel memory accesses that are
- * done without a 'access_ok(VERIFY_WRITE,..)'
- */
-#undef TEST_VERIFY_AREA
-
-/* page table for 0-4MB for everybody */
-extern unsigned long pg0[1024];
-
-/*
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
  */
-
 #define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page)
 
-/* number of bits that fit into a memory pointer */
-#define BITS_PER_PTR			(8*sizeof(unsigned long))
-
-/* to align the pointer to a pointer address */
-#define PTR_MASK			(~(sizeof(void*)-1))
-
-/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */
-/* 64-bit machines, beware!  SRB. */
-#define SIZEOF_PTR_LOG2			3
-
-/* to find an entry in a page-table */
-#define PAGE_PTR(address) \
-((unsigned long)(address)>>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK)
-
 #define pte_clear(mm,addr,xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEWPAGE))
 
 #define pmd_none(x)	(!((unsigned long)pmd_val(x) & ~_PAGE_NEWPAGE))
 #define	pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+
 #define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
 #define pmd_clear(xp)	do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0)
 
@@ -149,14 +107,9 @@
 #define pud_newpage(x)  (pud_val(x) & _PAGE_NEWPAGE)
 #define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEWPAGE)
 
-#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
-
 #define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK)
 
 #define pte_page(x) pfn_to_page(pte_pfn(x))
-#define pte_address(x) (__va(pte_val(x) & PAGE_MASK))
-#define mk_phys(a, r) ((a) + (((unsigned long) r) << REGION_SHIFT))
-#define phys_addr(p) ((p) & ~REGION_MASK)
 
 #define pte_present(x)	pte_get_bits(x, (_PAGE_PRESENT | _PAGE_PROTNONE))
 
@@ -309,7 +262,8 @@
 
 #define phys_to_page(phys) pfn_to_page(phys_to_pfn(phys))
 #define __virt_to_page(virt) phys_to_page(__pa(virt))
-#define page_to_phys(page) pfn_to_phys(page_to_pfn(page))
+#define page_to_phys(page) pfn_to_phys((pfn_t) page_to_pfn(page))
+#define virt_to_page(addr) __virt_to_page((const unsigned long) (addr))
 
 #define mk_pte(page, pgprot) \
 	({ pte_t pte;					\
@@ -325,8 +279,6 @@
 	return pte; 
 }
 
-#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-
 /*
  * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
  *
@@ -335,8 +287,6 @@
  */
 #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
 
-#define pgd_index_k(addr) pgd_index(addr)
-
 /*
  * pgd_offset() returns a (pgd_t *)
  * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
@@ -355,8 +305,12 @@
  * this macro returns the index of the entry in the pmd page which would
  * control the given virtual address
  */
+#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
 #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
 
+#define pmd_page_vaddr(pmd) \
+	((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+
 /*
  * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
  *
@@ -372,6 +326,9 @@
 #define pte_unmap(pte) do { } while (0)
 #define pte_unmap_nested(pte) do { } while (0)
 
+struct mm_struct;
+extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
+
 #define update_mmu_cache(vma,address,pte) do ; while (0)
 
 /* Encode and de-code a swap entry */
@@ -388,29 +345,4 @@
 
 #include <asm-generic/pgtable.h>
 
-#include <asm-generic/pgtable-nopud.h>
-
-#ifdef CONFIG_HIGHMEM
-/* Clear a kernel PTE and flush it from the TLB */
-#define kpte_clear_flush(ptep, vaddr)					\
-do {									\
-	pte_clear(&init_mm, vaddr, ptep);				\
-	__flush_tlb_one(vaddr);						\
-} while (0)
 #endif
-
-#endif
-#endif
-
-#define virt_to_page(addr) __virt_to_page((const unsigned long) addr)
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/include/asm-um/processor-generic.h b/include/asm-um/processor-generic.h
index 78c0599..b7d9a16 100644
--- a/include/asm-um/processor-generic.h
+++ b/include/asm-um/processor-generic.h
@@ -11,6 +11,7 @@
 struct task_struct;
 
 #include "asm/ptrace.h"
+#include "asm/pgtable.h"
 #include "registers.h"
 #include "sysdep/archsetjmp.h"
 
@@ -26,7 +27,6 @@
 	 * as of 2.6.11).
 	 */
 	int forking;
-	int nsyscalls;
 	struct pt_regs regs;
 	int singlestep_syscall;
 	void *fault_addr;
@@ -58,7 +58,6 @@
 #define INIT_THREAD \
 { \
 	.forking		= 0, \
-	.nsyscalls		= 0, \
 	.regs		   	= EMPTY_REGS,	\
 	.fault_addr		= NULL, \
 	.prev_sched		= NULL, \
@@ -68,10 +67,6 @@
 	.request		= { 0 } \
 }
 
-typedef struct {
-	unsigned long seg;
-} mm_segment_t;
-
 extern struct task_struct *alloc_task_struct(void);
 
 static inline void release_thread(struct task_struct *task)
@@ -97,9 +92,7 @@
 /*
  * User space process size: 3GB (default).
  */
-extern unsigned long task_size;
-
-#define TASK_SIZE	(task_size)
+#define TASK_SIZE (CONFIG_TOP_ADDR & PGDIR_MASK)
 
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
@@ -128,6 +121,6 @@
 
 
 #define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf)
-#define get_wchan(p) (0)
+extern unsigned long get_wchan(struct task_struct *p);
 
 #endif
diff --git a/include/asm-um/processor-i386.h b/include/asm-um/processor-i386.h
index 595f1c3..a2b7fe1 100644
--- a/include/asm-um/processor-i386.h
+++ b/include/asm-um/processor-i386.h
@@ -10,7 +10,6 @@
 #include "asm/host_ldt.h"
 #include "asm/segment.h"
 
-extern int host_has_xmm;
 extern int host_has_cmov;
 
 /* include faultinfo structure */
diff --git a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h
index 6e5fd5c..356b83e 100644
--- a/include/asm-um/thread_info.h
+++ b/include/asm-um/thread_info.h
@@ -1,5 +1,5 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+/*
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -8,8 +8,9 @@
 
 #ifndef __ASSEMBLY__
 
-#include <asm/processor.h>
 #include <asm/types.h>
+#include <asm/page.h>
+#include <asm/uaccess.h>
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
@@ -75,8 +76,8 @@
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
 #define TIF_SIGPENDING		1	/* signal pending */
 #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
-#define TIF_POLLING_NRFLAG      3       /* true if poll_idle() is polling 
-					 * TIF_NEED_RESCHED 
+#define TIF_POLLING_NRFLAG      3       /* true if poll_idle() is polling
+					 * TIF_NEED_RESCHED
 					 */
 #define TIF_RESTART_BLOCK 	4
 #define TIF_MEMDIE	 	5
diff --git a/include/asm-um/tlb.h b/include/asm-um/tlb.h
index c640033..39fc475 100644
--- a/include/asm-um/tlb.h
+++ b/include/asm-um/tlb.h
@@ -1,6 +1,126 @@
 #ifndef __UM_TLB_H
 #define __UM_TLB_H
 
-#include <asm/arch/tlb.h>
+#include <linux/swap.h>
+#include <asm/percpu.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+
+#define tlb_start_vma(tlb, vma) do { } while (0)
+#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
+
+/* struct mmu_gather is an opaque type used by the mm code for passing around
+ * any data needed by arch specific code for tlb_remove_page.
+ */
+struct mmu_gather {
+	struct mm_struct	*mm;
+	unsigned int		need_flush; /* Really unmapped some ptes? */
+	unsigned long		start;
+	unsigned long		end;
+	unsigned int		fullmm; /* non-zero means full mm flush */
+};
+
+/* Users of the generic TLB shootdown code must declare this storage space. */
+DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
+					  unsigned long address)
+{
+	if (tlb->start > address)
+		tlb->start = address;
+	if (tlb->end < address + PAGE_SIZE)
+		tlb->end = address + PAGE_SIZE;
+}
+
+static inline void init_tlb_gather(struct mmu_gather *tlb)
+{
+	tlb->need_flush = 0;
+
+	tlb->start = TASK_SIZE;
+	tlb->end = 0;
+
+	if (tlb->fullmm) {
+		tlb->start = 0;
+		tlb->end = TASK_SIZE;
+	}
+}
+
+/* tlb_gather_mmu
+ *	Return a pointer to an initialized struct mmu_gather.
+ */
+static inline struct mmu_gather *
+tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
+{
+	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
+
+	tlb->mm = mm;
+	tlb->fullmm = full_mm_flush;
+
+	init_tlb_gather(tlb);
+
+	return tlb;
+}
+
+extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+			       unsigned long end);
+
+static inline void
+tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+{
+	if (!tlb->need_flush)
+		return;
+
+	flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end);
+	init_tlb_gather(tlb);
+}
+
+/* tlb_finish_mmu
+ *	Called at the end of the shootdown operation to free up any resources
+ *	that were required.
+ */
+static inline void
+tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+{
+	tlb_flush_mmu(tlb, start, end);
+
+	/* keep the page table cache within bounds */
+	check_pgt_cache();
+
+	put_cpu_var(mmu_gathers);
+}
+
+/* tlb_remove_page
+ *	Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)),
+ *	while handling the additional races in SMP caused by other CPUs
+ *	caching valid mappings in their TLBs.
+ */
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	tlb->need_flush = 1;
+	free_page_and_swap_cache(page);
+	return;
+}
+
+/**
+ * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
+ *
+ * Record the fact that pte's were really unmapped in ->need_flush, so we can
+ * later optimise away the tlb invalidate.   This helps when userspace is
+ * unmapping already-unmapped pages, which happens quite a lot.
+ */
+#define tlb_remove_tlb_entry(tlb, ptep, address)		\
+	do {							\
+		(tlb)->need_flush = 1;				\
+		__tlb_remove_tlb_entry(tlb, ptep, address);	\
+	} while (0)
+
+#define pte_free_tlb(tlb, ptep) __pte_free_tlb(tlb, ptep)
+
+#define pud_free_tlb(tlb, pudp) __pud_free_tlb(tlb, pudp)
+
+#define pmd_free_tlb(tlb, pmdp) __pmd_free_tlb(tlb, pmdp)
+
+#define tlb_migrate_finish(mm) do {} while (0)
 
 #endif
diff --git a/include/asm-um/uaccess.h b/include/asm-um/uaccess.h
index 077032d..b9a895d 100644
--- a/include/asm-um/uaccess.h
+++ b/include/asm-um/uaccess.h
@@ -6,7 +6,15 @@
 #ifndef __UM_UACCESS_H
 #define __UM_UACCESS_H
 
-#include "linux/sched.h"
+#include <asm/errno.h>
+#include <asm/processor.h>
+
+/* thread_info has a mm_segment_t in it, so put the definition up here */
+typedef struct {
+	unsigned long seg;
+} mm_segment_t;
+
+#include "linux/thread_info.h"
 
 #define VERIFY_READ 0
 #define VERIFY_WRITE 1
diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h
index 48adbf5..aaf1519 100644
--- a/include/asm-x86/bitops_64.h
+++ b/include/asm-x86/bitops_64.h
@@ -37,12 +37,6 @@
   ((off)+(__scanbit(~(((*(unsigned long *)addr)) >> (off)),(size)-(off)))) : \
 	find_next_zero_bit(addr,size,off)))
 
-/* 
- * Find string of zero bits in a bitmap. -1 when not found.
- */ 
-extern unsigned long 
-find_next_zero_string(unsigned long *bitmap, long start, long nbits, int len);
-
 static inline void set_bit_string(unsigned long *bitmap, unsigned long i, 
 				  int len) 
 { 
@@ -53,16 +47,6 @@
 	}
 } 
 
-static inline void __clear_bit_string(unsigned long *bitmap, unsigned long i, 
-				    int len) 
-{ 
-	unsigned long end = i + len; 
-	while (i < end) {
-		__clear_bit(i, bitmap); 
-		i++;
-	}
-} 
-
 /**
  * ffz - find first zero in word.
  * @word: The word to search
diff --git a/include/asm-x86/highmem.h b/include/asm-x86/highmem.h
index c25cfca..479767c 100644
--- a/include/asm-x86/highmem.h
+++ b/include/asm-x86/highmem.h
@@ -38,11 +38,6 @@
  * easily, subsequent pte tables have to be allocated in one physical
  * chunk of RAM.
  */
-#ifdef CONFIG_X86_PAE
-#define LAST_PKMAP 512
-#else
-#define LAST_PKMAP 1024
-#endif
 /*
  * Ordering is:
  *
@@ -58,7 +53,6 @@
  * VMALLOC_START
  * high_memory
  */
-#define PKMAP_BASE ( (FIXADDR_BOOT_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK )
 #define LAST_PKMAP_MASK (LAST_PKMAP-1)
 #define PKMAP_NR(virt)  ((virt-PKMAP_BASE) >> PAGE_SHIFT)
 #define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT))
diff --git a/include/asm-x86/pgalloc_32.h b/include/asm-x86/pgalloc_32.h
index 6c21ef9..bab1271 100644
--- a/include/asm-x86/pgalloc_32.h
+++ b/include/asm-x86/pgalloc_32.h
@@ -36,17 +36,17 @@
  * Allocate and free page tables.
  */
 extern pgd_t *pgd_alloc(struct mm_struct *);
-extern void pgd_free(pgd_t *pgd);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
 extern struct page *pte_alloc_one(struct mm_struct *, unsigned long);
 
-static inline void pte_free_kernel(pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	free_page((unsigned long)pte);
 }
 
-static inline void pte_free(struct page *pte)
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
 {
 	__free_page(pte);
 }
@@ -63,7 +63,7 @@
 	return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
 }
 
-static inline void pmd_free(pmd_t *pmd)
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
 	BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
 	free_page((unsigned long)pmd);
diff --git a/include/asm-x86/pgalloc_64.h b/include/asm-x86/pgalloc_64.h
index 8bb5646..315314c 100644
--- a/include/asm-x86/pgalloc_64.h
+++ b/include/asm-x86/pgalloc_64.h
@@ -17,7 +17,7 @@
 	set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
 }
 
-static inline void pmd_free(pmd_t *pmd)
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
 	BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
 	free_page((unsigned long)pmd);
@@ -33,7 +33,7 @@
 	return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
 }
 
-static inline void pud_free (pud_t *pud)
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
 	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
 	free_page((unsigned long)pud);
@@ -77,7 +77,7 @@
 	return pgd;
 }
 
-static inline void pgd_free(pgd_t *pgd)
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	BUG_ON((unsigned long)pgd & (PAGE_SIZE-1));
 	pgd_list_del(pgd);
@@ -100,13 +100,13 @@
 /* Should really implement gc for free page table pages. This could be
    done with a reference count in struct page. */
 
-static inline void pte_free_kernel(pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
 	free_page((unsigned long)pte); 
 }
 
-static inline void pte_free(struct page *pte)
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
 {
 	__free_page(pte);
 } 
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 935630d..80dd438 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -66,6 +66,14 @@
 #define VMALLOC_OFFSET	(8*1024*1024)
 #define VMALLOC_START	(((unsigned long) high_memory + \
 			2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
+#ifdef CONFIG_X86_PAE
+#define LAST_PKMAP 512
+#else
+#define LAST_PKMAP 1024
+#endif
+
+#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK)
+
 #ifdef CONFIG_HIGHMEM
 # define VMALLOC_END	(PKMAP_BASE-2*PAGE_SIZE)
 #else
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index 8d8f9b5..984123a 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -327,9 +327,11 @@
 #define __NR_epoll_pwait	319
 #define __NR_utimensat		320
 #define __NR_signalfd		321
-#define __NR_timerfd		322
+#define __NR_timerfd_create	322
 #define __NR_eventfd		323
 #define __NR_fallocate		324
+#define __NR_timerfd_settime	325
+#define __NR_timerfd_gettime	326
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index 5ff4d3e..3883ceb 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -629,12 +629,17 @@
 __SYSCALL(__NR_epoll_pwait, sys_epoll_pwait)
 #define __NR_signalfd				282
 __SYSCALL(__NR_signalfd, sys_signalfd)
-#define __NR_timerfd				283
-__SYSCALL(__NR_timerfd, sys_timerfd)
+#define __NR_timerfd_create			283
+__SYSCALL(__NR_timerfd_create, sys_timerfd_create)
 #define __NR_eventfd				284
 __SYSCALL(__NR_eventfd, sys_eventfd)
 #define __NR_fallocate				285
 __SYSCALL(__NR_fallocate, sys_fallocate)
+#define __NR_timerfd_settime			286
+__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
+#define __NR_timerfd_gettime			287
+__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
+
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/include/asm-xtensa/pgalloc.h b/include/asm-xtensa/pgalloc.h
index 3e5b565..1d51ba5 100644
--- a/include/asm-xtensa/pgalloc.h
+++ b/include/asm-xtensa/pgalloc.h
@@ -31,7 +31,7 @@
 	return (pgd_t*) __get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
 }
 
-static inline void pgd_free(pgd_t *pgd)
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	free_page((unsigned long)pgd);
 }
@@ -52,12 +52,12 @@
 	return virt_to_page(pte_alloc_one_kernel(mm, addr));
 }
 
-static inline void pte_free_kernel(pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	kmem_cache_free(pgtable_cache, pte);
 }
 
-static inline void pte_free(struct page *page)
+static inline void pte_free(struct mm_struct *mm, struct page *page)
 {
 	kmem_cache_free(pgtable_cache, page_address(page));
 }
diff --git a/include/asm-xtensa/tlb.h b/include/asm-xtensa/tlb.h
index 4830232..31c220f 100644
--- a/include/asm-xtensa/tlb.h
+++ b/include/asm-xtensa/tlb.h
@@ -42,6 +42,6 @@
 
 #include <asm-generic/tlb.h>
 
-#define __pte_free_tlb(tlb,pte)			pte_free(pte)
+#define __pte_free_tlb(tlb, pte)		pte_free((tlb)->mm, pte)
 
 #endif	/* _XTENSA_TLB_H */
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index c0f9bb7..9363122 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -219,6 +219,7 @@
 unifdef-y += icmp.h
 unifdef-y += icmpv6.h
 unifdef-y += if_addr.h
+unifdef-y += if_addrlabel.h
 unifdef-y += if_arp.h
 unifdef-y += if_bridge.h
 unifdef-y += if_ec.h
diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h
index abc521c..03e3454 100644
--- a/include/linux/agp_backend.h
+++ b/include/linux/agp_backend.h
@@ -109,6 +109,7 @@
 extern void agp_enable(struct agp_bridge_data *, u32);
 extern struct agp_bridge_data *agp_backend_acquire(struct pci_dev *);
 extern void agp_backend_release(struct agp_bridge_data *);
+extern void agp_flush_chipset(struct agp_bridge_data *);
 
 #endif				/* __KERNEL__ */
 #endif				/* _AGP_BACKEND_H */
diff --git a/include/linux/agpgart.h b/include/linux/agpgart.h
index 09fbf7e..62aef58 100644
--- a/include/linux/agpgart.h
+++ b/include/linux/agpgart.h
@@ -38,6 +38,7 @@
 #define AGPIOC_DEALLOCATE _IOW (AGPIOC_BASE, 7, int)
 #define AGPIOC_BIND       _IOW (AGPIOC_BASE, 8, struct agp_bind*)
 #define AGPIOC_UNBIND     _IOW (AGPIOC_BASE, 9, struct agp_unbind*)
+#define AGPIOC_CHIPSET_FLUSH _IO (AGPIOC_BASE, 10)
 
 #define AGP_DEVICE      "/dev/agpgart"
 
diff --git a/drivers/serial/atmel_serial.h b/include/linux/atmel_serial.h
similarity index 98%
rename from drivers/serial/atmel_serial.h
rename to include/linux/atmel_serial.h
index e014177..fd68337 100644
--- a/drivers/serial/atmel_serial.h
+++ b/include/linux/atmel_serial.h
@@ -1,5 +1,5 @@
 /*
- * drivers/serial/atmel_serial.h
+ * include/linux/atmel_serial.h
  *
  * Copyright (C) 2005 Ivan Kokshaysky
  * Copyright (C) SAN People
diff --git a/include/linux/capability.h b/include/linux/capability.h
index bb017ed..7d50ff6 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -14,7 +14,6 @@
 #define _LINUX_CAPABILITY_H
 
 #include <linux/types.h>
-#include <linux/compiler.h>
 
 struct task_struct;
 
@@ -23,13 +22,20 @@
    kernel might be somewhat backwards compatible, but don't bet on
    it. */
 
-/* XXX - Note, cap_t, is defined by POSIX to be an "opaque" pointer to
+/* Note: cap_t is defined by POSIX (draft) to be an "opaque" pointer to
    a set of three capability sets.  The transposition of 3*the
    following structure to such a composite is better handled in a user
    library since the draft standard requires the use of malloc/free
    etc.. */
 
-#define _LINUX_CAPABILITY_VERSION  0x19980330
+#define _LINUX_CAPABILITY_VERSION_1  0x19980330
+#define _LINUX_CAPABILITY_U32S_1     1
+
+#define _LINUX_CAPABILITY_VERSION_2  0x20071026
+#define _LINUX_CAPABILITY_U32S_2     2
+
+#define _LINUX_CAPABILITY_VERSION    _LINUX_CAPABILITY_VERSION_2
+#define _LINUX_CAPABILITY_U32S       _LINUX_CAPABILITY_U32S_2
 
 typedef struct __user_cap_header_struct {
 	__u32 version;
@@ -42,41 +48,42 @@
         __u32 inheritable;
 } __user *cap_user_data_t;
 
+
 #define XATTR_CAPS_SUFFIX "capability"
 #define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX
 
-#define XATTR_CAPS_SZ (3*sizeof(__le32))
 #define VFS_CAP_REVISION_MASK	0xFF000000
-#define VFS_CAP_REVISION_1	0x01000000
-
-#define VFS_CAP_REVISION	VFS_CAP_REVISION_1
-
 #define VFS_CAP_FLAGS_MASK	~VFS_CAP_REVISION_MASK
 #define VFS_CAP_FLAGS_EFFECTIVE	0x000001
 
+#define VFS_CAP_REVISION_1	0x01000000
+#define VFS_CAP_U32_1           1
+#define XATTR_CAPS_SZ_1         (sizeof(__le32)*(1 + 2*VFS_CAP_U32_1))
+
+#define VFS_CAP_REVISION_2	0x02000000
+#define VFS_CAP_U32_2           2
+#define XATTR_CAPS_SZ_2         (sizeof(__le32)*(1 + 2*VFS_CAP_U32_2))
+
+#define XATTR_CAPS_SZ           XATTR_CAPS_SZ_2
+#define VFS_CAP_U32             VFS_CAP_U32_2
+#define VFS_CAP_REVISION	VFS_CAP_REVISION_2
+
+
 struct vfs_cap_data {
-	__u32 magic_etc;  /* Little endian */
-	__u32 permitted;    /* Little endian */
-	__u32 inheritable;  /* Little endian */
+	__le32 magic_etc;            /* Little endian */
+	struct {
+		__le32 permitted;    /* Little endian */
+		__le32 inheritable;  /* Little endian */
+	} data[VFS_CAP_U32];
 };
 
 #ifdef __KERNEL__
 
-/* #define STRICT_CAP_T_TYPECHECKS */
-
-#ifdef STRICT_CAP_T_TYPECHECKS
-
 typedef struct kernel_cap_struct {
-	__u32 cap;
+	__u32 cap[_LINUX_CAPABILITY_U32S];
 } kernel_cap_t;
 
-#else
-
-typedef __u32 kernel_cap_t;
-
-#endif
-
-#define _USER_CAP_HEADER_SIZE  (2*sizeof(__u32))
+#define _USER_CAP_HEADER_SIZE  (sizeof(struct __user_cap_header_struct))
 #define _KERNEL_CAP_T_SIZE     (sizeof(kernel_cap_t))
 
 #endif
@@ -119,10 +126,6 @@
 
 #define CAP_FSETID           4
 
-/* Used to decide between falling back on the old suser() or fsuser(). */
-
-#define CAP_FS_MASK          0x1f
-
 /* Overrides the restriction that the real or effective user ID of a
    process sending a signal must match the real or effective user ID
    of the process receiving the signal. */
@@ -145,8 +148,14 @@
  ** Linux-specific capabilities
  **/
 
-/* Transfer any capability in your permitted set to any pid,
-   remove any capability in your permitted set from any pid */
+/* Without VFS support for capabilities:
+ *   Transfer any capability in your permitted set to any pid,
+ *   remove any capability in your permitted set from any pid
+ * With VFS support for capabilities (neither of above, but)
+ *   Add any capability from current's capability bounding set
+ *       to the current process' inheritable set
+ *   Allow taking bits out of capability bounding set
+ */
 
 #define CAP_SETPCAP          8
 
@@ -195,7 +204,6 @@
 #define CAP_IPC_OWNER        15
 
 /* Insert and remove kernel modules - modify kernel without limit */
-/* Modify cap_bset */
 #define CAP_SYS_MODULE       16
 
 /* Allow ioperm/iopl access */
@@ -307,74 +315,183 @@
 
 #define CAP_SETFCAP	     31
 
+/* Override MAC access.
+   The base kernel enforces no MAC policy.
+   An LSM may enforce a MAC policy, and if it does and it chooses
+   to implement capability based overrides of that policy, this is
+   the capability it should use to do so. */
+
+#define CAP_MAC_OVERRIDE     32
+
+/* Allow MAC configuration or state changes.
+   The base kernel requires no MAC configuration.
+   An LSM may enforce a MAC policy, and if it does and it chooses
+   to implement capability based checks on modifications to that
+   policy or the data required to maintain it, this is the
+   capability it should use to do so. */
+
+#define CAP_MAC_ADMIN        33
+
+#define CAP_LAST_CAP         CAP_MAC_ADMIN
+
+#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
+
+/*
+ * Bit location of each capability (used by user-space library and kernel)
+ */
+
+#define CAP_TO_INDEX(x)     ((x) >> 5)        /* 1 << 5 == bits in __u32 */
+#define CAP_TO_MASK(x)      (1U << ((x) & 31)) /* mask for indexed __u32 */
+
 #ifdef __KERNEL__
 
 /*
  * Internal kernel functions only
  */
 
-#ifdef STRICT_CAP_T_TYPECHECKS
+#define CAP_FOR_EACH_U32(__capi)  \
+	for (__capi = 0; __capi < _LINUX_CAPABILITY_U32S; ++__capi)
 
-#define to_cap_t(x) { x }
-#define cap_t(x) (x).cap
+# define CAP_FS_MASK_B0     (CAP_TO_MASK(CAP_CHOWN)		\
+			    | CAP_TO_MASK(CAP_DAC_OVERRIDE)	\
+			    | CAP_TO_MASK(CAP_DAC_READ_SEARCH)	\
+			    | CAP_TO_MASK(CAP_FOWNER)		\
+			    | CAP_TO_MASK(CAP_FSETID))
 
-#else
+# define CAP_FS_MASK_B1     (CAP_TO_MASK(CAP_MAC_OVERRIDE))
 
-#define to_cap_t(x) (x)
-#define cap_t(x) (x)
+#if _LINUX_CAPABILITY_U32S != 2
+# error Fix up hand-coded capability macro initializers
+#else /* HAND-CODED capability initializers */
 
-#endif
+# define CAP_EMPTY_SET    {{ 0, 0 }}
+# define CAP_FULL_SET     {{ ~0, ~0 }}
+# define CAP_INIT_EFF_SET {{ ~CAP_TO_MASK(CAP_SETPCAP), ~0 }}
+# define CAP_FS_SET       {{ CAP_FS_MASK_B0, CAP_FS_MASK_B1 } }
+# define CAP_NFSD_SET     {{ CAP_FS_MASK_B0|CAP_TO_MASK(CAP_SYS_RESOURCE), \
+			     CAP_FS_MASK_B1 } }
 
-#define CAP_EMPTY_SET       to_cap_t(0)
-#define CAP_FULL_SET        to_cap_t(~0)
-#define CAP_INIT_EFF_SET    to_cap_t(~0 & ~CAP_TO_MASK(CAP_SETPCAP))
-#define CAP_INIT_INH_SET    to_cap_t(0)
+#endif /* _LINUX_CAPABILITY_U32S != 2 */
 
-#define CAP_TO_MASK(x) (1 << (x))
-#define cap_raise(c, flag)   (cap_t(c) |=  CAP_TO_MASK(flag))
-#define cap_lower(c, flag)   (cap_t(c) &= ~CAP_TO_MASK(flag))
-#define cap_raised(c, flag)  (cap_t(c) & CAP_TO_MASK(flag))
+#define CAP_INIT_INH_SET    CAP_EMPTY_SET
 
-static inline kernel_cap_t cap_combine(kernel_cap_t a, kernel_cap_t b)
+# define cap_clear(c)         do { (c) = __cap_empty_set; } while (0)
+# define cap_set_full(c)      do { (c) = __cap_full_set; } while (0)
+# define cap_set_init_eff(c)  do { (c) = __cap_init_eff_set; } while (0)
+
+#define cap_raise(c, flag)  ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag))
+#define cap_lower(c, flag)  ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag))
+#define cap_raised(c, flag) ((c).cap[CAP_TO_INDEX(flag)] & CAP_TO_MASK(flag))
+
+/* c = a OP b, applied to each __u32 word of the capability set */
+#define CAP_BOP_ALL(c, a, b, OP)                                    \
+do {                                                                \
+	unsigned __capi;                                            \
+	CAP_FOR_EACH_U32(__capi)                                    \
+		(c).cap[__capi] = (a).cap[__capi] OP (b).cap[__capi]; \
+} while (0)
+
+/* c = OP a, applied to each __u32 word of the capability set */
+#define CAP_UOP_ALL(c, a, OP)                                       \
+do {                                                                \
+	unsigned __capi;                                            \
+	CAP_FOR_EACH_U32(__capi)                                    \
+		(c).cap[__capi] = OP (a).cap[__capi];               \
+} while (0)
+
+static inline kernel_cap_t cap_combine(const kernel_cap_t a,
+				       const kernel_cap_t b)
 {
-     kernel_cap_t dest;
-     cap_t(dest) = cap_t(a) | cap_t(b);
-     return dest;
+	kernel_cap_t dest;
+	CAP_BOP_ALL(dest, a, b, |);
+	return dest;
 }
 
-static inline kernel_cap_t cap_intersect(kernel_cap_t a, kernel_cap_t b)
+static inline kernel_cap_t cap_intersect(const kernel_cap_t a,
+					 const kernel_cap_t b)
 {
-     kernel_cap_t dest;
-     cap_t(dest) = cap_t(a) & cap_t(b);
-     return dest;
+	kernel_cap_t dest;
+	CAP_BOP_ALL(dest, a, b, &);
+	return dest;
 }
 
-static inline kernel_cap_t cap_drop(kernel_cap_t a, kernel_cap_t drop)
+static inline kernel_cap_t cap_drop(const kernel_cap_t a,
+				    const kernel_cap_t drop)
 {
-     kernel_cap_t dest;
-     cap_t(dest) = cap_t(a) & ~cap_t(drop);
-     return dest;
+	kernel_cap_t dest;
+	CAP_BOP_ALL(dest, a, drop, &~);
+	return dest;
 }
 
-static inline kernel_cap_t cap_invert(kernel_cap_t c)
+static inline kernel_cap_t cap_invert(const kernel_cap_t c)
 {
-     kernel_cap_t dest;
-     cap_t(dest) = ~cap_t(c);
-     return dest;
+	kernel_cap_t dest;
+	CAP_UOP_ALL(dest, c, ~);
+	return dest;
 }
 
-#define cap_isclear(c)       (!cap_t(c))
-#define cap_issubset(a,set)  (!(cap_t(a) & ~cap_t(set)))
+static inline int cap_isclear(const kernel_cap_t a)
+{
+	unsigned __capi;
+	CAP_FOR_EACH_U32(__capi) {
+		if (a.cap[__capi] != 0)
+			return 0;
+	}
+	return 1;
+}
 
-#define cap_clear(c)         do { cap_t(c) =  0; } while(0)
-#define cap_set_full(c)      do { cap_t(c) = ~0; } while(0)
-#define cap_mask(c,mask)     do { cap_t(c) &= cap_t(mask); } while(0)
+static inline int cap_issubset(const kernel_cap_t a, const kernel_cap_t set)
+{
+	kernel_cap_t dest;
+	dest = cap_drop(a, set);
+	return cap_isclear(dest);
+}
 
-#define cap_is_fs_cap(c)     (CAP_TO_MASK(c) & CAP_FS_MASK)
+/* Used to decide between falling back on the old suser() or fsuser(). */
+
+static inline int cap_is_fs_cap(int cap)
+{
+	const kernel_cap_t __cap_fs_set = CAP_FS_SET;
+	return !!(CAP_TO_MASK(cap) & __cap_fs_set.cap[CAP_TO_INDEX(cap)]);
+}
+
+static inline kernel_cap_t cap_drop_fs_set(const kernel_cap_t a)
+{
+	const kernel_cap_t __cap_fs_set = CAP_FS_SET;
+	return cap_drop(a, __cap_fs_set);
+}
+
+static inline kernel_cap_t cap_raise_fs_set(const kernel_cap_t a,
+					    const kernel_cap_t permitted)
+{
+	const kernel_cap_t __cap_fs_set = CAP_FS_SET;
+	return cap_combine(a,
+			   cap_intersect(permitted, __cap_fs_set));
+}
+
+static inline kernel_cap_t cap_drop_nfsd_set(const kernel_cap_t a)
+{
+	const kernel_cap_t __cap_nfsd_set = CAP_NFSD_SET;
+	return cap_drop(a, __cap_nfsd_set);
+}
+
+static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a,
+					      const kernel_cap_t permitted)
+{
+	const kernel_cap_t __cap_nfsd_set = CAP_NFSD_SET;
+	return cap_combine(a,
+			   cap_intersect(permitted, __cap_nfsd_set));
+}
+
+extern const kernel_cap_t __cap_empty_set;
+extern const kernel_cap_t __cap_full_set;
+extern const kernel_cap_t __cap_init_eff_set;
 
 int capable(int cap);
 int __capable(struct task_struct *t, int cap);
 
+extern long cap_prctl_drop(unsigned long cap);
+
 #endif /* __KERNEL__ */
 
 #endif /* !_LINUX_CAPABILITY_H */
diff --git a/include/linux/compat.h b/include/linux/compat.h
index d38655f..ae0a483 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -279,8 +279,11 @@
 asmlinkage long compat_sys_signalfd(int ufd,
 				const compat_sigset_t __user *sigmask,
                                 compat_size_t sigsetsize);
-asmlinkage long compat_sys_timerfd(int ufd, int clockid, int flags,
-				const struct compat_itimerspec __user *utmr);
+asmlinkage long compat_sys_timerfd_settime(int ufd, int flags,
+				   const struct compat_itimerspec __user *utmr,
+				   struct compat_itimerspec __user *otmr);
+asmlinkage long compat_sys_timerfd_gettime(int ufd,
+				   struct compat_itimerspec __user *otmr);
 
 #endif /* CONFIG_COMPAT */
 #endif /* _LINUX_COMPAT_H */
diff --git a/include/linux/device.h b/include/linux/device.h
index 479c0b3..2258d89 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -410,6 +410,15 @@
 extern void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp);
 extern void devm_kfree(struct device *dev, void *p);
 
+struct device_dma_parameters {
+	/*
+	 * a low level driver may set these to teach IOMMU code about
+	 * sg limitations.
+	 */
+	unsigned int max_segment_size;
+	unsigned long segment_boundary_mask;
+};
+
 struct device {
 	struct klist		klist_children;
 	struct klist_node	knode_parent;	/* node in sibling list */
@@ -445,6 +454,8 @@
 					     64 bit addresses for consistent
 					     allocations such descriptors. */
 
+	struct device_dma_parameters *dma_parms;
+
 	struct list_head	dma_pools;	/* dma pools (if dma'ble) */
 
 	struct dma_coherent_mem	*dma_mem; /* internal for coherent mem
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 4470950..3320307 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -60,6 +60,36 @@
 
 extern u64 dma_get_required_mask(struct device *dev);
 
+static inline unsigned int dma_get_max_seg_size(struct device *dev)
+{
+	return dev->dma_parms ? dev->dma_parms->max_segment_size : 65536;
+}
+
+/* Set the max DMA segment size; returns 0, or -EIO without dma_parms. */
+static inline int dma_set_max_seg_size(struct device *dev, unsigned int size)
+{
+	if (!dev->dma_parms)
+		return -EIO;
+
+	dev->dma_parms->max_segment_size = size;
+	return 0;
+}
+
+static inline unsigned long dma_get_seg_boundary(struct device *dev)
+{
+	return dev->dma_parms ?
+		dev->dma_parms->segment_boundary_mask : 0xffffffff;
+}
+
+static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask)
+{
+	if (dev->dma_parms) {
+		dev->dma_parms->segment_boundary_mask = mask;
+		return 0;
+	} else
+		return -EIO;
+}
+
 /* flags for the coherent memory api */
 #define	DMA_MEMORY_MAP			0x01
 #define DMA_MEMORY_IO			0x02
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 7e93a9a..0c6ce51 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -228,5 +228,7 @@
 
 void page_alloc_init(void);
 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
+void drain_all_pages(void);
+void drain_local_pages(void *dummy);
 
 #endif /* __LINUX_GFP_H */
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 1fcb003..7dcbc82 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -68,8 +68,6 @@
 	void *addr = kmap_atomic(page, KM_USER0);
 	clear_user_page(addr, vaddr, page);
 	kunmap_atomic(addr, KM_USER0);
-	/* Make sure this page is cleared on other CPU's too before using it */
-	smp_wmb();
 }
 
 #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
@@ -124,28 +122,40 @@
 	kunmap_atomic(kaddr, KM_USER0);
 }
 
-/*
- * Same but also flushes aliased cache contents to RAM.
- *
- * This must be a macro because KM_USER0 and friends aren't defined if
- * !CONFIG_HIGHMEM
- */
-#define zero_user_page(page, offset, size, km_type)		\
-	do {							\
-		void *kaddr;					\
-								\
-		BUG_ON((offset) + (size) > PAGE_SIZE);		\
-								\
-		kaddr = kmap_atomic(page, km_type);		\
-		memset((char *)kaddr + (offset), 0, (size));	\
-		flush_dcache_page(page);			\
-		kunmap_atomic(kaddr, (km_type));		\
-	} while (0)
+static inline void zero_user_segments(struct page *page,
+	unsigned start1, unsigned end1,
+	unsigned start2, unsigned end2)
+{
+	void *kaddr = kmap_atomic(page, KM_USER0);
+
+	BUG_ON(end1 > PAGE_SIZE || end2 > PAGE_SIZE);
+
+	if (end1 > start1)
+		memset(kaddr + start1, 0, end1 - start1);
+
+	if (end2 > start2)
+		memset(kaddr + start2, 0, end2 - start2);
+
+	kunmap_atomic(kaddr, KM_USER0);
+	flush_dcache_page(page);
+}
+
+static inline void zero_user_segment(struct page *page,
+	unsigned start, unsigned end)
+{
+	zero_user_segments(page, start, end, 0, 0);
+}
+
+static inline void zero_user(struct page *page,
+	unsigned start, unsigned size)
+{
+	zero_user_segments(page, start, start + size, 0, 0);
+}
 
 static inline void __deprecated memclear_highpage_flush(struct page *page,
 			unsigned int offset, unsigned int size)
 {
-	zero_user_page(page, offset, size, KM_USER0);
+	zero_user(page, offset, size);
 }
 
 #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE
@@ -160,8 +170,6 @@
 	copy_user_page(vto, vfrom, vaddr, to);
 	kunmap_atomic(vfrom, KM_USER0);
 	kunmap_atomic(vto, KM_USER1);
-	/* Make sure this page is cleared on other CPU's too before using it */
-	smp_wmb();
 }
 
 #endif
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index f79dcba..8371b66 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -301,9 +301,16 @@
 }
 
 /* Forward a hrtimer so it expires after now: */
-extern unsigned long
+extern u64
 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval);
 
+/* Forward a hrtimer so it expires after the current time of its clock base */
+static inline u64 hrtimer_forward_now(struct hrtimer *timer,
+				      ktime_t interval)
+{
+	return hrtimer_forward(timer, timer->base->get_time(), interval);
+}
+
 /* Precise sleep: */
 extern long hrtimer_nanosleep(struct timespec *rqtp,
 			      struct timespec *rmtp,
@@ -322,9 +329,9 @@
 extern void __init hrtimers_init(void);
 
 #if BITS_PER_LONG < 64
-extern unsigned long ktime_divns(const ktime_t kt, s64 div);
+extern u64 ktime_divns(const ktime_t kt, s64 div);
 #else /* BITS_PER_LONG < 64 */
-# define ktime_divns(kt, div)		(unsigned long)((kt).tv64 / (div))
+# define ktime_divns(kt, div)		(u64)((kt).tv64 / (div))
 #endif
 
 /* Show pending timers: */
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 85d1191..4213182 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -44,7 +44,15 @@
 /** Register a new Hardware Random Number Generator driver. */
 extern int hwrng_register(struct hwrng *rng);
 /** Unregister a Hardware Random Number Generator driver. */
-extern void hwrng_unregister(struct hwrng *rng);
+extern void __hwrng_unregister(struct hwrng *rng, bool suspended);
+static inline void hwrng_unregister(struct hwrng *rng)
+{
+	__hwrng_unregister(rng, false);
+}
+static inline void hwrng_unregister_suspended(struct hwrng *rng)
+{
+	__hwrng_unregister(rng, true);
+}
 
 #endif /* __KERNEL__ */
 #endif /* LINUX_HWRANDOM_H_ */
diff --git a/include/linux/i2c/pca9539.h b/include/linux/i2c/pca9539.h
new file mode 100644
index 0000000..611d84a
--- /dev/null
+++ b/include/linux/i2c/pca9539.h
@@ -0,0 +1,18 @@
+/* platform data for the PCA9539 16-bit I/O expander driver */
+
+struct pca9539_platform_data {
+	/* number of the first GPIO */
+	unsigned	gpio_base;
+
+	/* initial polarity inversion setting */
+	uint16_t	invert;
+
+	void		*context;	/* param to setup/teardown */
+
+	int		(*setup)(struct i2c_client *client,
+				unsigned gpio, unsigned ngpio,
+				void *context);
+	int		(*teardown)(struct i2c_client *client,
+				unsigned gpio, unsigned ngpio,
+				void *context);
+};
diff --git a/include/linux/i2c/pcf857x.h b/include/linux/i2c/pcf857x.h
new file mode 100644
index 0000000..ba8ea6e
--- /dev/null
+++ b/include/linux/i2c/pcf857x.h
@@ -0,0 +1,45 @@
+#ifndef __LINUX_PCF857X_H
+#define __LINUX_PCF857X_H
+
+/**
+ * struct pcf857x_platform_data - data to set up pcf857x driver
+ * @gpio_base: number of the chip's first GPIO
+ * @n_latch: optional bit-inverse of initial register value; if
+ *	you leave this initialized to zero the driver will act
+ *	like the chip was just reset
+ * @setup: optional callback issued once the GPIOs are valid
+ * @teardown: optional callback issued before the GPIOs are invalidated
+ * @context: optional parameter passed to setup() and teardown()
+ *
+ * In addition to the I2C_BOARD_INFO() state appropriate to each chip,
+ * the i2c_board_info used with the pcf857x driver must provide the
+ * chip "type" ("pcf8574", "pcf8574a", "pcf8575", "pcf8575c") and its
+ * platform_data (pointer to one of these structures) with at least
+ * the gpio_base value initialized.
+ *
+ * The @setup callback may be used with the kind of board-specific glue
+ * which hands the (now-valid) GPIOs to other drivers, or which puts
+ * devices in their initial states using these GPIOs.
+ *
+ * These GPIO chips are only "quasi-bidirectional"; read the chip specs
+ * to understand the behavior.  They don't have separate registers to
+ * record which pins are used for input or output, record which output
+ * values are driven, or provide access to input values.  That must be
+ * inferred by reading the chip's value and knowing the last value written
+ * to it.  If you leave n_latch initialized to zero, that last written
+ * value is presumed to be all ones (as if the chip were just reset).
+ */
+struct pcf857x_platform_data {
+	unsigned	gpio_base;
+	unsigned	n_latch;
+
+	int		(*setup)(struct i2c_client *client,
+					int gpio, unsigned ngpio,
+					void *context);
+	int		(*teardown)(struct i2c_client *client,
+					int gpio, unsigned ngpio,
+					void *context);
+	void		*context;
+};
+
+#endif /* __LINUX_PCF857X_H */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index f42663e..1f74e1d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -121,6 +121,18 @@
 #else
 #define INIT_IDS
 #endif
+
+#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
+/*
+ * Because of the reduced scope of CAP_SETPCAP when filesystem
+ * capabilities are in effect, it is safe to allow CAP_SETPCAP to
+ * be available in the default configuration.
+ */
+# define CAP_INIT_BSET  CAP_FULL_SET
+#else
+# define CAP_INIT_BSET  CAP_INIT_EFF_SET
+#endif
+
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -156,6 +168,7 @@
 	.cap_effective	= CAP_INIT_EFF_SET,				\
 	.cap_inheritable = CAP_INIT_INH_SET,				\
 	.cap_permitted	= CAP_FULL_SET,					\
+	.cap_bset 	= CAP_INIT_BSET,				\
 	.keep_capabilities = 0,						\
 	.user		= INIT_USER,					\
 	.comm		= "swapper",					\
diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h
new file mode 100644
index 0000000..4dd4c04
--- /dev/null
+++ b/include/linux/iommu-helper.h
@@ -0,0 +1,7 @@
+extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
+				      unsigned long start, unsigned int nr,
+				      unsigned long shift,
+				      unsigned long boundary_size,
+				      unsigned long align_mask);
+extern void iommu_area_free(unsigned long *map, unsigned long start,
+			    unsigned int nr);
diff --git a/include/linux/latency.h b/include/linux/latency.h
deleted file mode 100644
index c08b52b..0000000
--- a/include/linux/latency.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * latency.h: Explicit system-wide latency-expectation infrastructure
- *
- * (C) Copyright 2006 Intel Corporation
- * Author: Arjan van de Ven <arjan@linux.intel.com>
- *
- */
-
-#ifndef _INCLUDE_GUARD_LATENCY_H_
-#define _INCLUDE_GUARD_LATENCY_H_
-
-#include <linux/notifier.h>
-
-void set_acceptable_latency(char *identifier, int usecs);
-void modify_acceptable_latency(char *identifier, int usecs);
-void remove_acceptable_latency(char *identifier);
-void synchronize_acceptable_latency(void);
-int system_latency_constraint(void);
-
-int register_latency_notifier(struct notifier_block * nb);
-int unregister_latency_notifier(struct notifier_block * nb);
-
-#define INFINITE_LATENCY 1000000
-
-#endif
diff --git a/include/linux/leds.h b/include/linux/leds.h
index b4130ff..00f89fd 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -54,7 +54,15 @@
 
 extern int led_classdev_register(struct device *parent,
 				 struct led_classdev *led_cdev);
-extern void led_classdev_unregister(struct led_classdev *led_cdev);
+extern void __led_classdev_unregister(struct led_classdev *led_cdev, bool sus);
+static inline void led_classdev_unregister(struct led_classdev *lcd)
+{
+	__led_classdev_unregister(lcd, false);
+}
+static inline void led_classdev_unregister_suspended(struct led_classdev *lcd)
+{
+	__led_classdev_unregister(lcd, true);
+}
 extern void led_classdev_suspend(struct led_classdev *led_cdev);
 extern void led_classdev_resume(struct led_classdev *led_cdev);
 
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index dff9ea3..24b30b9 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -43,7 +43,15 @@
 };
 
 extern int misc_register(struct miscdevice * misc);
-extern int misc_deregister(struct miscdevice * misc);
+extern int __misc_deregister(struct miscdevice *misc, bool suspended);
+static inline int misc_deregister(struct miscdevice *misc)
+{
+	return __misc_deregister(misc, false);
+}
+static inline int misc_deregister_suspended(struct miscdevice *misc)
+{
+	return __misc_deregister(misc, true);
+}
 
 #define MODULE_ALIAS_MISCDEV(minor)				\
 	MODULE_ALIAS("char-major-" __stringify(MISC_MAJOR)	\
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1bba678..89d7c69 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -227,10 +227,22 @@
  */
 static inline int get_page_unless_zero(struct page *page)
 {
-	VM_BUG_ON(PageCompound(page));
+	VM_BUG_ON(PageTail(page));
 	return atomic_inc_not_zero(&page->_count);
 }
 
+/* Support for virtually mapped pages */
+struct page *vmalloc_to_page(const void *addr);
+unsigned long vmalloc_to_pfn(const void *addr);
+
+/* Determine if an address is within the vmalloc range */
+static inline int is_vmalloc_addr(const void *x)
+{
+	unsigned long addr = (unsigned long)x;
+
+	return addr >= VMALLOC_START && addr < VMALLOC_END;
+}
+
 static inline struct page *compound_head(struct page *page)
 {
 	if (unlikely(PageTail(page)))
@@ -706,6 +718,28 @@
 		struct vm_area_struct *start_vma, unsigned long start_addr,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *);
+
+/**
+ * struct mm_walk - callbacks for walk_page_range
+ * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
+ * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
+ * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
+ * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
+ * @pte_hole: if set, called for each hole at all levels
+ *
+ * (see walk_page_range for more details)
+ */
+struct mm_walk {
+	int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, void *);
+	int (*pud_entry)(pud_t *, unsigned long, unsigned long, void *);
+	int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, void *);
+	int (*pte_entry)(pte_t *, unsigned long, unsigned long, void *);
+	int (*pte_hole)(unsigned long, unsigned long, void *);
+};
+
+int walk_page_range(const struct mm_struct *, unsigned long addr,
+		    unsigned long end, const struct mm_walk *walk,
+		    void *private);
 void free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
 		unsigned long end, unsigned long floor, unsigned long ceiling);
 void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
@@ -1089,8 +1123,6 @@
 
 pgprot_t vm_get_page_prot(unsigned long vm_flags);
 struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
-struct page *vmalloc_to_page(void *addr);
-unsigned long vmalloc_to_pfn(void *addr);
 int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 			unsigned long pfn, unsigned long size, pgprot_t);
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4c4522a..8d8d197 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -113,7 +113,7 @@
 };
 
 struct per_cpu_pageset {
-	struct per_cpu_pages pcp[2];	/* 0: hot.  1: cold */
+	struct per_cpu_pages pcp;
 #ifdef CONFIG_NUMA
 	s8 expire;
 #endif
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index e9fddb4..139d49d 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -343,7 +343,8 @@
 	__u8	class;			/* Standard interface or SDIO_ANY_ID */
 	__u16	vendor;			/* Vendor or SDIO_ANY_ID */
 	__u16	device;			/* Device ID or SDIO_ANY_ID */
-	kernel_ulong_t driver_data;	/* Data private to the driver */
+	kernel_ulong_t driver_data	/* Data private to the driver */
+		__attribute__((aligned(sizeof(kernel_ulong_t))));
 };
 
 /* SSB core, see drivers/ssb/ */
diff --git a/include/linux/nubus.h b/include/linux/nubus.h
index cdb3e9b..c435507 100644
--- a/include/linux/nubus.h
+++ b/include/linux/nubus.h
@@ -132,10 +132,12 @@
 	NUBUS_DRHW_RDIUS_DCGX     = 0x027C, /* Radius DirectColor/GX */
 	NUBUS_DRHW_RDIUS_PC8      = 0x0291, /* Radius PrecisionColor 8 */
 	NUBUS_DRHW_LAPIS_PCS8     = 0x0292, /* Lapis ProColorServer 8 */
-	NUBUS_DRHW_RASTER_24LXI   = 0x02A0, /* RasterOps 8/24 XLi */
+	NUBUS_DRHW_RASTER_24XLI   = 0x02A0, /* RasterOps 8/24 XLi */
 	NUBUS_DRHW_RASTER_PBPGT   = 0x02A5, /* RasterOps PaintBoard Prism GT */
 	NUBUS_DRHW_EMACH_FSX      = 0x02AE, /* E-Machines Futura SX */
+	NUBUS_DRHW_RASTER_24XLTV  = 0x02B7, /* RasterOps 24XLTV */
 	NUBUS_DRHW_SMAC_THUND24   = 0x02CB, /* SuperMac Thunder/24 */
+	NUBUS_DRHW_SMAC_THUNDLGHT = 0x03D9, /* SuperMac ThunderLight */
 	NUBUS_DRHW_RDIUS_PC24XP   = 0x0406, /* Radius PrecisionColor 24Xp */
 	NUBUS_DRHW_RDIUS_PC24X    = 0x040A, /* Radius PrecisionColor 24X */
 	NUBUS_DRHW_RDIUS_PC8XJ    = 0x040B, /* Radius PrecisionColor 8XJ */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 209d3a4..bbad43f 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -131,16 +131,52 @@
 #define ClearPageReferenced(page)	clear_bit(PG_referenced, &(page)->flags)
 #define TestClearPageReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags)
 
-#define PageUptodate(page)	test_bit(PG_uptodate, &(page)->flags)
+static inline int PageUptodate(struct page *page)
+{
+	int ret = test_bit(PG_uptodate, &(page)->flags);
+
+	/*
+	 * Must ensure that the data we read out of the page is loaded
+	 * _after_ we've loaded page->flags to check for PageUptodate.
+	 * We can skip the barrier if the page is not uptodate, because
+	 * we wouldn't be reading anything from it.
+	 *
+	 * See SetPageUptodate() for the other side of the story.
+	 */
+	if (ret)
+		smp_rmb();
+
+	return ret;
+}
+
+static inline void __SetPageUptodate(struct page *page)
+{
+	smp_wmb();
+	__set_bit(PG_uptodate, &(page)->flags);
 #ifdef CONFIG_S390
+	page_clear_dirty(page);
+#endif
+}
+
 static inline void SetPageUptodate(struct page *page)
 {
+#ifdef CONFIG_S390
 	if (!test_and_set_bit(PG_uptodate, &page->flags))
 		page_clear_dirty(page);
-}
 #else
-#define SetPageUptodate(page)	set_bit(PG_uptodate, &(page)->flags)
+	/*
+	 * Memory barrier must be issued before setting the PG_uptodate bit,
+	 * so that all previous stores issued in order to bring the page
+	 * uptodate are actually visible before PageUptodate becomes true.
+	 *
+	 * s390 doesn't need an explicit smp_wmb here because the test and
+	 * set bit already provides full barriers.
+	 */
+	smp_wmb();
+	set_bit(PG_uptodate, &(page)->flags);
 #endif
+}
+
 #define ClearPageUptodate(page)	clear_bit(PG_uptodate, &(page)->flags)
 
 #define PageDirty(page)		test_bit(PG_dirty, &(page)->flags)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index cee75c0..7215d3b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -159,6 +159,8 @@
 					   this if your device has broken DMA
 					   or supports 64-bit transfers.  */
 
+	struct device_dma_parameters dma_parms;
+
 	pci_power_t     current_state;  /* Current operating state. In ACPI-speak,
 					   this is D0-D3, D0 being fully functional,
 					   and D3 being off. */
@@ -580,6 +582,8 @@
 void pci_msi_off(struct pci_dev *dev);
 int pci_set_dma_mask(struct pci_dev *dev, u64 mask);
 int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask);
+int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size);
+int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask);
 int pcix_get_max_mmrbc(struct pci_dev *dev);
 int pcix_get_mmrbc(struct pci_dev *dev);
 int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc);
@@ -822,6 +826,18 @@
 	return -EIO;
 }
 
+static inline int pci_set_dma_max_seg_size(struct pci_dev *dev,
+					unsigned int size)
+{
+	return -EIO;
+}
+
+static inline int pci_set_dma_seg_boundary(struct pci_dev *dev,
+					unsigned long mask)
+{
+	return -EIO;
+}
+
 static inline int pci_assign_resource(struct pci_dev *dev, int i)
 {
 	return -EBUSY;
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 41f6f28..39d3283 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2043,6 +2043,23 @@
 #define PCI_VENDOR_ID_QUICKNET		0x15e2
 #define PCI_DEVICE_ID_QUICKNET_XJ	0x0500
 
+/*
+ * ADDI-DATA GmbH communication cards <info@addi-data.com>
+ */
+#define PCI_VENDOR_ID_ADDIDATA_OLD             0x10E8
+#define PCI_VENDOR_ID_ADDIDATA                 0x15B8
+#define PCI_DEVICE_ID_ADDIDATA_APCI7500        0x7000
+#define PCI_DEVICE_ID_ADDIDATA_APCI7420        0x7001
+#define PCI_DEVICE_ID_ADDIDATA_APCI7300        0x7002
+#define PCI_DEVICE_ID_ADDIDATA_APCI7800        0x818E
+#define PCI_DEVICE_ID_ADDIDATA_APCI7500_2      0x7009
+#define PCI_DEVICE_ID_ADDIDATA_APCI7420_2      0x700A
+#define PCI_DEVICE_ID_ADDIDATA_APCI7300_2      0x700B
+#define PCI_DEVICE_ID_ADDIDATA_APCI7500_3      0x700C
+#define PCI_DEVICE_ID_ADDIDATA_APCI7420_3      0x700D
+#define PCI_DEVICE_ID_ADDIDATA_APCI7300_3      0x700E
+#define PCI_DEVICE_ID_ADDIDATA_APCI7800_3      0x700F
+
 #define PCI_VENDOR_ID_PDC		0x15e9
 
 #define PCI_VENDOR_ID_FARSITE           0x1619
diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index 1c1dba9..40fac8c 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -459,7 +459,8 @@
 #define	TCF_EM_U32		3
 #define	TCF_EM_META		4
 #define	TCF_EM_TEXT		5
-#define	TCF_EM_MAX		5
+#define        TCF_EM_VLAN		6
+#define	TCF_EM_MAX		6
 
 enum
 {
diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h
new file mode 100644
index 0000000..2e4e97b
--- /dev/null
+++ b/include/linux/pm_qos_params.h
@@ -0,0 +1,25 @@
+/* interface for the pm_qos_power infrastructure of the linux kernel.
+ *
+ * Mark Gross
+ */
+#include <linux/list.h>
+#include <linux/notifier.h>
+#include <linux/miscdevice.h>
+
+#define PM_QOS_RESERVED 0
+#define PM_QOS_CPU_DMA_LATENCY 1
+#define PM_QOS_NETWORK_LATENCY 2
+#define PM_QOS_NETWORK_THROUGHPUT 3
+
+#define PM_QOS_NUM_CLASSES 4
+#define PM_QOS_DEFAULT_VALUE -1
+
+int pm_qos_add_requirement(int qos, char *name, s32 value);
+int pm_qos_update_requirement(int qos, char *name, s32 new_value);
+void pm_qos_remove_requirement(int qos, char *name);
+
+int pm_qos_requirement(int qos);
+
+int pm_qos_add_notifier(int qos, struct notifier_block *notifier);
+int pm_qos_remove_notifier(int qos, struct notifier_block *notifier);
+
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index e2eff90..3800639 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -63,4 +63,8 @@
 #define PR_GET_SECCOMP	21
 #define PR_SET_SECCOMP	22
 
+/* Get/set the capability bounding set */
+#define PR_CAPBSET_READ 23
+#define PR_CAPBSET_DROP 24
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 8f92546..e435515 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -19,6 +19,8 @@
  */
 #define FIRST_PROCESS_ENTRY 256
 
+/* Worst case buffer size needed for holding an integer. */
+#define PROC_NUMBUF 13
 
 /*
  * We always define these enumerators
@@ -117,7 +119,6 @@
 unsigned long task_vsize(struct mm_struct *);
 int task_statm(struct mm_struct *, int *, int *, int *, int *);
 char *task_mem(struct mm_struct *, char *);
-void clear_refs_smap(struct mm_struct *mm);
 
 struct proc_dir_entry *de_get(struct proc_dir_entry *de);
 void de_put(struct proc_dir_entry *de);
diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index 85ea63f..b93b541 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -59,8 +59,6 @@
  * Architecture independent implemenations of sys_reboot commands.
  */
 
-extern void kernel_shutdown_prepare(enum system_states state);
-
 extern void kernel_restart(char *cmd);
 extern void kernel_halt(void);
 extern void kernel_power_off(void);
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index b014f6b..b9e1740 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -602,24 +602,12 @@
 
 #include <linux/mutex.h>
 
-extern size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size);
 static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str)
 {
 	int len = strlen(str) + 1;
 	return len > rta->rta_len || memcmp(RTA_DATA(rta), str, len);
 }
 
-extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len);
-extern int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr,
-				        struct rtattr *rta, int len);
-
-#define rtattr_parse_nested(tb, max, rta) \
-	rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta)))
-
-#define rtattr_parse_nested_compat(tb, max, rta, data, len) \
-({	data = RTA_PAYLOAD(rta) >= len ? RTA_DATA(rta) : NULL; \
-	__rtattr_parse_nested_compat(tb, max, rta, len); })
-
 extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo);
 extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid);
 extern int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index af6947e..9c13be3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -555,6 +555,13 @@
 #define SIGNAL_STOP_CONTINUED	0x00000004 /* SIGCONT since WCONTINUED reap */
 #define SIGNAL_GROUP_EXIT	0x00000008 /* group exit in progress */
 
+/* If true, all threads except ->group_exit_task have pending SIGKILL */
+static inline int signal_group_exit(const struct signal_struct *sig)
+{
+	return	(sig->flags & SIGNAL_GROUP_EXIT) ||
+		(sig->group_exit_task != NULL);
+}
+
 /*
  * Some day this will be a full-fledged user tracking system..
  */
@@ -1091,7 +1098,7 @@
 	uid_t uid,euid,suid,fsuid;
 	gid_t gid,egid,sgid,fsgid;
 	struct group_info *group_info;
-	kernel_cap_t   cap_effective, cap_inheritable, cap_permitted;
+	kernel_cap_t   cap_effective, cap_inheritable, cap_permitted, cap_bset;
 	unsigned keep_capabilities:1;
 	struct user_struct *user;
 #ifdef CONFIG_KEYS
@@ -1770,7 +1777,7 @@
 struct task_struct *fork_idle(int);
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
-extern void get_task_comm(char *to, struct task_struct *tsk);
+extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
 extern void wait_task_inactive(struct task_struct * p);
@@ -2080,6 +2087,10 @@
 }
 #endif
 
+#ifndef TASK_SIZE_OF
+#define TASK_SIZE_OF(tsk)	TASK_SIZE
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif
diff --git a/include/linux/security.h b/include/linux/security.h
index d249742..fe52cde 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -40,11 +40,6 @@
 #define ROOTCONTEXT_MNT		0x04
 #define DEFCONTEXT_MNT		0x08
 
-/*
- * Bounding set
- */
-extern kernel_cap_t cap_bset;
-
 extern unsigned securebits;
 
 struct ctl_table;
@@ -423,15 +418,12 @@
  * 	identified by @name for @dentry.
  * 	Return 0 if permission is granted.
  * @inode_getsecurity:
- *	Copy the extended attribute representation of the security label 
- *	associated with @name for @inode into @buffer.  @buffer may be
- *	NULL to request the size of the buffer required.  @size indicates
- *	the size of @buffer in bytes.  Note that @name is the remainder
- *	of the attribute name after the security. prefix has been removed.
- *	@err is the return value from the preceding fs getxattr call,
- *	and can be used by the security module to determine whether it
- *	should try and canonicalize the attribute value.
- *	Return number of bytes used/required on success.
+ *	Retrieve a copy of the extended attribute representation of the
+ *	security label associated with @name for @inode via @buffer.  Note that
+ *	@name is the remainder of the attribute name after the security prefix
+ *	has been removed. @alloc is used to specify if the call should return a
+ *	value via the buffer or just the value length. Return size of buffer on
+ *	success.
  * @inode_setsecurity:
  *	Set the security label associated with @name for @inode from the
  *	extended attribute value @value.  @size indicates the size of the
@@ -1304,7 +1296,7 @@
 	int (*inode_removexattr) (struct dentry *dentry, char *name);
 	int (*inode_need_killpriv) (struct dentry *dentry);
 	int (*inode_killpriv) (struct dentry *dentry);
-  	int (*inode_getsecurity)(const struct inode *inode, const char *name, void *buffer, size_t size, int err);
+	int (*inode_getsecurity)(const struct inode *inode, const char *name, void **buffer, bool alloc);
   	int (*inode_setsecurity)(struct inode *inode, const char *name, const void *value, size_t size, int flags);
   	int (*inode_listsecurity)(struct inode *inode, char *buffer, size_t buffer_size);
 
@@ -1565,7 +1557,7 @@
 int security_inode_removexattr(struct dentry *dentry, char *name);
 int security_inode_need_killpriv(struct dentry *dentry);
 int security_inode_killpriv(struct dentry *dentry);
-int security_inode_getsecurity(const struct inode *inode, const char *name, void *buffer, size_t size, int err);
+int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc);
 int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags);
 int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size);
 int security_file_permission(struct file *file, int mask);
@@ -1967,7 +1959,7 @@
 	return cap_inode_killpriv(dentry);
 }
 
-static inline int security_inode_getsecurity(const struct inode *inode, const char *name, void *buffer, size_t size, int err)
+static inline int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index afe0f6d..00b65c0 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -23,6 +23,7 @@
 	resource_size_t	mapbase;	/* resource base */
 	unsigned int	irq;		/* interrupt number */
 	unsigned int	uartclk;	/* UART clock rate */
+	void            *private_data;
 	unsigned char	regshift;	/* register shift */
 	unsigned char	iotype;		/* UPIO_* */
 	unsigned char	hub6;
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 40801e7..ddb1a70 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -12,11 +12,11 @@
 #include <linux/kobject.h>
 
 struct kmem_cache_cpu {
-	void **freelist;
-	struct page *page;
-	int node;
-	unsigned int offset;
-	unsigned int objsize;
+	void **freelist;	/* Pointer to first free per cpu object */
+	struct page *page;	/* The slab from which we are allocating */
+	int node;		/* The node of the page (or -1 for debug) */
+	unsigned int offset;	/* Freepointer offset (in word units) */
+	unsigned int objsize;	/* Size of an object (from kmem_cache) */
 };
 
 struct kmem_cache_node {
@@ -59,7 +59,10 @@
 #endif
 
 #ifdef CONFIG_NUMA
-	int defrag_ratio;
+	/*
+	 * Defragmentation by allocating from a remote node.
+	 */
+	int remote_node_defrag_ratio;
 	struct kmem_cache_node *node[MAX_NUMNODES];
 #endif
 #ifdef CONFIG_SMP
diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h
new file mode 100644
index 0000000..835ddf4
--- /dev/null
+++ b/include/linux/spi/mcp23s08.h
@@ -0,0 +1,24 @@
+
+/* FIXME driver should be able to handle all four slaves that
+ * can be hooked up to each chipselect, as well as IRQs...
+ */
+
+struct mcp23s08_platform_data {
+	/* four slaves can share one SPI chipselect */
+	u8		slave;
+
+	/* number assigned to the first GPIO */
+	unsigned	base;
+
+	/* pins with pullups */
+	u8		pullups;
+
+	void		*context;	/* param to setup/teardown */
+
+	int		(*setup)(struct spi_device *spi,
+					int gpio, unsigned ngpio,
+					void *context);
+	int		(*teardown)(struct spi_device *spi,
+					int gpio, unsigned ngpio,
+					void *context);
+};
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 646ce2d..1d7d4c5 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -130,7 +130,6 @@
 };
 
 /* mm/page_alloc.c */
-extern void drain_local_pages(void);
 extern void mark_free_pages(struct zone *zone);
 
 /**
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4f3838a..353153e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -158,9 +158,6 @@
 /* Swap 50% full? Release swapcache more aggressively.. */
 #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
 
-/* linux/mm/memory.c */
-extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
-
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
@@ -223,16 +220,17 @@
 #define total_swapcache_pages  swapper_space.nrpages
 extern void show_swap_cache_info(void);
 extern int add_to_swap(struct page *, gfp_t);
+extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
 extern void __delete_from_swap_cache(struct page *);
 extern void delete_from_swap_cache(struct page *);
-extern int move_to_swap_cache(struct page *, swp_entry_t);
-extern int move_from_swap_cache(struct page *, unsigned long,
-		struct address_space *);
 extern void free_page_and_swap_cache(struct page *);
 extern void free_pages_and_swap_cache(struct page **, int);
-extern struct page * lookup_swap_cache(swp_entry_t);
-extern struct page * read_swap_cache_async(swp_entry_t, struct vm_area_struct *vma,
-					   unsigned long addr);
+extern struct page *lookup_swap_cache(swp_entry_t);
+extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
+			struct vm_area_struct *vma, unsigned long addr);
+extern struct page *swapin_readahead(swp_entry_t, gfp_t,
+			struct vm_area_struct *vma, unsigned long addr);
+
 /* linux/mm/swapfile.c */
 extern long total_swap_pages;
 extern unsigned int nr_swapfiles;
@@ -306,7 +304,7 @@
 {
 }
 
-static inline struct page *read_swap_cache_async(swp_entry_t swp,
+static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
 			struct vm_area_struct *vma, unsigned long addr)
 {
 	return NULL;
@@ -317,22 +315,12 @@
 	return NULL;
 }
 
-static inline int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
-{
-	return 0;
-}
-
 #define can_share_swap_page(p)			(page_mapcount(p) == 1)
 
-static inline int move_to_swap_cache(struct page *page, swp_entry_t entry)
+static inline int add_to_swap_cache(struct page *page, swp_entry_t entry,
+							gfp_t gfp_mask)
 {
-	return 1;
-}
-
-static inline int move_from_swap_cache(struct page *page, unsigned long index,
-					struct address_space *mapping)
-{
-	return 1;
+	return -1;
 }
 
 static inline void __delete_from_swap_cache(struct page *page)
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index ceb6cc5..7bf2d14 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -42,6 +42,12 @@
 	return entry.val & SWP_OFFSET_MASK(entry);
 }
 
+/* check whether a pte points to a swap entry */
+static inline int is_swap_pte(pte_t pte)
+{
+	return !pte_none(pte) && !pte_present(pte) && !pte_file(pte);
+}
+
 /*
  * Convert the arch-dependent pte representation of a swp_entry_t into an
  * arch-independent swp_entry_t.
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 61def7c8..4c2577b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -607,8 +607,11 @@
 				    size_t len);
 asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache);
 asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask);
-asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
-			    const struct itimerspec __user *utmr);
+asmlinkage long sys_timerfd_create(int clockid, int flags);
+asmlinkage long sys_timerfd_settime(int ufd, int flags,
+				    const struct itimerspec __user *utmr,
+				    struct itimerspec __user *otmr);
+asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr);
 asmlinkage long sys_eventfd(unsigned int count);
 asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index bf4ae4e..571f01d 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -102,7 +102,6 @@
 	KERN_NODENAME=7,
 	KERN_DOMAINNAME=8,
 
-	KERN_CAP_BSET=14,	/* int: capability bounding set */
 	KERN_PANIC=15,		/* int: panic timeout */
 	KERN_REALROOTDEV=16,	/* real root device to mount after initrd */
 
@@ -965,8 +964,6 @@
 			 void __user *, size_t *, loff_t *);
 extern int proc_dointvec(struct ctl_table *, int, struct file *,
 			 void __user *, size_t *, loff_t *);
-extern int proc_dointvec_bset(struct ctl_table *, int, struct file *,
-			      void __user *, size_t *, loff_t *);
 extern int proc_dointvec_minmax(struct ctl_table *, int, struct file *,
 				void __user *, size_t *, loff_t *);
 extern int proc_dointvec_jiffies(struct ctl_table *, int, struct file *,
diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h
index e21937c..c50d2ba 100644
--- a/include/linux/tc_ematch/tc_em_meta.h
+++ b/include/linux/tc_ematch/tc_em_meta.h
@@ -81,6 +81,7 @@
  	TCF_META_ID_SK_SNDTIMEO,
  	TCF_META_ID_SK_SENDMSG_OFF,
  	TCF_META_ID_SK_WRITE_PENDING,
+	TCF_META_ID_VLAN_TAG,
 	__TCF_META_ID_MAX
 };
 #define TCF_META_ID_MAX (__TCF_META_ID_MAX - 1)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 89338b4..ce8e7da 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -45,11 +45,11 @@
 extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
 extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
 				pgprot_t prot);
-extern void vfree(void *addr);
+extern void vfree(const void *addr);
 
 extern void *vmap(struct page **pages, unsigned int count,
 			unsigned long flags, pgprot_t prot);
-extern void vunmap(void *addr);
+extern void vunmap(const void *addr);
 
 extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 							unsigned long pgoff);
@@ -71,7 +71,7 @@
 extern struct vm_struct *get_vm_area_node(unsigned long size,
 					  unsigned long flags, int node,
 					  gfp_t gfp_mask);
-extern struct vm_struct *remove_vm_area(void *addr);
+extern struct vm_struct *remove_vm_area(const void *addr);
 
 extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
 			struct page ***pages);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 1f4fb0a..33a2aa9 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -162,6 +162,22 @@
 #define wake_up_interruptible_all(x)	__wake_up(x, TASK_INTERRUPTIBLE, 0, NULL)
 #define wake_up_interruptible_sync(x)	__wake_up_sync((x), TASK_INTERRUPTIBLE, 1)
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+/*
+ * macro to avoid include hell
+ */
+#define wake_up_nested(x, s)						\
+do {									\
+	unsigned long flags;						\
+									\
+	spin_lock_irqsave_nested(&(x)->lock, flags, (s));		\
+	wake_up_locked(x); 						\
+	spin_unlock_irqrestore(&(x)->lock, flags);			\
+} while (0)
+#else
+#define wake_up_nested(x, s)		wake_up(x)
+#endif
+
 #define __wait_event(wq, condition) 					\
 do {									\
 	DEFINE_WAIT(__wait);						\
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index c6148bb..b7b3362 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -62,6 +62,7 @@
 	unsigned for_reclaim:1;		/* Invoked from the page allocator */
 	unsigned for_writepages:1;	/* This is a writepages() call */
 	unsigned range_cyclic:1;	/* range_start is cyclic */
+	unsigned more_io:1;		/* more io to be dispatched */
 };
 
 /*
@@ -100,6 +101,7 @@
 extern int vm_dirty_ratio;
 extern int dirty_writeback_interval;
 extern int dirty_expire_interval;
+extern int vm_highmem_is_dirtyable;
 extern int block_dump;
 extern int laptop_mode;
 
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index def131a..df6b95d 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -46,6 +46,7 @@
 		   size_t size, int flags);
 };
 
+ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t);
 ssize_t vfs_getxattr(struct dentry *, char *, void *, size_t);
 ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size);
 int vfs_setxattr(struct dentry *, char *, void *, size_t, int);
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 48ac620..97dc35a 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -389,7 +389,7 @@
 }
 
 extern int __inet_hash_connect(struct inet_timewait_death_row *death_row,
-		struct sock *sk,
+		struct sock *sk, u32 port_offset,
 		int (*check_established)(struct inet_timewait_death_row *,
 			struct sock *, __u16, struct inet_timewait_sock **),
 			       void (*hash)(struct sock *sk));
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 90d1175..8b12667 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -266,6 +266,14 @@
 #ifdef CONFIG_PROC_FS
 extern int __net_init  fib_proc_init(struct net *net);
 extern void __net_exit fib_proc_exit(struct net *net);
+#else
+static inline int fib_proc_init(struct net *net)
+{
+	return 0;
+}
+static inline void fib_proc_exit(struct net *net)
+{
+}
 #endif
 
 #endif  /* _NET_FIB_H */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index fa80ea4..c0c019f 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -110,7 +110,6 @@
 
 /* sysctls */
 extern int sysctl_mld_max_msf;
-
 extern struct ctl_path net_ipv6_ctl_path[];
 
 #define _DEVINC(statname, modifier, idev, field)			\
@@ -586,9 +585,6 @@
 			 int __user *optlen);
 
 #ifdef CONFIG_PROC_FS
-extern struct ctl_table *ipv6_icmp_sysctl_init(struct net *net);
-extern struct ctl_table *ipv6_route_sysctl_init(struct net *net);
-
 extern int  ac6_proc_init(void);
 extern void ac6_proc_exit(void);
 extern int  raw6_proc_init(void);
@@ -621,6 +617,8 @@
 extern ctl_table ipv6_route_table_template[];
 extern ctl_table ipv6_icmp_table_template[];
 
+extern struct ctl_table *ipv6_icmp_sysctl_init(struct net *net);
+extern struct ctl_table *ipv6_route_sysctl_init(struct net *net);
 extern int ipv6_sysctl_register(void);
 extern void ipv6_sysctl_unregister(void);
 #endif
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index b3213c7..0ca67d7 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -36,6 +36,8 @@
 #include <net/netlink.h>
 #include <asm/atomic.h>
 
+struct cipso_v4_doi;
+
 /*
  * NetLabel - A management interface for maintaining network packet label
  *            mapping tables for explicit packet labling protocols.
@@ -103,12 +105,6 @@
 	uid_t loginuid;
 };
 
-/* Domain mapping definition struct */
-struct netlbl_dom_map;
-
-/* Domain mapping operations */
-int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info);
-
 /*
  * LSM security attributes
  */
@@ -344,6 +340,19 @@
 
 #ifdef CONFIG_NETLABEL
 /*
+ * LSM configuration operations
+ */
+int netlbl_cfg_map_del(const char *domain, struct netlbl_audit *audit_info);
+int netlbl_cfg_unlbl_add_map(const char *domain,
+			     struct netlbl_audit *audit_info);
+int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def,
+			   struct netlbl_audit *audit_info);
+int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def,
+			       const char *domain,
+			       struct netlbl_audit *audit_info);
+int netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info);
+
+/*
  * LSM security attribute operations
  */
 int netlbl_secattr_catmap_walk(struct netlbl_lsm_secattr_catmap *catmap,
@@ -378,6 +387,32 @@
 int netlbl_cache_add(const struct sk_buff *skb,
 		     const struct netlbl_lsm_secattr *secattr);
 #else
+static inline int netlbl_cfg_map_del(const char *domain,
+				     struct netlbl_audit *audit_info)
+{
+	return -ENOSYS;
+}
+static inline int netlbl_cfg_unlbl_add_map(const char *domain,
+					   struct netlbl_audit *audit_info)
+{
+	return -ENOSYS;
+}
+static inline int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def,
+					 struct netlbl_audit *audit_info)
+{
+	return -ENOSYS;
+}
+static inline int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def,
+					     const char *domain,
+					     struct netlbl_audit *audit_info)
+{
+	return -ENOSYS;
+}
+static inline int netlbl_cfg_cipsov4_del(u32 doi,
+					 struct netlbl_audit *audit_info)
+{
+	return -ENOSYS;
+}
 static inline int netlbl_secattr_catmap_walk(
 	                              struct netlbl_lsm_secattr_catmap *catmap,
 				      u32 offset)
diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h
index d5838c3..87a260e 100644
--- a/include/pcmcia/cs.h
+++ b/include/pcmcia/cs.h
@@ -147,11 +147,11 @@
 
 /* For RequestIO and ReleaseIO */
 typedef struct io_req_t {
-    ioaddr_t	BasePort1;
-    ioaddr_t	NumPorts1;
+    u_int	BasePort1;
+    u_int	NumPorts1;
     u_int	Attributes1;
-    ioaddr_t	BasePort2;
-    ioaddr_t	NumPorts2;
+    u_int	BasePort2;
+    u_int	NumPorts2;
     u_int	Attributes2;
     u_int	IOAddrLines;
 } io_req_t;
diff --git a/include/pcmcia/cs_types.h b/include/pcmcia/cs_types.h
index 5f38803..9a6bcc4 100644
--- a/include/pcmcia/cs_types.h
+++ b/include/pcmcia/cs_types.h
@@ -27,7 +27,6 @@
 #else
 typedef u_short	ioaddr_t;
 #endif
-typedef unsigned long kio_addr_t;
 
 typedef u_short	socket_t;
 typedef u_int	event_t;
diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h
index 6e84258..f95dca0 100644
--- a/include/pcmcia/ss.h
+++ b/include/pcmcia/ss.h
@@ -92,7 +92,7 @@
     u_char	map;
     u_char	flags;
     u_short	speed;
-    kio_addr_t	start, stop;
+    u_int	start, stop;
 } pccard_io_map;
 
 typedef struct pccard_mem_map {
@@ -155,7 +155,7 @@
 struct pcmcia_socket;
 
 typedef struct io_window_t {
-	kio_addr_t		InUse, Config;
+	u_int			InUse, Config;
 	struct resource		*res;
 } io_window_t;
 
@@ -208,7 +208,7 @@
 	u_int				features;
 	u_int				irq_mask;
 	u_int				map_size;
-	kio_addr_t			io_offset;
+	u_int				io_offset;
 	u_char				pci_irq;
 	struct pci_dev *		cb_dev;
 
diff --git a/init/Kconfig b/init/Kconfig
index b2acdeb..87f50df 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -582,7 +582,6 @@
 config TIMERFD
 	bool "Enable timerfd() system call" if EMBEDDED
 	select ANON_INODES
-	depends on BROKEN
 	default y
 	help
 	  Enable the timerfd() system call that allows to receive timer
@@ -657,11 +656,9 @@
 	depends on EMBEDDED
 	bool "SLOB (Simple Allocator)"
 	help
-	   SLOB replaces the SLAB allocator with a drastically simpler
-	   allocator.  SLOB is more space efficient than SLAB but does not
-	   scale well (single lock for all operations) and is also highly
-	   susceptible to fragmentation. SLUB can accomplish a higher object
-	   density. It is usually better to use SLUB instead of SLOB.
+	   SLOB replaces the stock allocator with a drastically simpler
+	   allocator. SLOB is generally more space efficient but
+	   does not perform as well on large systems.
 
 endchoice
 
@@ -679,6 +676,16 @@
 
 source "arch/Kconfig"
 
+config PROC_PAGE_MONITOR
+ 	default y
+	depends on PROC_FS && MMU
+	bool "Enable /proc page monitoring" if EMBEDDED
+ 	help
+	  Various /proc files exist to monitor process memory utilization:
+	  /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap,
+	  /proc/kpagecount, and /proc/kpageflags. Disabling these
+	  interfaces will reduce the size of the kernel by approximately 4kB.
+
 endmenu		# General setup
 
 config SLABINFO
diff --git a/kernel/Makefile b/kernel/Makefile
index db9af70..135a1b9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,8 +8,8 @@
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
-	    hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \
-	    utsname.o notifier.o ksysfs.o
+	    hrtimer.o rwsem.o nsproxy.o srcu.o \
+	    utsname.o notifier.o ksysfs.o pm_qos_params.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_check.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/kernel/capability.c b/kernel/capability.c
index efbd9cd..39e8193 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -22,6 +22,37 @@
 static DEFINE_SPINLOCK(task_capability_lock);
 
 /*
+ * Leveraged for setting/resetting capabilities
+ */
+
+const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET;
+const kernel_cap_t __cap_full_set = CAP_FULL_SET;
+const kernel_cap_t __cap_init_eff_set = CAP_INIT_EFF_SET;
+
+EXPORT_SYMBOL(__cap_empty_set);
+EXPORT_SYMBOL(__cap_full_set);
+EXPORT_SYMBOL(__cap_init_eff_set);
+
+/*
+ * More recent versions of libcap are available from:
+ *
+ *   http://www.kernel.org/pub/linux/libs/security/linux-privs/
+ */
+
+static void warn_legacy_capability_use(void)
+{
+	static int warned;
+	if (!warned) {
+		char name[sizeof(current->comm)];
+
+		printk(KERN_INFO "warning: `%s' uses 32-bit capabilities"
+		       " (legacy support in use)\n",
+		       get_task_comm(name, current));
+		warned = 1;
+	}
+}
+
+/*
  * For sys_getproccap() and sys_setproccap(), any of the three
  * capability set pointers may be NULL -- indicating that that set is
  * uninteresting and/or not to be changed.
@@ -42,12 +73,21 @@
 	pid_t pid;
 	__u32 version;
 	struct task_struct *target;
-	struct __user_cap_data_struct data;
+	unsigned tocopy;
+	kernel_cap_t pE, pI, pP;
 
 	if (get_user(version, &header->version))
 		return -EFAULT;
 
-	if (version != _LINUX_CAPABILITY_VERSION) {
+	switch (version) {
+	case _LINUX_CAPABILITY_VERSION_1:
+		warn_legacy_capability_use();
+		tocopy = _LINUX_CAPABILITY_U32S_1;
+		break;
+	case _LINUX_CAPABILITY_VERSION_2:
+		tocopy = _LINUX_CAPABILITY_U32S_2;
+		break;
+	default:
 		if (put_user(_LINUX_CAPABILITY_VERSION, &header->version))
 			return -EFAULT;
 		return -EINVAL;
@@ -71,14 +111,47 @@
 	} else
 		target = current;
 
-	ret = security_capget(target, &data.effective, &data.inheritable, &data.permitted);
+	ret = security_capget(target, &pE, &pI, &pP);
 
 out:
 	read_unlock(&tasklist_lock);
 	spin_unlock(&task_capability_lock);
 
-	if (!ret && copy_to_user(dataptr, &data, sizeof data))
-		return -EFAULT;
+	if (!ret) {
+		struct __user_cap_data_struct kdata[_LINUX_CAPABILITY_U32S];
+		unsigned i;
+
+		for (i = 0; i < tocopy; i++) {
+			kdata[i].effective = pE.cap[i];
+			kdata[i].permitted = pP.cap[i];
+			kdata[i].inheritable = pI.cap[i];
+		}
+
+		/*
+		 * Note: in the case where tocopy < _LINUX_CAPABILITY_U32S,
+		 * we silently drop the upper capabilities here. This
+		 * has the effect of making older libcap
+		 * implementations implicitly drop upper capability
+		 * bits when they perform a: capget/modify/capset
+		 * sequence.
+		 *
+		 * This behavior is considered fail-safe
+		 * behavior. Upgrading the application to a newer
+		 * version of libcap will enable access to the newer
+		 * capabilities.
+		 *
+		 * An alternative would be to return an error here
+		 * (-ERANGE), but that causes legacy applications to
+		 * unexpectedly fail; the capget/modify/capset aborts
+		 * before modification is attempted and the application
+		 * fails.
+		 */
+
+		if (copy_to_user(dataptr, kdata, tocopy
+				 * sizeof(struct __user_cap_data_struct))) {
+			return -EFAULT;
+		}
+	}
 
 	return ret;
 }
@@ -167,6 +240,8 @@
  */
 asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 {
+	struct __user_cap_data_struct kdata[_LINUX_CAPABILITY_U32S];
+	unsigned i, tocopy;
 	kernel_cap_t inheritable, permitted, effective;
 	__u32 version;
 	struct task_struct *target;
@@ -176,7 +251,15 @@
 	if (get_user(version, &header->version))
 		return -EFAULT;
 
-	if (version != _LINUX_CAPABILITY_VERSION) {
+	switch (version) {
+	case _LINUX_CAPABILITY_VERSION_1:
+		warn_legacy_capability_use();
+		tocopy = _LINUX_CAPABILITY_U32S_1;
+		break;
+	case _LINUX_CAPABILITY_VERSION_2:
+		tocopy = _LINUX_CAPABILITY_U32S_2;
+		break;
+	default:
 		if (put_user(_LINUX_CAPABILITY_VERSION, &header->version))
 			return -EFAULT;
 		return -EINVAL;
@@ -188,10 +271,22 @@
 	if (pid && pid != task_pid_vnr(current) && !capable(CAP_SETPCAP))
 		return -EPERM;
 
-	if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
-	    copy_from_user(&inheritable, &data->inheritable, sizeof(inheritable)) ||
-	    copy_from_user(&permitted, &data->permitted, sizeof(permitted)))
+	if (copy_from_user(&kdata, data, tocopy
+			   * sizeof(struct __user_cap_data_struct))) {
 		return -EFAULT;
+	}
+
+	for (i = 0; i < tocopy; i++) {
+		effective.cap[i] = kdata[i].effective;
+		permitted.cap[i] = kdata[i].permitted;
+		inheritable.cap[i] = kdata[i].inheritable;
+	}
+	while (i < _LINUX_CAPABILITY_U32S) {
+		effective.cap[i] = 0;
+		permitted.cap[i] = 0;
+		inheritable.cap[i] = 0;
+		i++;
+	}
 
 	spin_lock(&task_capability_lock);
 	read_lock(&tasklist_lock);
diff --git a/kernel/exit.c b/kernel/exit.c
index 9e459fe..9d3d0f0 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1083,11 +1083,12 @@
 		struct signal_struct *const sig = current->signal;
 		struct sighand_struct *const sighand = current->sighand;
 		spin_lock_irq(&sighand->siglock);
-		if (sig->flags & SIGNAL_GROUP_EXIT)
+		if (signal_group_exit(sig))
 			/* Another thread got here before we took the lock.  */
 			exit_code = sig->group_exit_code;
 		else {
 			sig->group_exit_code = exit_code;
+			sig->flags = SIGNAL_GROUP_EXIT;
 			zap_other_threads(current);
 		}
 		spin_unlock_irq(&sighand->siglock);
diff --git a/kernel/fork.c b/kernel/fork.c
index 05e0b6f..2b55b74 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -325,7 +325,7 @@
 
 static inline void mm_free_pgd(struct mm_struct * mm)
 {
-	pgd_free(mm->pgd);
+	pgd_free(mm, mm->pgd);
 }
 #else
 #define dup_mmap(mm, oldmm)	(0)
@@ -1118,6 +1118,7 @@
 #ifdef CONFIG_SECURITY
 	p->security = NULL;
 #endif
+	p->cap_bset = current->cap_bset;
 	p->io_context = NULL;
 	p->audit_context = NULL;
 	cgroup_fork(p);
@@ -1450,6 +1451,23 @@
 	int trace = 0;
 	long nr;
 
+	/*
+	 * We hope to recycle these flags after 2.6.26
+	 */
+	if (unlikely(clone_flags & CLONE_STOPPED)) {
+		static int __read_mostly count = 100;
+
+		if (count > 0 && printk_ratelimit()) {
+			char comm[TASK_COMM_LEN];
+
+			count--;
+			printk(KERN_INFO "fork(): process `%s' used deprecated "
+					"clone flags 0x%lx\n",
+				get_task_comm(comm, current),
+				clone_flags & CLONE_STOPPED);
+		}
+	}
+
 	if (unlikely(current->ptrace)) {
 		trace = fork_traceflag (clone_flags);
 		if (trace)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 1069998..668f396 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -306,7 +306,7 @@
 /*
  * Divide a ktime value by a nanosecond value
  */
-unsigned long ktime_divns(const ktime_t kt, s64 div)
+u64 ktime_divns(const ktime_t kt, s64 div)
 {
 	u64 dclc, inc, dns;
 	int sft = 0;
@@ -321,7 +321,7 @@
 	dclc >>= sft;
 	do_div(dclc, (unsigned long) div);
 
-	return (unsigned long) dclc;
+	return dclc;
 }
 #endif /* BITS_PER_LONG >= 64 */
 
@@ -656,10 +656,9 @@
  * Forward the timer expiry so it will expire in the future.
  * Returns the number of overruns.
  */
-unsigned long
-hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
+u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 {
-	unsigned long orun = 1;
+	u64 orun = 1;
 	ktime_t delta;
 
 	delta = ktime_sub(now, timer->expires);
diff --git a/kernel/latency.c b/kernel/latency.c
deleted file mode 100644
index e63fcac..0000000
--- a/kernel/latency.c
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
- * latency.c: Explicit system-wide latency-expectation infrastructure
- *
- * The purpose of this infrastructure is to allow device drivers to set
- * latency constraint they have and to collect and summarize these
- * expectations globally. The cummulated result can then be used by
- * power management and similar users to make decisions that have
- * tradoffs with a latency component.
- *
- * An example user of this are the x86 C-states; each higher C state saves
- * more power, but has a higher exit latency. For the idle loop power
- * code to make a good decision which C-state to use, information about
- * acceptable latencies is required.
- *
- * An example announcer of latency is an audio driver that knowns it
- * will get an interrupt when the hardware has 200 usec of samples
- * left in the DMA buffer; in that case the driver can set a latency
- * constraint of, say, 150 usec.
- *
- * Multiple drivers can each announce their maximum accepted latency,
- * to keep these appart, a string based identifier is used.
- *
- *
- * (C) Copyright 2006 Intel Corporation
- * Author: Arjan van de Ven <arjan@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-#include <linux/latency.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/notifier.h>
-#include <linux/jiffies.h>
-#include <asm/atomic.h>
-
-struct latency_info {
-	struct list_head list;
-	int usecs;
-	char *identifier;
-};
-
-/*
- * locking rule: all modifications to current_max_latency and
- * latency_list need to be done while holding the latency_lock.
- * latency_lock needs to be taken _irqsave.
- */
-static atomic_t current_max_latency;
-static DEFINE_SPINLOCK(latency_lock);
-
-static LIST_HEAD(latency_list);
-static BLOCKING_NOTIFIER_HEAD(latency_notifier);
-
-/*
- * This function returns the maximum latency allowed, which
- * happens to be the minimum of all maximum latencies on the
- * list.
- */
-static int __find_max_latency(void)
-{
-	int min = INFINITE_LATENCY;
-	struct latency_info *info;
-
-	list_for_each_entry(info, &latency_list, list) {
-		if (info->usecs < min)
-			min = info->usecs;
-	}
-	return min;
-}
-
-/**
- * set_acceptable_latency - sets the maximum latency acceptable
- * @identifier: string that identifies this driver
- * @usecs: maximum acceptable latency for this driver
- *
- * This function informs the kernel that this device(driver)
- * can accept at most usecs latency. This setting is used for
- * power management and similar tradeoffs.
- *
- * This function sleeps and can only be called from process
- * context.
- * Calling this function with an existing identifier is valid
- * and will cause the existing latency setting to be changed.
- */
-void set_acceptable_latency(char *identifier, int usecs)
-{
-	struct latency_info *info, *iter;
-	unsigned long flags;
-	int found_old = 0;
-
-	info = kzalloc(sizeof(struct latency_info), GFP_KERNEL);
-	if (!info)
-		return;
-	info->usecs = usecs;
-	info->identifier = kstrdup(identifier, GFP_KERNEL);
-	if (!info->identifier)
-		goto free_info;
-
-	spin_lock_irqsave(&latency_lock, flags);
-	list_for_each_entry(iter, &latency_list, list) {
-		if (strcmp(iter->identifier, identifier)==0) {
-			found_old = 1;
-			iter->usecs = usecs;
-			break;
-		}
-	}
-	if (!found_old)
-		list_add(&info->list, &latency_list);
-
-	if (usecs < atomic_read(&current_max_latency))
-		atomic_set(&current_max_latency, usecs);
-
-	spin_unlock_irqrestore(&latency_lock, flags);
-
-	blocking_notifier_call_chain(&latency_notifier,
-		atomic_read(&current_max_latency), NULL);
-
-	/*
-	 * if we inserted the new one, we're done; otherwise there was
-	 * an existing one so we need to free the redundant data
-	 */
-	if (!found_old)
-		return;
-
-	kfree(info->identifier);
-free_info:
-	kfree(info);
-}
-EXPORT_SYMBOL_GPL(set_acceptable_latency);
-
-/**
- * modify_acceptable_latency - changes the maximum latency acceptable
- * @identifier: string that identifies this driver
- * @usecs: maximum acceptable latency for this driver
- *
- * This function informs the kernel that this device(driver)
- * can accept at most usecs latency. This setting is used for
- * power management and similar tradeoffs.
- *
- * This function does not sleep and can be called in any context.
- * Trying to use a non-existing identifier silently gets ignored.
- *
- * Due to the atomic nature of this function, the modified latency
- * value will only be used for future decisions; past decisions
- * can still lead to longer latencies in the near future.
- */
-void modify_acceptable_latency(char *identifier, int usecs)
-{
-	struct latency_info *iter;
-	unsigned long flags;
-
-	spin_lock_irqsave(&latency_lock, flags);
-	list_for_each_entry(iter, &latency_list, list) {
-		if (strcmp(iter->identifier, identifier) == 0) {
-			iter->usecs = usecs;
-			break;
-		}
-	}
-	if (usecs < atomic_read(&current_max_latency))
-		atomic_set(&current_max_latency, usecs);
-	spin_unlock_irqrestore(&latency_lock, flags);
-}
-EXPORT_SYMBOL_GPL(modify_acceptable_latency);
-
-/**
- * remove_acceptable_latency - removes the maximum latency acceptable
- * @identifier: string that identifies this driver
- *
- * This function removes a previously set maximum latency setting
- * for the driver and frees up any resources associated with the
- * bookkeeping needed for this.
- *
- * This function does not sleep and can be called in any context.
- * Trying to use a non-existing identifier silently gets ignored.
- */
-void remove_acceptable_latency(char *identifier)
-{
-	unsigned long flags;
-	int newmax = 0;
-	struct latency_info *iter, *temp;
-
-	spin_lock_irqsave(&latency_lock, flags);
-
-	list_for_each_entry_safe(iter,  temp, &latency_list, list) {
-		if (strcmp(iter->identifier, identifier) == 0) {
-			list_del(&iter->list);
-			newmax = iter->usecs;
-			kfree(iter->identifier);
-			kfree(iter);
-			break;
-		}
-	}
-
-	/* If we just deleted the system wide value, we need to
-	 * recalculate with a full search
-	 */
-	if (newmax == atomic_read(&current_max_latency)) {
-		newmax = __find_max_latency();
-		atomic_set(&current_max_latency, newmax);
-	}
-	spin_unlock_irqrestore(&latency_lock, flags);
-}
-EXPORT_SYMBOL_GPL(remove_acceptable_latency);
-
-/**
- * system_latency_constraint - queries the system wide latency maximum
- *
- * This function returns the system wide maximum latency in
- * microseconds.
- *
- * This function does not sleep and can be called in any context.
- */
-int system_latency_constraint(void)
-{
-	return atomic_read(&current_max_latency);
-}
-EXPORT_SYMBOL_GPL(system_latency_constraint);
-
-/**
- * synchronize_acceptable_latency - recalculates all latency decisions
- *
- * This function will cause a callback to various kernel pieces that
- * will make those pieces rethink their latency decisions. This implies
- * that if there are overlong latencies in hardware state already, those
- * latencies get taken right now. When this call completes no overlong
- * latency decisions should be active anymore.
- *
- * Typical usecase of this is after a modify_acceptable_latency() call,
- * which in itself is non-blocking and non-synchronizing.
- *
- * This function blocks and should not be called with locks held.
- */
-
-void synchronize_acceptable_latency(void)
-{
-	blocking_notifier_call_chain(&latency_notifier,
-		atomic_read(&current_max_latency), NULL);
-}
-EXPORT_SYMBOL_GPL(synchronize_acceptable_latency);
-
-/*
- * Latency notifier: this notifier gets called when a non-atomic new
- * latency value gets set. The expectation nof the caller of the
- * non-atomic set is that when the call returns, future latencies
- * are within bounds, so the functions on the notifier list are
- * expected to take the overlong latencies immediately, inside the
- * callback, and not make a overlong latency decision anymore.
- *
- * The callback gets called when the new latency value is made
- * active so system_latency_constraint() returns the new latency.
- */
-int register_latency_notifier(struct notifier_block * nb)
-{
-	return blocking_notifier_chain_register(&latency_notifier, nb);
-}
-EXPORT_SYMBOL_GPL(register_latency_notifier);
-
-int unregister_latency_notifier(struct notifier_block * nb)
-{
-	return blocking_notifier_chain_unregister(&latency_notifier, nb);
-}
-EXPORT_SYMBOL_GPL(unregister_latency_notifier);
-
-static __init int latency_init(void)
-{
-	atomic_set(&current_max_latency, INFINITE_LATENCY);
-	/*
-	 * we don't want by default to have longer latencies than 2 ticks,
-	 * since that would cause lost ticks
-	 */
-	set_acceptable_latency("kernel", 2*1000000/HZ);
-	return 0;
-}
-
-module_init(latency_init);
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
new file mode 100644
index 0000000..0afe32b
--- /dev/null
+++ b/kernel/pm_qos_params.c
@@ -0,0 +1,425 @@
+/*
+ * This module exposes the interface to kernel space for specifying
+ * QoS dependencies.  It provides infrastructure for registration of:
+ *
+ * Dependents on a QoS value : register requirements
+ * Watchers of QoS value : get notified when target QoS value changes
+ *
+ * This QoS design is best effort based.  Dependents register their QoS needs.
+ * Watchers register to keep track of the current QoS needs of the system.
+ *
+ * There are 3 basic classes of QoS parameter: latency, timeout, throughput;
+ * each has defined units:
+ * latency: usec
+ * timeout: usec <-- currently not used.
+ * throughput: kbs (kilo byte / sec)
+ *
+ * There are lists of pm_qos_objects each one wrapping requirements, notifiers
+ *
+ * User mode requirements on a QoS parameter register themselves with the
+ * subsystem by opening the device node /dev/... and writing their request to
+ * the node.  As long as the process holds a file handle open to the node the
+ * client continues to be accounted for.  Upon file release the usermode
+ * requirement is removed and a new qos target is computed.  This way, when
+ * the application closes the file or exits, the pm_qos_object gets an
+ * opportunity to clean up the requirement the application registered.
+ *
+ * mark gross mgross@linux.intel.com
+ */
+
+#include <linux/pm_qos_params.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/string.h>
+#include <linux/platform_device.h>
+#include <linux/init.h>
+
+#include <linux/uaccess.h>
+
+/*
+ * locking rule: all changes to target_value or requirements or notifiers lists
+ * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
+ * held, taken with _irqsave.  One lock to rule them all
+ */
+struct requirement_list {
+	struct list_head list;	/* link in a class's requirements list */
+	union {
+		s32 value;	/* view read when aggregating requests */
+		s32 usec;
+		s32 kbps;
+	};
+	char *name;		/* kstrdup()'d identifier of the request */
+};
+
+static s32 max_compare(s32 v1, s32 v2);
+static s32 min_compare(s32 v1, s32 v2);
+
+struct pm_qos_object {
+	struct requirement_list requirements;	/* list head of requests */
+	struct blocking_notifier_head *notifiers; /* called on target change */
+	struct miscdevice pm_qos_power_miscdev;	/* user-space device node */
+	char *name;				/* also the misc device name */
+	s32 default_value;	/* starting point when aggregating requests */
+	s32 target_value;	/* current aggregate, see update_target() */
+	s32 (*comparitor)(s32, s32);	/* min_compare or max_compare */
+};
+
+static struct pm_qos_object null_pm_qos;
+static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier);
+static struct pm_qos_object cpu_dma_pm_qos = {
+	.requirements = {LIST_HEAD_INIT(cpu_dma_pm_qos.requirements.list)},
+	.notifiers = &cpu_dma_lat_notifier,
+	.name = "cpu_dma_latency",
+	.default_value = 2000 * USEC_PER_SEC,
+	.target_value = 2000 * USEC_PER_SEC,
+	.comparitor = min_compare
+};
+
+static BLOCKING_NOTIFIER_HEAD(network_lat_notifier);
+static struct pm_qos_object network_lat_pm_qos = {
+	.requirements = {LIST_HEAD_INIT(network_lat_pm_qos.requirements.list)},
+	.notifiers = &network_lat_notifier,
+	.name = "network_latency",
+	.default_value = 2000 * USEC_PER_SEC,
+	.target_value = 2000 * USEC_PER_SEC,
+	.comparitor = min_compare
+};
+
+
+static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier);
+static struct pm_qos_object network_throughput_pm_qos = {
+	.requirements =
+		{LIST_HEAD_INIT(network_throughput_pm_qos.requirements.list)},
+	.notifiers = &network_throughput_notifier,
+	.name = "network_throughput",
+	.default_value = 0,
+	.target_value = 0,
+	.comparitor = max_compare
+};
+
+
+static struct pm_qos_object *pm_qos_array[] = {
+	&null_pm_qos,
+	&cpu_dma_pm_qos,
+	&network_lat_pm_qos,
+	&network_throughput_pm_qos
+};
+
+static DEFINE_SPINLOCK(pm_qos_lock);
+
+static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
+		size_t count, loff_t *f_pos);
+static int pm_qos_power_open(struct inode *inode, struct file *filp);
+static int pm_qos_power_release(struct inode *inode, struct file *filp);
+
+static const struct file_operations pm_qos_power_fops = {
+	.write = pm_qos_power_write,
+	.open = pm_qos_power_open,
+	.release = pm_qos_power_release,
+};
+
+/* static helper functions */
+static s32 max_compare(s32 v1, s32 v2)
+{
+	return v1 > v2 ? v1 : v2;
+}
+
+static s32 min_compare(s32 v1, s32 v2)
+{
+	return v1 < v2 ? v1 : v2;
+}
+
+
+/* Recompute class @target's aggregate value; notify watchers if it changed. */
+static void update_target(int target)
+{
+	struct pm_qos_object *qos = pm_qos_array[target];
+	struct requirement_list *node;
+	unsigned long flags;
+	s32 extreme_value;
+	int call_notifier = 0;
+
+	spin_lock_irqsave(&pm_qos_lock, flags);
+	extreme_value = qos->default_value;
+	list_for_each_entry(node, &qos->requirements.list, list)
+		extreme_value = qos->comparitor(extreme_value, node->value);
+	if (qos->target_value != extreme_value) {
+		call_notifier = 1;
+		qos->target_value = extreme_value;
+		pr_debug("new target for qos %d is %d\n", target,
+			 qos->target_value);
+	}
+	spin_unlock_irqrestore(&pm_qos_lock, flags);
+
+	/* The chain is invoked only after the spinlock has been released. */
+	if (call_notifier)
+		blocking_notifier_call_chain(qos->notifiers,
+			(unsigned long) extreme_value, NULL);
+}
+
+static int register_pm_qos_misc(struct pm_qos_object *qos)
+{
+	struct miscdevice *mdev = &qos->pm_qos_power_miscdev;
+	/* Dynamic minor; the device carries the class name. */
+	mdev->minor = MISC_DYNAMIC_MINOR;
+	mdev->name = qos->name;
+	mdev->fops = &pm_qos_power_fops;
+	return misc_register(mdev);
+}
+
+/* Map a misc-device minor to its index in pm_qos_array, or -1 if none. */
+static int find_pm_qos_object_by_minor(int minor)
+{
+	int idx = 0;
+
+	while (idx < PM_QOS_NUM_CLASSES) {
+		if (pm_qos_array[idx]->pm_qos_power_miscdev.minor == minor)
+			return idx;
+		idx++;
+	}
+	return -1;
+}
+
+/**
+ * pm_qos_requirement - returns current system wide qos expectation
+ * @pm_qos_class: identification of which qos value is requested
+ *
+ * This function returns the current target value in an atomic manner.
+ */
+int pm_qos_requirement(int pm_qos_class)
+{
+	struct pm_qos_object *qos = pm_qos_array[pm_qos_class];
+	unsigned long irq_flags;
+	int target;
+
+	spin_lock_irqsave(&pm_qos_lock, irq_flags);
+	target = qos->target_value;
+	spin_unlock_irqrestore(&pm_qos_lock, irq_flags);
+	return target;
+}
+EXPORT_SYMBOL_GPL(pm_qos_requirement);
+
+/**
+ * pm_qos_add_requirement - inserts new qos request into the list
+ * @pm_qos_class: identifies which list of qos requests to use
+ * @name: identifies the request
+ * @value: defines the qos request
+ *
+ * This function inserts a new entry in the pm_qos_class list of requested qos
+ * performance characteristics.  It recomputes the aggregate QoS expectations
+ * for the pm_qos_class of parameters.
+ */
+int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value)
+{
+	struct pm_qos_object *qos = pm_qos_array[pm_qos_class];
+	struct requirement_list *dep;
+	unsigned long flags;
+
+	dep = kzalloc(sizeof(struct requirement_list), GFP_KERNEL);
+	if (!dep)
+		return -ENOMEM;
+	/* PM_QOS_DEFAULT_VALUE is a request for the class default. */
+	if (value == PM_QOS_DEFAULT_VALUE)
+		value = qos->default_value;
+	dep->value = value;
+
+	dep->name = kstrdup(name, GFP_KERNEL);
+	if (!dep->name) {
+		kfree(dep);
+		return -ENOMEM;
+	}
+
+	spin_lock_irqsave(&pm_qos_lock, flags);
+	list_add(&dep->list, &qos->requirements.list);
+	spin_unlock_irqrestore(&pm_qos_lock, flags);
+
+	/* Fold the new request into the class-wide target value. */
+	update_target(pm_qos_class);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pm_qos_add_requirement);
+
+/**
+ * pm_qos_update_requirement - modifies an existing qos request
+ * @pm_qos_class: identifies which list of qos requests to use
+ * @name: identifies the request
+ * @new_value: defines the new qos request value
+ *
+ * Updates an existing qos requirement for the pm_qos_class of parameters along
+ * with updating the target pm_qos_class value.
+ *
+ * If the named request isn't in the list then no change is made.
+ */
+int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value)
+{
+	struct pm_qos_object *qos = pm_qos_array[pm_qos_class];
+	struct requirement_list *req;
+	unsigned long irq_flags;
+	int found = 0;
+
+	/* PM_QOS_DEFAULT_VALUE stands for the class default. */
+	if (new_value == PM_QOS_DEFAULT_VALUE)
+		new_value = qos->default_value;
+
+	spin_lock_irqsave(&pm_qos_lock, irq_flags);
+	list_for_each_entry(req, &qos->requirements.list, list) {
+		if (strcmp(req->name, name) != 0)
+			continue;
+		req->value = new_value;
+		found = 1;
+		break;
+	}
+	spin_unlock_irqrestore(&pm_qos_lock, irq_flags);
+	/* The target is recomputed only when a named entry was changed. */
+	if (found)
+		update_target(pm_qos_class);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pm_qos_update_requirement);
+
+/**
+ * pm_qos_remove_requirement - removes an existing qos request
+ * @pm_qos_class: identifies which list of qos requests to use
+ * @name: identifies the request
+ *
+ * Will remove the named qos request from the pm_qos_class list of parameters
+ * and recompute the current target value for the pm_qos_class.
+ */
+void pm_qos_remove_requirement(int pm_qos_class, char *name)
+{
+	struct pm_qos_object *qos = pm_qos_array[pm_qos_class];
+	struct requirement_list *req;
+	unsigned long irq_flags;
+	int removed = 0;
+
+	spin_lock_irqsave(&pm_qos_lock, irq_flags);
+	list_for_each_entry(req, &qos->requirements.list, list) {
+		if (strcmp(req->name, name) != 0)
+			continue;
+		list_del(&req->list);
+		kfree(req->name);
+		kfree(req);
+		removed = 1;
+		break;
+	}
+	spin_unlock_irqrestore(&pm_qos_lock, irq_flags);
+	if (removed)
+		update_target(pm_qos_class);
+}
+EXPORT_SYMBOL_GPL(pm_qos_remove_requirement);
+
+/**
+ * pm_qos_add_notifier - sets notification entry for changes to target value
+ * @pm_qos_class: identifies which qos target changes should be notified.
+ * @notifier: notifier block managed by caller.
+ *
+ * Will register the notifier into a notification chain that gets called
+ * upon changes to the pm_qos_class target value.
+ */
+int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
+{
+	struct blocking_notifier_head *chain;
+
+	/* Look up the chain that belongs to the requested class. */
+	chain = pm_qos_array[pm_qos_class]->notifiers;
+
+	return blocking_notifier_chain_register(chain, notifier);
+}
+EXPORT_SYMBOL_GPL(pm_qos_add_notifier);
+
+/**
+ * pm_qos_remove_notifier - deletes notification entry from chain.
+ * @pm_qos_class: identifies which qos target changes are notified.
+ * @notifier: notifier block to be removed.
+ *
+ * Will remove the notifier from the notification chain that gets called
+ * upon changes to the pm_qos_class target value.
+ */
+int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
+{
+	struct blocking_notifier_head *chain;
+
+	/* Look up the chain that belongs to the requested class. */
+	chain = pm_qos_array[pm_qos_class]->notifiers;
+
+	return blocking_notifier_chain_unregister(chain, notifier);
+}
+EXPORT_SYMBOL_GPL(pm_qos_remove_notifier);
+
+#define PID_NAME_LEN sizeof("process_1234567890")
+/* Handlers use per-call stack buffers; a shared one would race unlocked. */
+
+static int pm_qos_power_open(struct inode *inode, struct file *filp)
+{
+	char name[PID_NAME_LEN];
+	long pm_qos_class;
+
+	pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
+	if (pm_qos_class < 0)
+		return -EPERM;
+
+	filp->private_data = (void *)pm_qos_class;
+	snprintf(name, sizeof(name), "process_%d", current->pid);
+	if (pm_qos_add_requirement(pm_qos_class, name,
+				   PM_QOS_DEFAULT_VALUE) < 0)
+		return -EPERM;
+
+	return 0;
+}
+
+static int pm_qos_power_release(struct inode *inode, struct file *filp)
+{
+	char name[PID_NAME_LEN];
+	int pm_qos_class = (long)filp->private_data;
+
+	snprintf(name, sizeof(name), "process_%d", current->pid);
+	pm_qos_remove_requirement(pm_qos_class, name);
+
+	return 0;
+}
+
+static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
+		size_t count, loff_t *f_pos)
+{
+	char name[PID_NAME_LEN];
+	s32 value;
+	int pm_qos_class = (long)filp->private_data;
+
+	if (count != sizeof(s32))
+		return -EINVAL;
+	if (copy_from_user(&value, buf, sizeof(s32)))
+		return -EFAULT;
+	snprintf(name, sizeof(name), "process_%d", current->pid);
+	pm_qos_update_requirement(pm_qos_class, name, value);
+
+	return  sizeof(s32);
+}
+
+
+static int __init pm_qos_power_init(void)
+{
+	int ret = 0;
+	/* Register one misc device per class; the first failure ends init. */
+	ret = register_pm_qos_misc(&cpu_dma_pm_qos);
+	if (ret < 0) {
+		printk(KERN_ERR "pm_qos_param: cpu_dma_latency setup failed\n");
+		return ret;
+	}
+	ret = register_pm_qos_misc(&network_lat_pm_qos);
+	if (ret < 0) {
+		printk(KERN_ERR "pm_qos_param: network_latency setup failed\n");
+		return ret;
+	}
+	ret = register_pm_qos_misc(&network_throughput_pm_qos);
+	if (ret < 0)
+		printk(KERN_ERR
+			"pm_qos_param: network_throughput setup failed\n");
+	/* NOTE(review): earlier registrations are not undone on failure. */
+	return ret;
+}
+
+late_initcall(pm_qos_power_init);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 36d563f..122d5c7 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -256,8 +256,9 @@
 	if (timr->it.real.interval.tv64 == 0)
 		return;
 
-	timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(),
-					    timr->it.real.interval);
+	timr->it_overrun += (unsigned int) hrtimer_forward(timer,
+						timer->base->get_time(),
+						timr->it.real.interval);
 
 	timr->it_overrun_last = timr->it_overrun;
 	timr->it_overrun = -1;
@@ -386,7 +387,7 @@
 					now = ktime_add(now, kj);
 			}
 #endif
-			timr->it_overrun +=
+			timr->it_overrun += (unsigned int)
 				hrtimer_forward(timer, now,
 						timr->it.real.interval);
 			ret = HRTIMER_RESTART;
@@ -662,7 +663,7 @@
 	 */
 	if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING ||
 	    (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
-		timr->it_overrun += hrtimer_forward(timer, now, iv);
+		timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
 
 	remaining = ktime_sub(timer->expires, now);
 	/* Return 0 only, when the timer is expired and not pending */
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index d09da08..859a8e5 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -26,7 +26,7 @@
 
 
 static int noresume = 0;
-char resume_file[256] = CONFIG_PM_STD_PARTITION;
+static char resume_file[256] = CONFIG_PM_STD_PARTITION;
 dev_t swsusp_resume_device;
 sector_t swsusp_resume_block;
 
@@ -185,7 +185,7 @@
  *	reappears in this routine after a restore.
  */
 
-int create_image(int platform_mode)
+static int create_image(int platform_mode)
 {
 	int error;
 
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index f6a5df9..95250d7 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1203,7 +1203,7 @@
 
 	printk(KERN_INFO "PM: Creating hibernation image: \n");
 
-	drain_local_pages();
+	drain_local_pages(NULL);
 	nr_pages = count_data_pages();
 	nr_highmem = count_highmem_pages();
 	printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem);
@@ -1221,7 +1221,7 @@
 	/* During allocating of suspend pagedir, new cold pages may appear.
 	 * Kill them.
 	 */
-	drain_local_pages();
+	drain_local_pages(NULL);
 	copy_data_pages(&copy_bm, &orig_bm);
 
 	/*
diff --git a/kernel/signal.c b/kernel/signal.c
index 4333b6d..6a5f97c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -911,27 +911,6 @@
 			} while_each_thread(p, t);
 			return;
 		}
-
-		/*
-		 * There will be a core dump.  We make all threads other
-		 * than the chosen one go into a group stop so that nothing
-		 * happens until it gets scheduled, takes the signal off
-		 * the shared queue, and does the core dump.  This is a
-		 * little more complicated than strictly necessary, but it
-		 * keeps the signal state that winds up in the core dump
-		 * unchanged from the death state, e.g. which thread had
-		 * the core-dump signal unblocked.
-		 */
-		rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending);
-		rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending);
-		p->signal->group_stop_count = 0;
-		p->signal->group_exit_task = t;
-		p = t;
-		do {
-			p->signal->group_stop_count++;
-			signal_wake_up(t, t == p);
-		} while_each_thread(p, t);
-		return;
 	}
 
 	/*
@@ -978,7 +957,6 @@
 {
 	struct task_struct *t;
 
-	p->signal->flags = SIGNAL_GROUP_EXIT;
 	p->signal->group_stop_count = 0;
 
 	for (t = next_thread(p); t != p; t = next_thread(t)) {
@@ -1709,9 +1687,6 @@
 	struct signal_struct *sig = current->signal;
 	int stop_count;
 
-	if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED))
-		return 0;
-
 	if (sig->group_stop_count > 0) {
 		/*
 		 * There is a group stop in progress.  We don't need to
@@ -1719,12 +1694,15 @@
 		 */
 		stop_count = --sig->group_stop_count;
 	} else {
+		struct task_struct *t;
+
+		if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
+		    unlikely(sig->group_exit_task))
+			return 0;
 		/*
 		 * There is no group stop already in progress.
 		 * We must initiate one now.
 		 */
-		struct task_struct *t;
-
 		sig->group_exit_code = signr;
 
 		stop_count = 0;
@@ -1752,47 +1730,6 @@
 	return 1;
 }
 
-/*
- * Do appropriate magic when group_stop_count > 0.
- * We return nonzero if we stopped, after releasing the siglock.
- * We return zero if we still hold the siglock and should look
- * for another signal without checking group_stop_count again.
- */
-static int handle_group_stop(void)
-{
-	int stop_count;
-
-	if (current->signal->group_exit_task == current) {
-		/*
-		 * Group stop is so we can do a core dump,
-		 * We are the initiating thread, so get on with it.
-		 */
-		current->signal->group_exit_task = NULL;
-		return 0;
-	}
-
-	if (current->signal->flags & SIGNAL_GROUP_EXIT)
-		/*
-		 * Group stop is so another thread can do a core dump,
-		 * or else we are racing against a death signal.
-		 * Just punt the stop so we can get the next signal.
-		 */
-		return 0;
-
-	/*
-	 * There is a group stop in progress.  We stop
-	 * without any associated signal being in our queue.
-	 */
-	stop_count = --current->signal->group_stop_count;
-	if (stop_count == 0)
-		current->signal->flags = SIGNAL_STOP_STOPPED;
-	current->exit_code = current->signal->group_exit_code;
-	set_current_state(TASK_STOPPED);
-	spin_unlock_irq(&current->sighand->siglock);
-	finish_stop(stop_count);
-	return 1;
-}
-
 int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
 			  struct pt_regs *regs, void *cookie)
 {
@@ -1807,7 +1744,7 @@
 		struct k_sigaction *ka;
 
 		if (unlikely(current->signal->group_stop_count > 0) &&
-		    handle_group_stop())
+		    do_signal_stop(0))
 			goto relock;
 
 		signr = dequeue_signal(current, mask, info);
diff --git a/kernel/sys.c b/kernel/sys.c
index d1fe71e..53de35f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -315,7 +315,7 @@
 #endif
 }
 
-void kernel_shutdown_prepare(enum system_states state)
+static void kernel_shutdown_prepare(enum system_states state)
 {
 	blocking_notifier_call_chain(&reboot_notifier_list,
 		(state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
@@ -1637,7 +1637,7 @@
 	mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
 	return mask;
 }
-    
+
 asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 			  unsigned long arg4, unsigned long arg5)
 {
@@ -1742,6 +1742,17 @@
 			error = prctl_set_seccomp(arg2);
 			break;
 
+		case PR_CAPBSET_READ:
+			if (!cap_valid(arg2))
+				return -EINVAL;
+			return !!cap_raised(current->cap_bset, arg2);
+		case PR_CAPBSET_DROP:
+#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
+			return cap_prctl_drop(arg2);
+#else
+			return -EINVAL;
+#endif
+
 		default:
 			error = -EINVAL;
 			break;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index beee5b3..5b9b467 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -154,7 +154,10 @@
 
 /* New file descriptors */
 cond_syscall(sys_signalfd);
-cond_syscall(sys_timerfd);
 cond_syscall(compat_sys_signalfd);
-cond_syscall(compat_sys_timerfd);
+cond_syscall(sys_timerfd_create);
+cond_syscall(sys_timerfd_settime);
+cond_syscall(sys_timerfd_gettime);
+cond_syscall(compat_sys_timerfd_settime);
+cond_syscall(compat_sys_timerfd_gettime);
 cond_syscall(sys_eventfd);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7cb1ac3..5e2ad5b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -84,8 +84,11 @@
 extern int latencytop_enabled;
 
 /* Constants used for minimum and  maximum */
-#ifdef CONFIG_DETECT_SOFTLOCKUP
+#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM)
 static int one = 1;
+#endif
+
+#ifdef CONFIG_DETECT_SOFTLOCKUP
 static int sixty = 60;
 #endif
 
@@ -416,15 +419,6 @@
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
-#ifdef CONFIG_SECURITY_CAPABILITIES
-	{
-		.procname	= "cap-bound",
-		.data		= &cap_bset,
-		.maxlen		= sizeof(kernel_cap_t),
-		.mode		= 0600,
-		.proc_handler	= &proc_dointvec_bset,
-	},
-#endif /* def CONFIG_SECURITY_CAPABILITIES */
 #ifdef CONFIG_BLK_DEV_INITRD
 	{
 		.ctl_name	= KERN_REALROOTDEV,
@@ -1150,6 +1144,19 @@
 		.extra1		= &zero,
 	},
 #endif
+#ifdef CONFIG_HIGHMEM
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "highmem_is_dirtyable",
+		.data		= &vm_highmem_is_dirtyable,
+		.maxlen		= sizeof(vm_highmem_is_dirtyable),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+#endif
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
@@ -2080,26 +2087,6 @@
 	return 0;
 }
 
-#ifdef CONFIG_SECURITY_CAPABILITIES
-/*
- *	init may raise the set.
- */
-
-int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
-			void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	int op;
-
-	if (write && !capable(CAP_SYS_MODULE)) {
-		return -EPERM;
-	}
-
-	op = is_global_init(current) ? OP_SET : OP_AND;
-	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
-				do_proc_dointvec_bset_conv,&op);
-}
-#endif /* def CONFIG_SECURITY_CAPABILITIES */
-
 /*
  *	Taint values can only be increased
  */
@@ -2513,12 +2500,6 @@
 	return -ENOSYS;
 }
 
-int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
-			void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
-
 int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
 		    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index c3206fa..006365b 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -37,10 +37,6 @@
 	{ KERN_NODENAME,		"hostname" },
 	{ KERN_DOMAINNAME,		"domainname" },
 
-#ifdef CONFIG_SECURITY_CAPABILITIES
-	{ KERN_CAP_BSET,		"cap-bound" },
-#endif /* def CONFIG_SECURITY_CAPABILITIES */
-
 	{ KERN_PANIC,			"panic" },
 	{ KERN_REALROOTDEV,		"real-root-dev" },
 
@@ -1498,9 +1494,6 @@
 			    (table->strategy == sysctl_ms_jiffies) ||
 			    (table->proc_handler == proc_dostring) ||
 			    (table->proc_handler == proc_dointvec) ||
-#ifdef CONFIG_SECURITY_CAPABILITIES
-			    (table->proc_handler == proc_dointvec_bset) ||
-#endif /* def CONFIG_SECURITY_CAPABILITIES */
 			    (table->proc_handler == proc_dointvec_minmax) ||
 			    (table->proc_handler == proc_dointvec_jiffies) ||
 			    (table->proc_handler == proc_dointvec_userhz_jiffies) ||
diff --git a/lib/Makefile b/lib/Makefile
index 543f2502..a18062e4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -65,6 +65,7 @@
 obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
+obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 
 lib-$(CONFIG_GENERIC_BUG) += bug.o
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
new file mode 100644
index 0000000..495575a
--- /dev/null
+++ b/lib/iommu-helper.c
@@ -0,0 +1,114 @@
+/*
+ * IOMMU helper functions for the free area management
+ */
+
+#include <linux/module.h>
+#include <linux/bitops.h>
+
+/*
+ * Find the first run of @nr clear bits in @map (@size bits long), searching
+ * from bit @start.  The candidate index is rounded up with @align_mask
+ * (presumably "alignment - 1" -- confirm against callers).
+ *
+ * Returns the index of the first bit of the run, or -1 if none fits.  Note
+ * that a run is rejected when index + nr >= size, so a run ending exactly
+ * on the last bit of the map is never returned.
+ */
+static unsigned long find_next_zero_area(unsigned long *map,
+					 unsigned long size,
+					 unsigned long start,
+					 unsigned int nr,
+					 unsigned long align_mask)
+{
+	unsigned long index, end, i;
+again:
+	index = find_next_zero_bit(map, size, start);
+
+	/* Align allocation */
+	index = (index + align_mask) & ~align_mask;
+
+	end = index + nr;
+	if (end >= size)
+		return -1;
+	for (i = index; i < end; i++) {
+		if (test_bit(i, map)) {
+			/* bit i is in use: restart the search just past it */
+			start = i+1;
+			goto again;
+		}
+	}
+	return index;
+}
+
+/* Set @len consecutive bits of @map, starting at bit @i. */
+static inline void set_bit_area(unsigned long *map, unsigned long i,
+				unsigned int len)
+{
+	unsigned long end = i + len;
+	while (i < end) {
+		__set_bit(i, map);
+		i++;
+	}
+}
+
+/*
+ * Return non-zero when @nr entries starting at (@shift + @index), reduced
+ * by masking with (@boundary_size - 1), extend past @boundary_size.
+ * The masking is only a modulo if @boundary_size is a power of two;
+ * NOTE(review): callers are assumed to guarantee that -- confirm.
+ *
+ * @index is unsigned long so the bit index handed over by
+ * iommu_area_alloc() is not narrowed before the check.
+ */
+static inline int is_span_boundary(unsigned long index, unsigned int nr,
+				   unsigned long shift,
+				   unsigned long boundary_size)
+{
+	shift = (shift + index) & (boundary_size - 1);
+	return shift + nr > boundary_size;
+}
+
+/*
+ * Allocate @nr consecutive bits from @map (@size bits long), searching from
+ * bit @start.  The first index is aligned with @align_mask and the run may
+ * not cross a @boundary_size boundary as judged by is_span_boundary().
+ *
+ * Returns the index of the first allocated bit with the bits set in @map,
+ * or -1 with @map untouched when no suitable run exists.  Bits are set
+ * with the non-atomic __set_bit(); presumably the caller serializes access
+ * to @map -- confirm.
+ */
+unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
+			       unsigned long start, unsigned int nr,
+			       unsigned long shift, unsigned long boundary_size,
+			       unsigned long align_mask)
+{
+	unsigned long index;
+again:
+	index = find_next_zero_area(map, size, start, nr, align_mask);
+	if (index != -1) {
+		if (is_span_boundary(index, nr, shift, boundary_size)) {
+			/* retry one bit further on; this could be smarter */
+			start = index + 1;
+			goto again;
+		}
+		set_bit_area(map, index, nr);
+	}
+	return index;
+}
+EXPORT_SYMBOL(iommu_area_alloc);
+
+/*
+ * Clear @nr consecutive bits of @map starting at bit @start, using the
+ * non-atomic __clear_bit().
+ */
+void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr)
+{
+	unsigned long end = start + nr;
+
+	while (start < end) {
+		__clear_bit(start, map);
+		start++;
+	}
+}
+EXPORT_SYMBOL(iommu_area_free);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 48c250f..65f0e75 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -95,14 +95,17 @@
 static struct radix_tree_node *
 radix_tree_node_alloc(struct radix_tree_root *root)
 {
-	struct radix_tree_node *ret;
+	struct radix_tree_node *ret = NULL;
 	gfp_t gfp_mask = root_gfp_mask(root);
 
-	ret = kmem_cache_alloc(radix_tree_node_cachep,
-				set_migrateflags(gfp_mask, __GFP_RECLAIMABLE));
-	if (ret == NULL && !(gfp_mask & __GFP_WAIT)) {
+	if (!(gfp_mask & __GFP_WAIT)) {
 		struct radix_tree_preload *rtp;
 
+		/*
+		 * Provided the caller has preloaded here, we will always
+		 * succeed in getting a node here (and never reach
+		 * kmem_cache_alloc)
+		 */
 		rtp = &__get_cpu_var(radix_tree_preloads);
 		if (rtp->nr) {
 			ret = rtp->nodes[rtp->nr - 1];
@@ -110,6 +113,10 @@
 			rtp->nr--;
 		}
 	}
+	if (ret == NULL)
+		ret = kmem_cache_alloc(radix_tree_node_cachep,
+				set_migrateflags(gfp_mask, __GFP_RECLAIMABLE));
+
 	BUG_ON(radix_tree_is_indirect_ptr(ret));
 	return ret;
 }
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 1a8050a..4bb5a11 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -282,6 +282,15 @@
 	return (addr & ~mask) != 0;
 }
 
+static inline unsigned int is_span_boundary(unsigned int index,
+					    unsigned int nslots,
+					    unsigned long offset_slots,
+					    unsigned long max_slots)
+{
+	unsigned long offset = (offset_slots + index) & (max_slots - 1);
+	return offset + nslots > max_slots;
+}
+
 /*
  * Allocates bounce buffer and returns its kernel virtual address.
  */
@@ -292,6 +301,25 @@
 	char *dma_addr;
 	unsigned int nslots, stride, index, wrap;
 	int i;
+	unsigned long start_dma_addr;
+	unsigned long mask;
+	unsigned long offset_slots;
+	unsigned long max_slots;
+
+	mask = dma_get_seg_boundary(hwdev);
+	start_dma_addr = virt_to_bus(io_tlb_start) & mask;
+
+	offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+
+	/*
+	 * mask + 1 wraps to 0 when the boundary mask is ~0UL.  A max_slots
+	 * of 0 would make is_span_boundary() true for every index and the
+	 * slot search below would never terminate, so treat the wrap as
+	 * "the whole address space".
+	 */
+	max_slots = mask + 1
+		    ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
+		    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
 
 	/*
 	 * For mappings greater than a page, we limit the stride (and
@@ -311,10 +330,17 @@
 	 */
 	spin_lock_irqsave(&io_tlb_lock, flags);
 	{
-		wrap = index = ALIGN(io_tlb_index, stride);
-
+		index = ALIGN(io_tlb_index, stride);
 		if (index >= io_tlb_nslabs)
-			wrap = index = 0;
+			index = 0;
+
+		while (is_span_boundary(index, nslots, offset_slots,
+					max_slots)) {
+			index += stride;
+			if (index >= io_tlb_nslabs)
+				index = 0;
+		}
+		wrap = index;
 
 		do {
 			/*
@@ -341,9 +367,12 @@
 
 				goto found;
 			}
-			index += stride;
-			if (index >= io_tlb_nslabs)
-				index = 0;
+			do {
+				index += stride;
+				if (index >= io_tlb_nslabs)
+					index = 0;
+			} while (is_span_boundary(index, nslots, offset_slots,
+						  max_slots));
 		} while (index != wrap);
 
 		spin_unlock_irqrestore(&io_tlb_lock, flags);
diff --git a/mm/Makefile b/mm/Makefile
index 5c0b0ea..44e2528 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -13,6 +13,7 @@
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
 			   page_isolation.o $(mmu-y)
 
+obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
 obj-$(CONFIG_BOUNCE)	+= bounce.o
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 0df4c89..3c0f1e9 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -49,9 +49,21 @@
 		goto out;
 	}
 
-	if (mapping->a_ops->get_xip_page)
-		/* no bad return value, but ignore advice */
+	if (mapping->a_ops->get_xip_page) {
+		switch (advice) {
+		case POSIX_FADV_NORMAL:
+		case POSIX_FADV_RANDOM:
+		case POSIX_FADV_SEQUENTIAL:
+		case POSIX_FADV_WILLNEED:
+		case POSIX_FADV_NOREUSE:
+		case POSIX_FADV_DONTNEED:
+			/* no bad return value, but ignore advice */
+			break;
+		default:
+			ret = -EINVAL;
+		}
 		goto out;
+	}
 
 	/* Careful about overflows. Len == 0 means "as much as possible" */
 	endbyte = offset + len;
diff --git a/mm/filemap.c b/mm/filemap.c
index 76bea88..81fb9bf 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -65,7 +65,6 @@
  *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
  *      ->swap_lock		(exclusive_swap_page, others)
  *        ->mapping->tree_lock
- *          ->zone.lock
  *
  *  ->i_mutex
  *    ->i_mmap_lock		(truncate->unmap_mapping_range)
@@ -528,7 +527,7 @@
 	__wake_up_bit(page_waitqueue(page), &page->flags, bit);
 }
 
-void fastcall wait_on_page_bit(struct page *page, int bit_nr)
+void wait_on_page_bit(struct page *page, int bit_nr)
 {
 	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
 
@@ -552,7 +551,7 @@
  * the clear_bit and the read of the waitqueue (to avoid SMP races with a
  * parallel wait_on_page_locked()).
  */
-void fastcall unlock_page(struct page *page)
+void unlock_page(struct page *page)
 {
 	smp_mb__before_clear_bit();
 	if (!TestClearPageLocked(page))
@@ -586,7 +585,7 @@
  * chances are that on the second loop, the block layer's plug list is empty,
  * so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
  */
-void fastcall __lock_page(struct page *page)
+void __lock_page(struct page *page)
 {
 	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 
@@ -607,7 +606,7 @@
  * Variant of lock_page that does not require the caller to hold a reference
  * on the page's mapping.
  */
-void fastcall __lock_page_nosync(struct page *page)
+void __lock_page_nosync(struct page *page)
 {
 	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 	__wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
@@ -1277,7 +1276,7 @@
  * This adds the requested page to the page cache if it isn't already there,
  * and schedules an I/O to read in its contents from disk.
  */
-static int fastcall page_cache_read(struct file * file, pgoff_t offset)
+static int page_cache_read(struct file *file, pgoff_t offset)
 {
 	struct address_space *mapping = file->f_mapping;
 	struct page *page; 
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index f874ae8..0420a02 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -431,7 +431,7 @@
 		else
 			return PTR_ERR(page);
 	}
-	zero_user_page(page, offset, length, KM_USER0);
+	zero_user(page, offset, length);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xip_truncate_page);
diff --git a/mm/fremap.c b/mm/fremap.c
index 14bd3bf..69a37c2 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -190,10 +190,13 @@
 		 */
 		if (mapping_cap_account_dirty(mapping)) {
 			unsigned long addr;
+			struct file *file = vma->vm_file;
 
 			flags &= MAP_NONBLOCK;
-			addr = mmap_region(vma->vm_file, start, size,
+			get_file(file);
+			addr = mmap_region(file, start, size,
 					flags, vma->vm_flags, pgoff, 1);
+			fput(file);
 			if (IS_ERR_VALUE(addr)) {
 				err = addr;
 			} else {
diff --git a/mm/highmem.c b/mm/highmem.c
index 7a967bc..35d4773 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -163,7 +163,7 @@
 	return vaddr;
 }
 
-void fastcall *kmap_high(struct page *page)
+void *kmap_high(struct page *page)
 {
 	unsigned long vaddr;
 
@@ -185,7 +185,7 @@
 
 EXPORT_SYMBOL(kmap_high);
 
-void fastcall kunmap_high(struct page *page)
+void kunmap_high(struct page *page)
 {
 	unsigned long vaddr;
 	unsigned long nr;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index db861d8..1a56420 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -813,6 +813,7 @@
 
 	spin_unlock(&mm->page_table_lock);
 	copy_huge_page(new_page, old_page, address, vma);
+	__SetPageUptodate(new_page);
 	spin_lock(&mm->page_table_lock);
 
 	ptep = huge_pte_offset(mm, address & HPAGE_MASK);
@@ -858,6 +859,7 @@
 			goto out;
 		}
 		clear_huge_page(page, address);
+		__SetPageUptodate(page);
 
 		if (vma->vm_flags & VM_SHARED) {
 			int err;
diff --git a/mm/internal.h b/mm/internal.h
index 953f941..5a9a620 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -24,7 +24,7 @@
  */
 static inline void set_page_refcounted(struct page *page)
 {
-	VM_BUG_ON(PageCompound(page) && PageTail(page));
+	VM_BUG_ON(PageTail(page));
 	VM_BUG_ON(atomic_read(&page->_count));
 	set_page_count(page, 1);
 }
@@ -34,7 +34,7 @@
 	atomic_dec(&page->_count);
 }
 
-extern void fastcall __init __free_pages_bootmem(struct page *page,
+extern void __init __free_pages_bootmem(struct page *page,
 						unsigned int order);
 
 /*
diff --git a/mm/memory.c b/mm/memory.c
index d902d0e..7bb7072 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -305,7 +305,7 @@
 	spin_lock(&mm->page_table_lock);
 	if (pmd_present(*pmd)) {	/* Another has populated it */
 		pte_lock_deinit(new);
-		pte_free(new);
+		pte_free(mm, new);
 	} else {
 		mm->nr_ptes++;
 		inc_zone_page_state(new, NR_PAGETABLE);
@@ -323,7 +323,7 @@
 
 	spin_lock(&init_mm.page_table_lock);
 	if (pmd_present(*pmd))		/* Another has populated it */
-		pte_free_kernel(new);
+		pte_free_kernel(&init_mm, new);
 	else
 		pmd_populate_kernel(&init_mm, pmd, new);
 	spin_unlock(&init_mm.page_table_lock);
@@ -1109,7 +1109,8 @@
 }
 EXPORT_SYMBOL(get_user_pages);
 
-pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
+pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
+			spinlock_t **ptl)
 {
 	pgd_t * pgd = pgd_offset(mm, addr);
 	pud_t * pud = pud_alloc(mm, pgd, addr);
@@ -1517,10 +1518,8 @@
 			memset(kaddr, 0, PAGE_SIZE);
 		kunmap_atomic(kaddr, KM_USER0);
 		flush_dcache_page(dst);
-		return;
-
-	}
-	copy_user_highpage(dst, src, va, vma);
+	} else
+		copy_user_highpage(dst, src, va, vma);
 }
 
 /*
@@ -1629,6 +1628,7 @@
 	if (!new_page)
 		goto oom;
 	cow_user_page(new_page, old_page, address, vma);
+	__SetPageUptodate(new_page);
 
 	/*
 	 * Re-check the pte - we dropped the lock
@@ -1909,50 +1909,49 @@
  */
 int vmtruncate(struct inode * inode, loff_t offset)
 {
-	struct address_space *mapping = inode->i_mapping;
-	unsigned long limit;
+	if (inode->i_size < offset) {
+		unsigned long limit;
 
-	if (inode->i_size < offset)
-		goto do_expand;
-	/*
-	 * truncation of in-use swapfiles is disallowed - it would cause
-	 * subsequent swapout to scribble on the now-freed blocks.
-	 */
-	if (IS_SWAPFILE(inode))
-		goto out_busy;
-	i_size_write(inode, offset);
+		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+		if (limit != RLIM_INFINITY && offset > limit)
+			goto out_sig;
+		if (offset > inode->i_sb->s_maxbytes)
+			goto out_big;
+		i_size_write(inode, offset);
+	} else {
+		struct address_space *mapping = inode->i_mapping;
 
-	/*
-	 * unmap_mapping_range is called twice, first simply for efficiency
-	 * so that truncate_inode_pages does fewer single-page unmaps. However
-	 * after this first call, and before truncate_inode_pages finishes,
-	 * it is possible for private pages to be COWed, which remain after
-	 * truncate_inode_pages finishes, hence the second unmap_mapping_range
-	 * call must be made for correctness.
-	 */
-	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-	truncate_inode_pages(mapping, offset);
-	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-	goto out_truncate;
+		/*
+		 * truncation of in-use swapfiles is disallowed - it would
+		 * cause subsequent swapout to scribble on the now-freed
+		 * blocks.
+		 */
+		if (IS_SWAPFILE(inode))
+			return -ETXTBSY;
+		i_size_write(inode, offset);
 
-do_expand:
-	limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-	if (limit != RLIM_INFINITY && offset > limit)
-		goto out_sig;
-	if (offset > inode->i_sb->s_maxbytes)
-		goto out_big;
-	i_size_write(inode, offset);
+		/*
+		 * unmap_mapping_range is called twice, first simply for
+		 * efficiency so that truncate_inode_pages does fewer
+		 * single-page unmaps.  However after this first call, and
+		 * before truncate_inode_pages finishes, it is possible for
+		 * private pages to be COWed, which remain after
+		 * truncate_inode_pages finishes, hence the second
+		 * unmap_mapping_range call must be made for correctness.
+		 */
+		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
+		truncate_inode_pages(mapping, offset);
+		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
+	}
 
-out_truncate:
 	if (inode->i_op && inode->i_op->truncate)
 		inode->i_op->truncate(inode);
 	return 0;
+
 out_sig:
 	send_sig(SIGXFSZ, current, 0);
 out_big:
 	return -EFBIG;
-out_busy:
-	return -ETXTBSY;
 }
 EXPORT_SYMBOL(vmtruncate);
 
@@ -1980,67 +1979,6 @@
 	return 0;
 }
 
-/**
- * swapin_readahead - swap in pages in hope we need them soon
- * @entry: swap entry of this memory
- * @addr: address to start
- * @vma: user vma this addresses belong to
- *
- * Primitive swap readahead code. We simply read an aligned block of
- * (1 << page_cluster) entries in the swap area. This method is chosen
- * because it doesn't cost us any seek time.  We also make sure to queue
- * the 'original' request together with the readahead ones...
- *
- * This has been extended to use the NUMA policies from the mm triggering
- * the readahead.
- *
- * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
- */
-void swapin_readahead(swp_entry_t entry, unsigned long addr,struct vm_area_struct *vma)
-{
-#ifdef CONFIG_NUMA
-	struct vm_area_struct *next_vma = vma ? vma->vm_next : NULL;
-#endif
-	int i, num;
-	struct page *new_page;
-	unsigned long offset;
-
-	/*
-	 * Get the number of handles we should do readahead io to.
-	 */
-	num = valid_swaphandles(entry, &offset);
-	for (i = 0; i < num; offset++, i++) {
-		/* Ok, do the async read-ahead now */
-		new_page = read_swap_cache_async(swp_entry(swp_type(entry),
-							   offset), vma, addr);
-		if (!new_page)
-			break;
-		page_cache_release(new_page);
-#ifdef CONFIG_NUMA
-		/*
-		 * Find the next applicable VMA for the NUMA policy.
-		 */
-		addr += PAGE_SIZE;
-		if (addr == 0)
-			vma = NULL;
-		if (vma) {
-			if (addr >= vma->vm_end) {
-				vma = next_vma;
-				next_vma = vma ? vma->vm_next : NULL;
-			}
-			if (vma && addr < vma->vm_start)
-				vma = NULL;
-		} else {
-			if (next_vma && addr >= next_vma->vm_start) {
-				vma = next_vma;
-				next_vma = vma->vm_next;
-			}
-		}
-#endif
-	}
-	lru_add_drain();	/* Push any new pages onto the LRU now */
-}
-
 /*
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
@@ -2068,8 +2006,8 @@
 	page = lookup_swap_cache(entry);
 	if (!page) {
 		grab_swap_token(); /* Contend for token _before_ read-in */
- 		swapin_readahead(entry, address, vma);
- 		page = read_swap_cache_async(entry, vma, address);
+		page = swapin_readahead(entry,
+					GFP_HIGHUSER_MOVABLE, vma, address);
 		if (!page) {
 			/*
 			 * Back out if somebody else faulted in this pte
@@ -2163,6 +2101,7 @@
 	page = alloc_zeroed_user_highpage_movable(vma, address);
 	if (!page)
 		goto oom;
+	__SetPageUptodate(page);
 
 	entry = mk_pte(page, vma->vm_page_prot);
 	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -2263,6 +2202,7 @@
 				goto out;
 			}
 			copy_user_highpage(page, vmf.page, address, vma);
+			__SetPageUptodate(page);
 		} else {
 			/*
 			 * If the page will be shareable, see if the backing
@@ -2563,7 +2503,7 @@
 
 	spin_lock(&mm->page_table_lock);
 	if (pgd_present(*pgd))		/* Another has populated it */
-		pud_free(new);
+		pud_free(mm, new);
 	else
 		pgd_populate(mm, pgd, new);
 	spin_unlock(&mm->page_table_lock);
@@ -2585,12 +2525,12 @@
 	spin_lock(&mm->page_table_lock);
 #ifndef __ARCH_HAS_4LEVEL_HACK
 	if (pud_present(*pud))		/* Another has populated it */
-		pmd_free(new);
+		pmd_free(mm, new);
 	else
 		pud_populate(mm, pud, new);
 #else
 	if (pgd_present(*pud))		/* Another has populated it */
-		pmd_free(new);
+		pmd_free(mm, new);
 	else
 		pgd_populate(mm, pud, new);
 #endif /* __ARCH_HAS_4LEVEL_HACK */
@@ -2618,46 +2558,6 @@
 	return ret == len ? 0 : -1;
 }
 
-/* 
- * Map a vmalloc()-space virtual address to the physical page.
- */
-struct page * vmalloc_to_page(void * vmalloc_addr)
-{
-	unsigned long addr = (unsigned long) vmalloc_addr;
-	struct page *page = NULL;
-	pgd_t *pgd = pgd_offset_k(addr);
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *ptep, pte;
-  
-	if (!pgd_none(*pgd)) {
-		pud = pud_offset(pgd, addr);
-		if (!pud_none(*pud)) {
-			pmd = pmd_offset(pud, addr);
-			if (!pmd_none(*pmd)) {
-				ptep = pte_offset_map(pmd, addr);
-				pte = *ptep;
-				if (pte_present(pte))
-					page = pte_page(pte);
-				pte_unmap(ptep);
-			}
-		}
-	}
-	return page;
-}
-
-EXPORT_SYMBOL(vmalloc_to_page);
-
-/*
- * Map a vmalloc()-space virtual address to the physical page frame number.
- */
-unsigned long vmalloc_to_pfn(void * vmalloc_addr)
-{
-	return page_to_pfn(vmalloc_to_page(vmalloc_addr));
-}
-
-EXPORT_SYMBOL(vmalloc_to_pfn);
-
 #if !defined(__HAVE_ARCH_GATE_AREA)
 
 #if defined(AT_SYSINFO_EHDR)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9512a54..7469c50 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -481,8 +481,6 @@
 	return offlined;
 }
 
-extern void drain_all_local_pages(void);
-
 int offline_pages(unsigned long start_pfn,
 		  unsigned long end_pfn, unsigned long timeout)
 {
@@ -540,7 +538,7 @@
 		lru_add_drain_all();
 		flush_scheduled_work();
 		cond_resched();
-		drain_all_local_pages();
+		drain_all_pages();
 	}
 
 	pfn = scan_lru_pages(start_pfn, end_pfn);
@@ -563,7 +561,7 @@
 	flush_scheduled_work();
 	yield();
 	/* drain pcp pages , this is synchrouns. */
-	drain_all_local_pages();
+	drain_all_pages();
 	/* check again */
 	offlined_pages = check_pages_isolated(start_pfn, end_pfn);
 	if (offlined_pages < 0) {
diff --git a/mm/migrate.c b/mm/migrate.c
index 6a207e8..857a987 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -115,11 +115,6 @@
 	return count;
 }
 
-static inline int is_swap_pte(pte_t pte)
-{
-	return !pte_none(pte) && !pte_present(pte) && !pte_file(pte);
-}
-
 /*
  * Restore a potential migration pte to a working pte entry
  */
@@ -645,15 +640,33 @@
 		rcu_read_lock();
 		rcu_locked = 1;
 	}
+
 	/*
-	 * This is a corner case handling.
-	 * When a new swap-cache is read into, it is linked to LRU
-	 * and treated as swapcache but has no rmap yet.
-	 * Calling try_to_unmap() against a page->mapping==NULL page is
-	 * BUG. So handle it here.
+	 * Corner case handling:
+	 * 1. When a new swap-cache page is read into, it is added to the LRU
+	 * and treated as swapcache but it has no rmap yet.
+	 * Calling try_to_unmap() against a page->mapping==NULL page will
+	 * trigger a BUG.  So handle it here.
+	 * 2. An orphaned page (see truncate_complete_page) might have
+	 * fs-private metadata. The page can be picked up due to memory
+	 * offlining.  Everywhere else except page reclaim, the page is
+	 * invisible to the vm, so the page can not be migrated.  So try to
+	 * free the metadata, so the page can be freed.
 	 */
-	if (!page->mapping)
+	if (!page->mapping) {
+		if (!PageAnon(page) && PagePrivate(page)) {
+			/*
+			 * Go direct to try_to_free_buffers() here because
+			 * a) that's what try_to_release_page() would do anyway
+			 * b) we may be under rcu_read_lock() here, so we can't
+			 *    use GFP_KERNEL which is what try_to_release_page()
+			 *    needs to be effective.
+			 */
+			try_to_free_buffers(page);
+		}
 		goto rcu_unlock;
+	}
+
 	/* Establish migration ptes or remove ptes */
 	try_to_unmap(page, 1);
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 8295577..bb4c963 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -36,6 +36,10 @@
 #define arch_mmap_check(addr, len, flags)	(0)
 #endif
 
+#ifndef arch_rebalance_pgtables
+#define arch_rebalance_pgtables(addr, len)		(addr)
+#endif
+
 static void unmap_region(struct mm_struct *mm,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
 		unsigned long start, unsigned long end);
@@ -1424,7 +1428,7 @@
 	if (addr & ~PAGE_MASK)
 		return -EINVAL;
 
-	return addr;
+	return arch_rebalance_pgtables(addr, len);
 }
 
 EXPORT_SYMBOL(get_unmapped_area);
diff --git a/mm/nommu.c b/mm/nommu.c
index b989cb9..5d8ae08 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -10,6 +10,7 @@
  *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
  *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
  *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
+ *  Copyright (c) 2007      Paul Mundt <lethal@linux-sh.org>
  */
 
 #include <linux/module.h>
@@ -167,7 +168,7 @@
 DEFINE_RWLOCK(vmlist_lock);
 struct vm_struct *vmlist;
 
-void vfree(void *addr)
+void vfree(const void *addr)
 {
 	kfree(addr);
 }
@@ -183,13 +184,33 @@
 }
 EXPORT_SYMBOL(__vmalloc);
 
-struct page * vmalloc_to_page(void *addr)
+void *vmalloc_user(unsigned long size)
+{
+	void *ret;
+
+	ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+			PAGE_KERNEL);
+	if (ret) {
+		struct vm_area_struct *vma;
+
+		down_write(&current->mm->mmap_sem);
+		vma = find_vma(current->mm, (unsigned long)ret);
+		if (vma)
+			vma->vm_flags |= VM_USERMAP;
+		up_write(&current->mm->mmap_sem);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(vmalloc_user);
+
+struct page *vmalloc_to_page(const void *addr)
 {
 	return virt_to_page(addr);
 }
 EXPORT_SYMBOL(vmalloc_to_page);
 
-unsigned long vmalloc_to_pfn(void *addr)
+unsigned long vmalloc_to_pfn(const void *addr)
 {
 	return page_to_pfn(virt_to_page(addr));
 }
@@ -253,10 +274,17 @@
  *
  * The resulting memory area is 32bit addressable and zeroed so it can be
  * mapped to userspace without leaking data.
+ *
+ * VM_USERMAP is set on the corresponding VMA so that subsequent calls to
+ * remap_vmalloc_range() are permissible.
  */
 void *vmalloc_32_user(unsigned long size)
 {
-	return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+	/*
+	 * We'll have to sort out the ZONE_DMA bits for 64-bit,
+	 * but for now this can simply use vmalloc_user() directly.
+	 */
+	return vmalloc_user(size);
 }
 EXPORT_SYMBOL(vmalloc_32_user);
 
@@ -267,7 +295,7 @@
 }
 EXPORT_SYMBOL(vmap);
 
-void vunmap(void *addr)
+void vunmap(const void *addr)
 {
 	BUG();
 }
@@ -1216,6 +1244,28 @@
 }
 EXPORT_SYMBOL(remap_pfn_range);
 
+/*
+ * Re-point @vma at a kernel allocation: the VMA is moved so that it starts
+ * at @addr plus @pgoff pages, keeping its original length.
+ *
+ * Returns 0 on success, or -EINVAL if @vma does not carry VM_USERMAP.
+ */
+int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
+			unsigned long pgoff)
+{
+	/* unsigned long so the vm_end - vm_start length is not narrowed */
+	unsigned long size = vma->vm_end - vma->vm_start;
+
+	if (!(vma->vm_flags & VM_USERMAP))
+		return -EINVAL;
+
+	vma->vm_start = (unsigned long)(addr + (pgoff << PAGE_SHIFT));
+	vma->vm_end = vma->vm_start + size;
+
+	return 0;
+}
+EXPORT_SYMBOL(remap_vmalloc_range);
+
 void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 {
 }
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 96473b4..c1850bf 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -125,8 +125,7 @@
 	 * Superuser processes are usually more important, so we make it
 	 * less likely that we kill those.
 	 */
-	if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_ADMIN) ||
-				p->uid == 0 || p->euid == 0)
+	if (__capable(p, CAP_SYS_ADMIN) || __capable(p, CAP_SYS_RESOURCE))
 		points /= 4;
 
 	/*
@@ -135,7 +134,7 @@
 	 * tend to only have this flag set on applications they think
 	 * of as important.
 	 */
-	if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO))
+	if (__capable(p, CAP_SYS_RAWIO))
 		points /= 4;
 
 	/*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 3d3848f..5e00f17 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -69,6 +69,12 @@
 int dirty_background_ratio = 5;
 
 /*
+ * free highmem will not be subtracted from the total free memory
+ * for calculating free ratios if vm_highmem_is_dirtyable is true
+ */
+int vm_highmem_is_dirtyable;
+
+/*
  * The generator of dirty data starts writeback at this percentage
  */
 int vm_dirty_ratio = 10;
@@ -219,7 +225,7 @@
  *
  *   dirty -= (dirty/8) * p_{t}
  */
-void task_dirty_limit(struct task_struct *tsk, long *pdirty)
+static void task_dirty_limit(struct task_struct *tsk, long *pdirty)
 {
 	long numerator, denominator;
 	long dirty = *pdirty;
@@ -287,7 +293,10 @@
 	x = global_page_state(NR_FREE_PAGES)
 		+ global_page_state(NR_INACTIVE)
 		+ global_page_state(NR_ACTIVE);
-	x -= highmem_dirtyable_memory(x);
+
+	if (!vm_highmem_is_dirtyable)
+		x -= highmem_dirtyable_memory(x);
+
 	return x + 1;	/* Ensure that we never return 0 */
 }
 
@@ -558,6 +567,7 @@
 			global_page_state(NR_UNSTABLE_NFS) < background_thresh
 				&& min_pages <= 0)
 			break;
+		wbc.more_io = 0;
 		wbc.encountered_congestion = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		wbc.pages_skipped = 0;
@@ -565,8 +575,9 @@
 		min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
 			/* Wrote less than expected */
-			congestion_wait(WRITE, HZ/10);
-			if (!wbc.encountered_congestion)
+			if (wbc.encountered_congestion || wbc.more_io)
+				congestion_wait(WRITE, HZ/10);
+			else
 				break;
 		}
 	}
@@ -631,11 +642,12 @@
 			global_page_state(NR_UNSTABLE_NFS) +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 	while (nr_to_write > 0) {
+		wbc.more_io = 0;
 		wbc.encountered_congestion = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		writeback_inodes(&wbc);
 		if (wbc.nr_to_write > 0) {
-			if (wbc.encountered_congestion)
+			if (wbc.encountered_congestion || wbc.more_io)
 				congestion_wait(WRITE, HZ/10);
 			else
 				break;	/* All the old data is written */
@@ -1064,7 +1076,7 @@
 	return 0;
 }
 
-int fastcall set_page_dirty(struct page *page)
+int set_page_dirty(struct page *page)
 {
 	int ret = __set_page_dirty(page);
 	if (ret)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b2838c2..37576b8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -537,7 +537,7 @@
 /*
  * permit the bootmem allocator to evade page validation on high-order frees
  */
-void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order)
+void __init __free_pages_bootmem(struct page *page, unsigned int order)
 {
 	if (order == 0) {
 		__ClearPageReserved(page);
@@ -890,31 +890,51 @@
 }
 #endif
 
-static void __drain_pages(unsigned int cpu)
+/*
+ * Drain pages of the indicated processor.
+ *
+ * The processor must either be the current processor and the
+ * thread pinned to the current processor or a processor that
+ * is not online.
+ */
+static void drain_pages(unsigned int cpu)
 {
 	unsigned long flags;
 	struct zone *zone;
-	int i;
 
 	for_each_zone(zone) {
 		struct per_cpu_pageset *pset;
+		struct per_cpu_pages *pcp;
 
 		if (!populated_zone(zone))
 			continue;
 
 		pset = zone_pcp(zone, cpu);
-		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
-			struct per_cpu_pages *pcp;
 
-			pcp = &pset->pcp[i];
-			local_irq_save(flags);
-			free_pages_bulk(zone, pcp->count, &pcp->list, 0);
-			pcp->count = 0;
-			local_irq_restore(flags);
-		}
+		pcp = &pset->pcp;
+		local_irq_save(flags);
+		free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+		pcp->count = 0;
+		local_irq_restore(flags);
 	}
 }
 
+/*
+ * Spill all of this CPU's per-cpu pages back into the buddy allocator.
+ */
+void drain_local_pages(void *arg)
+{
+	drain_pages(smp_processor_id());
+}
+
+/*
+ * Spill all the per-cpu pages from all CPUs back into the buddy allocator
+ */
+void drain_all_pages(void)
+{
+	on_each_cpu(drain_local_pages, NULL, 0, 1);
+}
+
 #ifdef CONFIG_HIBERNATION
 
 void mark_free_pages(struct zone *zone)
@@ -952,40 +972,9 @@
 #endif /* CONFIG_PM */
 
 /*
- * Spill all of this CPU's per-cpu pages back into the buddy allocator.
- */
-void drain_local_pages(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);	
-	__drain_pages(smp_processor_id());
-	local_irq_restore(flags);	
-}
-
-void smp_drain_local_pages(void *arg)
-{
-	drain_local_pages();
-}
-
-/*
- * Spill all the per-cpu pages from all CPUs back into the buddy allocator
- */
-void drain_all_local_pages(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__drain_pages(smp_processor_id());
-	local_irq_restore(flags);
-
-	smp_call_function(smp_drain_local_pages, NULL, 0, 1);
-}
-
-/*
  * Free a 0-order page
  */
-static void fastcall free_hot_cold_page(struct page *page, int cold)
+static void free_hot_cold_page(struct page *page, int cold)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
@@ -1001,10 +990,13 @@
 	arch_free_page(page, 0);
 	kernel_map_pages(page, 1, 0);
 
-	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+	pcp = &zone_pcp(zone, get_cpu())->pcp;
 	local_irq_save(flags);
 	__count_vm_event(PGFREE);
-	list_add(&page->lru, &pcp->list);
+	if (cold)
+		list_add_tail(&page->lru, &pcp->list);
+	else
+		list_add(&page->lru, &pcp->list);
 	set_page_private(page, get_pageblock_migratetype(page));
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
@@ -1015,12 +1007,12 @@
 	put_cpu();
 }
 
-void fastcall free_hot_page(struct page *page)
+void free_hot_page(struct page *page)
 {
 	free_hot_cold_page(page, 0);
 }
 	
-void fastcall free_cold_page(struct page *page)
+void free_cold_page(struct page *page)
 {
 	free_hot_cold_page(page, 1);
 }
@@ -1062,7 +1054,7 @@
 	if (likely(order == 0)) {
 		struct per_cpu_pages *pcp;
 
-		pcp = &zone_pcp(zone, cpu)->pcp[cold];
+		pcp = &zone_pcp(zone, cpu)->pcp;
 		local_irq_save(flags);
 		if (!pcp->count) {
 			pcp->count = rmqueue_bulk(zone, 0,
@@ -1072,9 +1064,15 @@
 		}
 
 		/* Find a page of the appropriate migrate type */
-		list_for_each_entry(page, &pcp->list, lru)
-			if (page_private(page) == migratetype)
-				break;
+		if (cold) {
+			list_for_each_entry_reverse(page, &pcp->list, lru)
+				if (page_private(page) == migratetype)
+					break;
+		} else {
+			list_for_each_entry(page, &pcp->list, lru)
+				if (page_private(page) == migratetype)
+					break;
+		}
 
 		/* Allocate more to the pcp list if necessary */
 		if (unlikely(&page->lru == &pcp->list)) {
@@ -1569,7 +1567,7 @@
 	cond_resched();
 
 	if (order != 0)
-		drain_all_local_pages();
+		drain_all_pages();
 
 	if (likely(did_some_progress)) {
 		page = get_page_from_freelist(gfp_mask, order,
@@ -1643,7 +1641,7 @@
 /*
  * Common helper functions.
  */
-fastcall unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
+unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
 {
 	struct page * page;
 	page = alloc_pages(gfp_mask, order);
@@ -1654,7 +1652,7 @@
 
 EXPORT_SYMBOL(__get_free_pages);
 
-fastcall unsigned long get_zeroed_page(gfp_t gfp_mask)
+unsigned long get_zeroed_page(gfp_t gfp_mask)
 {
 	struct page * page;
 
@@ -1680,7 +1678,7 @@
 		free_hot_cold_page(pvec->pages[i], pvec->cold);
 }
 
-fastcall void __free_pages(struct page *page, unsigned int order)
+void __free_pages(struct page *page, unsigned int order)
 {
 	if (put_page_testzero(page)) {
 		if (order == 0)
@@ -1692,7 +1690,7 @@
 
 EXPORT_SYMBOL(__free_pages);
 
-fastcall void free_pages(unsigned long addr, unsigned int order)
+void free_pages(unsigned long addr, unsigned int order)
 {
 	if (addr != 0) {
 		VM_BUG_ON(!virt_addr_valid((void *)addr));
@@ -1801,12 +1799,9 @@
 
 			pageset = zone_pcp(zone, cpu);
 
-			printk("CPU %4d: Hot: hi:%5d, btch:%4d usd:%4d   "
-			       "Cold: hi:%5d, btch:%4d usd:%4d\n",
-			       cpu, pageset->pcp[0].high,
-			       pageset->pcp[0].batch, pageset->pcp[0].count,
-			       pageset->pcp[1].high, pageset->pcp[1].batch,
-			       pageset->pcp[1].count);
+			printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
+			       cpu, pageset->pcp.high,
+			       pageset->pcp.batch, pageset->pcp.count);
 		}
 	}
 
@@ -1879,6 +1874,8 @@
 		printk("= %lukB\n", K(total));
 	}
 
+	printk("%ld total pagecache pages\n", global_page_state(NR_FILE_PAGES));
+
 	show_swap_cache_info();
 }
 
@@ -2551,8 +2548,7 @@
 	}
 }
 
-static void __meminit zone_init_free_lists(struct pglist_data *pgdat,
-				struct zone *zone, unsigned long size)
+static void __meminit zone_init_free_lists(struct zone *zone)
 {
 	int order, t;
 	for_each_migratetype_order(order, t) {
@@ -2604,17 +2600,11 @@
 
 	memset(p, 0, sizeof(*p));
 
-	pcp = &p->pcp[0];		/* hot */
+	pcp = &p->pcp;
 	pcp->count = 0;
 	pcp->high = 6 * batch;
 	pcp->batch = max(1UL, 1 * batch);
 	INIT_LIST_HEAD(&pcp->list);
-
-	pcp = &p->pcp[1];		/* cold*/
-	pcp->count = 0;
-	pcp->high = 2 * batch;
-	pcp->batch = max(1UL, batch/2);
-	INIT_LIST_HEAD(&pcp->list);
 }
 
 /*
@@ -2627,7 +2617,7 @@
 {
 	struct per_cpu_pages *pcp;
 
-	pcp = &p->pcp[0]; /* hot list */
+	pcp = &p->pcp;
 	pcp->high = high;
 	pcp->batch = max(1UL, high/4);
 	if ((high/4) > (PAGE_SHIFT * 8))
@@ -2831,7 +2821,7 @@
 
 	memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
 
-	zone_init_free_lists(pgdat, zone, zone->spanned_pages);
+	zone_init_free_lists(zone);
 
 	return 0;
 }
@@ -3978,10 +3968,23 @@
 	int cpu = (unsigned long)hcpu;
 
 	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		local_irq_disable();
-		__drain_pages(cpu);
+		drain_pages(cpu);
+
+		/*
+		 * Spill the event counters of the dead processor
+		 * into the current processor's event counters.
+		 * This artificially elevates the count of the current
+		 * processor.
+		 */
 		vm_events_fold_cpu(cpu);
-		local_irq_enable();
+
+		/*
+		 * Zero the differential counters of the dead processor
+		 * so that the vm statistics are consistent.
+		 *
+		 * This is only okay since the processor is dead and cannot
+		 * race with what we are doing.
+		 */
 		refresh_cpu_vm_stats(cpu);
 	}
 	return NOTIFY_OK;
@@ -4480,7 +4483,7 @@
 out:
 	spin_unlock_irqrestore(&zone->lock, flags);
 	if (!ret)
-		drain_all_local_pages();
+		drain_all_pages();
 	return ret;
 }
 
diff --git a/mm/page_io.c b/mm/page_io.c
index 3b97f68..065c448 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -126,7 +126,7 @@
 	int ret = 0;
 
 	BUG_ON(!PageLocked(page));
-	ClearPageUptodate(page);
+	BUG_ON(PageUptodate(page));
 	bio = get_swap_bio(GFP_KERNEL, page_private(page), page,
 				end_swap_bio_read);
 	if (bio == NULL) {
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
new file mode 100644
index 0000000..b4f27d2
--- /dev/null
+++ b/mm/pagewalk.c
@@ -0,0 +1,131 @@
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/sched.h>
+
+/* Call walk->pte_entry() on each pte under @pmd covering [addr, end). */
+static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+			  const struct mm_walk *walk, void *private)
+{
+	pte_t *pte, *mapped;	/* walking cursor; pointer the mapping returned */
+	int err = 0;
+
+	pte = mapped = pte_offset_map(pmd, addr);
+	do {	/* the first non-zero callback result aborts the walk */
+		err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, private);
+		if (err)
+			break;
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+	pte_unmap(mapped);	/* not pte: it is one past the last entry */
+	return err;
+}
+
+static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
+			  const struct mm_walk *walk, void *private)
+{
+	pmd_t *pmd;
+	unsigned long next;	/* end of the current entry's range */
+	int err = 0;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd)) {	/* handled as a hole */
+			if (walk->pte_hole)	/* callback is optional */
+				err = walk->pte_hole(addr, next, private);
+			if (err)
+				break;
+			continue;	/* while () still advances pmd/addr */
+		}
+		if (walk->pmd_entry)	/* callback is optional */
+			err = walk->pmd_entry(pmd, addr, next, private);
+		if (!err && walk->pte_entry)	/* descend only when asked to */
+			err = walk_pte_range(pmd, addr, next, walk, private);
+		if (err)
+			break;
+	} while (pmd++, addr = next, addr != end);
+
+	return err;	/* 0, or the first non-zero callback result */
+}
+
+static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
+			  const struct mm_walk *walk, void *private)
+{
+	pud_t *pud;
+	unsigned long next;	/* end of the current entry's range */
+	int err = 0;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud)) {	/* handled as a hole */
+			if (walk->pte_hole)	/* callback is optional */
+				err = walk->pte_hole(addr, next, private);
+			if (err)
+				break;
+			continue;	/* while () still advances pud/addr */
+		}
+		if (walk->pud_entry)	/* callback is optional */
+			err = walk->pud_entry(pud, addr, next, private);
+		if (!err && (walk->pmd_entry || walk->pte_entry))	/* go deeper? */
+			err = walk_pmd_range(pud, addr, next, walk, private);
+		if (err)
+			break;
+	} while (pud++, addr = next, addr != end);
+
+	return err;	/* 0, or the first non-zero callback result */
+}
+
+/**
+ * walk_page_range - walk a memory map's page tables with a callback
+ * @mm: memory map to walk
+ * @addr: starting address
+ * @end: ending address (exclusive)
+ * @walk: set of callbacks to invoke for each level of the tree
+ * @private: private data passed to the callback function
+ *
+ * Recursively walk the page tables of @mm covering [@addr, @end),
+ * calling supplied callbacks. Callbacks are called in-order (first
+ * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
+ * etc.). If lower-level callbacks are omitted, walking depth is reduced.
+ *
+ * Each callback receives an entry pointer, the start and end of the
+ * associated range, and a caller-supplied private data pointer.
+ *
+ * No locks are taken, but the bottom level iterator will map PTE
+ * directories from highmem if necessary.
+ *
+ * If any callback returns a non-zero value, the walk is aborted and
+ * the return value is propagated back to the caller. Otherwise 0 is returned.
+ */
+int walk_page_range(const struct mm_struct *mm,
+		    unsigned long addr, unsigned long end,
+		    const struct mm_walk *walk, void *private)
+{
+	pgd_t *pgd;
+	unsigned long next;	/* end of the current entry's range */
+	int err = 0;
+
+	if (addr >= end)
+		return err;	/* empty range: nothing to walk, returns 0 */
+
+	pgd = pgd_offset(mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd)) {	/* handled as a hole */
+			if (walk->pte_hole)	/* callback is optional */
+				err = walk->pte_hole(addr, next, private);
+			if (err)
+				break;
+			continue;	/* while () still advances pgd/addr */
+		}
+		if (walk->pgd_entry)	/* callback is optional */
+			err = walk->pgd_entry(pgd, addr, next, private);
+		if (!err &&	/* descend only if some lower callback is set */
+		    (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
+			err = walk_pud_range(pgd, addr, next, walk, private);
+		if (err)
+			break;
+	} while (pgd++, addr = next, addr != end);
+
+	return err;	/* 0, or the first non-zero callback result */
+}
diff --git a/mm/rmap.c b/mm/rmap.c
index dbc2ca2..57ad276 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -36,7 +36,6 @@
  *                 mapping->tree_lock (widely used, in set_page_dirty,
  *                           in arch-dependent flush_dcache_mmap_lock,
  *                           within inode_lock in __sync_single_inode)
- *                   zone->lock (within radix tree node alloc)
  */
 
 #include <linux/mm.h>
@@ -284,7 +283,10 @@
 	if (!pte)
 		goto out;
 
-	if (ptep_clear_flush_young(vma, address, pte))
+	if (vma->vm_flags & VM_LOCKED) {
+		referenced++;
+		*mapcount = 1;	/* break early from loop */
+	} else if (ptep_clear_flush_young(vma, address, pte))
 		referenced++;
 
 	/* Pretend the page is referenced if the task has the
diff --git a/mm/shmem.c b/mm/shmem.c
index 51b3d6c..0f246c4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -78,11 +78,10 @@
 
 /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
 enum sgp_type {
-	SGP_QUICK,	/* don't try more than file page cache lookup */
 	SGP_READ,	/* don't exceed i_size, don't allocate page */
 	SGP_CACHE,	/* don't exceed i_size, may allocate page */
+	SGP_DIRTY,	/* like SGP_CACHE, but set new page dirty */
 	SGP_WRITE,	/* may exceed i_size, may allocate page */
-	SGP_FAULT,	/* same as SGP_CACHE, return with page locked */
 };
 
 static int shmem_getpage(struct inode *inode, unsigned long idx,
@@ -194,7 +193,7 @@
 };
 
 static LIST_HEAD(shmem_swaplist);
-static DEFINE_SPINLOCK(shmem_swaplist_lock);
+static DEFINE_MUTEX(shmem_swaplist_mutex);
 
 static void shmem_free_blocks(struct inode *inode, long pages)
 {
@@ -207,6 +206,31 @@
 	}
 }
 
+static int shmem_reserve_inode(struct super_block *sb)
+{
+	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+	if (sbinfo->max_inodes) {	/* zero: no inode accounting is done */
+		spin_lock(&sbinfo->stat_lock);
+		if (!sbinfo->free_inodes) {
+			spin_unlock(&sbinfo->stat_lock);
+			return -ENOSPC;	/* none left to hand out */
+		}
+		sbinfo->free_inodes--;	/* take one, under stat_lock */
+		spin_unlock(&sbinfo->stat_lock);
+	}
+	return 0;	/* reserved, or accounting is off */
+}
+
+static void shmem_free_inode(struct super_block *sb)
+{
+	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+	if (sbinfo->max_inodes) {	/* zero: no inode accounting is done */
+		spin_lock(&sbinfo->stat_lock);
+		sbinfo->free_inodes++;	/* give one back, under stat_lock */
+		spin_unlock(&sbinfo->stat_lock);
+	}
+}
+
 /*
  * shmem_recalc_inode - recalculate the size of an inode
  *
@@ -731,6 +755,8 @@
 				(void) shmem_getpage(inode,
 					attr->ia_size>>PAGE_CACHE_SHIFT,
 						&page, SGP_READ, NULL);
+				if (page)
+					unlock_page(page);
 			}
 			/*
 			 * Reset SHMEM_PAGEIN flag so that shmem_truncate can
@@ -762,7 +788,6 @@
 
 static void shmem_delete_inode(struct inode *inode)
 {
-	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	struct shmem_inode_info *info = SHMEM_I(inode);
 
 	if (inode->i_op->truncate == shmem_truncate) {
@@ -771,17 +796,13 @@
 		inode->i_size = 0;
 		shmem_truncate(inode);
 		if (!list_empty(&info->swaplist)) {
-			spin_lock(&shmem_swaplist_lock);
+			mutex_lock(&shmem_swaplist_mutex);
 			list_del_init(&info->swaplist);
-			spin_unlock(&shmem_swaplist_lock);
+			mutex_unlock(&shmem_swaplist_mutex);
 		}
 	}
 	BUG_ON(inode->i_blocks);
-	if (sbinfo->max_inodes) {
-		spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_inodes++;
-		spin_unlock(&sbinfo->stat_lock);
-	}
+	shmem_free_inode(inode->i_sb);
 	clear_inode(inode);
 }
 
@@ -807,19 +828,22 @@
 	struct page *subdir;
 	swp_entry_t *ptr;
 	int offset;
+	int error;
 
 	idx = 0;
 	ptr = info->i_direct;
 	spin_lock(&info->lock);
+	if (!info->swapped) {
+		list_del_init(&info->swaplist);
+		goto lost2;
+	}
 	limit = info->next_index;
 	size = limit;
 	if (size > SHMEM_NR_DIRECT)
 		size = SHMEM_NR_DIRECT;
 	offset = shmem_find_swp(entry, ptr, ptr+size);
-	if (offset >= 0) {
-		shmem_swp_balance_unmap();
+	if (offset >= 0)
 		goto found;
-	}
 	if (!info->i_indirect)
 		goto lost2;
 
@@ -829,6 +853,14 @@
 	for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
 		if (unlikely(idx == stage)) {
 			shmem_dir_unmap(dir-1);
+			if (cond_resched_lock(&info->lock)) {
+				/* check it has not been truncated */
+				if (limit > info->next_index) {
+					limit = info->next_index;
+					if (idx >= limit)
+						goto lost2;
+				}
+			}
 			dir = shmem_dir_map(info->i_indirect) +
 			    ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
 			while (!*dir) {
@@ -849,11 +881,11 @@
 			if (size > ENTRIES_PER_PAGE)
 				size = ENTRIES_PER_PAGE;
 			offset = shmem_find_swp(entry, ptr, ptr+size);
+			shmem_swp_unmap(ptr);
 			if (offset >= 0) {
 				shmem_dir_unmap(dir);
 				goto found;
 			}
-			shmem_swp_unmap(ptr);
 		}
 	}
 lost1:
@@ -863,19 +895,63 @@
 	return 0;
 found:
 	idx += offset;
-	inode = &info->vfs_inode;
-	if (move_from_swap_cache(page, idx, inode->i_mapping) == 0) {
-		info->flags |= SHMEM_PAGEIN;
-		shmem_swp_set(info, ptr + offset, 0);
-	}
-	shmem_swp_unmap(ptr);
+	inode = igrab(&info->vfs_inode);
 	spin_unlock(&info->lock);
+
 	/*
-	 * Decrement swap count even when the entry is left behind:
-	 * try_to_unuse will skip over mms, then reincrement count.
+	 * Move _head_ to start search for next from here.
+	 * But be careful: shmem_delete_inode checks list_empty without taking
+	 * mutex, and there's an instant in list_move_tail when info->swaplist
+	 * would appear empty, if it were the only one on shmem_swaplist.  We
+	 * could avoid doing it if inode NULL; or use this minor optimization.
 	 */
-	swap_free(entry);
-	return 1;
+	if (shmem_swaplist.next != &info->swaplist)
+		list_move_tail(&shmem_swaplist, &info->swaplist);
+	mutex_unlock(&shmem_swaplist_mutex);
+
+	error = 1;
+	if (!inode)
+		goto out;
+	error = radix_tree_preload(GFP_KERNEL);
+	if (error)
+		goto out;
+	error = 1;
+
+	spin_lock(&info->lock);
+	ptr = shmem_swp_entry(info, idx, NULL);
+	if (ptr && ptr->val == entry.val)
+		error = add_to_page_cache(page, inode->i_mapping,
+						idx, GFP_NOWAIT);
+	if (error == -EEXIST) {
+		struct page *filepage = find_get_page(inode->i_mapping, idx);
+		error = 1;
+		if (filepage) {
+			/*
+			 * There might be a more uptodate page coming down
+			 * from a stacked writepage: forget our swappage if so.
+			 */
+			if (PageUptodate(filepage))
+				error = 0;
+			page_cache_release(filepage);
+		}
+	}
+	if (!error) {
+		delete_from_swap_cache(page);
+		set_page_dirty(page);
+		info->flags |= SHMEM_PAGEIN;
+		shmem_swp_set(info, ptr, 0);
+		swap_free(entry);
+		error = 1;	/* not an error, but entry was found */
+	}
+	if (ptr)
+		shmem_swp_unmap(ptr);
+	spin_unlock(&info->lock);
+	radix_tree_preload_end();
+out:
+	unlock_page(page);
+	page_cache_release(page);
+	iput(inode);		/* allows for NULL */
+	return error;
 }
 
 /*
@@ -887,20 +963,16 @@
 	struct shmem_inode_info *info;
 	int found = 0;
 
-	spin_lock(&shmem_swaplist_lock);
+	mutex_lock(&shmem_swaplist_mutex);
 	list_for_each_safe(p, next, &shmem_swaplist) {
 		info = list_entry(p, struct shmem_inode_info, swaplist);
-		if (!info->swapped)
-			list_del_init(&info->swaplist);
-		else if (shmem_unuse_inode(info, entry, page)) {
-			/* move head to start search for next from here */
-			list_move_tail(&shmem_swaplist, &info->swaplist);
-			found = 1;
-			break;
-		}
+		found = shmem_unuse_inode(info, entry, page);
+		cond_resched();
+		if (found)
+			goto out;
 	}
-	spin_unlock(&shmem_swaplist_lock);
-	return found;
+	mutex_unlock(&shmem_swaplist_mutex);
+out:	return found;	/* 0 or 1 or -ENOMEM */
 }
 
 /*
@@ -915,54 +987,65 @@
 	struct inode *inode;
 
 	BUG_ON(!PageLocked(page));
-	/*
-	 * shmem_backing_dev_info's capabilities prevent regular writeback or
-	 * sync from ever calling shmem_writepage; but a stacking filesystem
-	 * may use the ->writepage of its underlying filesystem, in which case
-	 * we want to do nothing when that underlying filesystem is tmpfs
-	 * (writing out to swap is useful as a response to memory pressure, but
-	 * of no use to stabilize the data) - just redirty the page, unlock it
-	 * and claim success in this case.  AOP_WRITEPAGE_ACTIVATE, and the
-	 * page_mapped check below, must be avoided unless we're in reclaim.
-	 */
-	if (!wbc->for_reclaim) {
-		set_page_dirty(page);
-		unlock_page(page);
-		return 0;
-	}
-	BUG_ON(page_mapped(page));
-
 	mapping = page->mapping;
 	index = page->index;
 	inode = mapping->host;
 	info = SHMEM_I(inode);
 	if (info->flags & VM_LOCKED)
 		goto redirty;
-	swap = get_swap_page();
-	if (!swap.val)
+	if (!total_swap_pages)
 		goto redirty;
 
+	/*
+	 * shmem_backing_dev_info's capabilities prevent regular writeback or
+	 * sync from ever calling shmem_writepage; but a stacking filesystem
+	 * may use the ->writepage of its underlying filesystem, in which case
+	 * tmpfs should write out to swap only in response to memory pressure,
+	 * and not for pdflush or sync.  However, in those cases, we do still
+	 * want to check if there's a redundant swappage to be discarded.
+	 */
+	if (wbc->for_reclaim)
+		swap = get_swap_page();
+	else
+		swap.val = 0;
+
 	spin_lock(&info->lock);
-	shmem_recalc_inode(inode);
 	if (index >= info->next_index) {
 		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
 		goto unlock;
 	}
 	entry = shmem_swp_entry(info, index, NULL);
-	BUG_ON(!entry);
-	BUG_ON(entry->val);
+	if (entry->val) {
+		/*
+		 * The more uptodate page coming down from a stacked
+		 * writepage should replace our old swappage.
+		 */
+		free_swap_and_cache(*entry);
+		shmem_swp_set(info, entry, 0);
+	}
+	shmem_recalc_inode(inode);
 
-	if (move_to_swap_cache(page, swap) == 0) {
+	if (swap.val && add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
+		remove_from_page_cache(page);
 		shmem_swp_set(info, entry, swap.val);
 		shmem_swp_unmap(entry);
+		if (list_empty(&info->swaplist))
+			inode = igrab(inode);
+		else
+			inode = NULL;
 		spin_unlock(&info->lock);
-		if (list_empty(&info->swaplist)) {
-			spin_lock(&shmem_swaplist_lock);
+		swap_duplicate(swap);
+		BUG_ON(page_mapped(page));
+		page_cache_release(page);	/* pagecache ref */
+		set_page_dirty(page);
+		unlock_page(page);
+		if (inode) {
+			mutex_lock(&shmem_swaplist_mutex);
 			/* move instead of add in case we're racing */
 			list_move_tail(&info->swaplist, &shmem_swaplist);
-			spin_unlock(&shmem_swaplist_lock);
+			mutex_unlock(&shmem_swaplist_mutex);
+			iput(inode);
 		}
-		unlock_page(page);
 		return 0;
 	}
 
@@ -972,7 +1055,10 @@
 	swap_free(swap);
 redirty:
 	set_page_dirty(page);
-	return AOP_WRITEPAGE_ACTIVATE;	/* Return with the page locked */
+	if (wbc->for_reclaim)
+		return AOP_WRITEPAGE_ACTIVATE;	/* Return with page locked */
+	unlock_page(page);
+	return 0;
 }
 
 #ifdef CONFIG_NUMA
@@ -1025,53 +1111,33 @@
 	return err;
 }
 
-static struct page *shmem_swapin_async(struct shared_policy *p,
-				       swp_entry_t entry, unsigned long idx)
+static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
+			struct shmem_inode_info *info, unsigned long idx)
 {
-	struct page *page;
 	struct vm_area_struct pvma;
+	struct page *page;
 
 	/* Create a pseudo vma that just contains the policy */
-	memset(&pvma, 0, sizeof(struct vm_area_struct));
-	pvma.vm_end = PAGE_SIZE;
+	pvma.vm_start = 0;
 	pvma.vm_pgoff = idx;
-	pvma.vm_policy = mpol_shared_policy_lookup(p, idx);
-	page = read_swap_cache_async(entry, &pvma, 0);
+	pvma.vm_ops = NULL;
+	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
+	page = swapin_readahead(entry, gfp, &pvma, 0);
 	mpol_free(pvma.vm_policy);
 	return page;
 }
 
-static struct page *shmem_swapin(struct shmem_inode_info *info,
-				 swp_entry_t entry, unsigned long idx)
-{
-	struct shared_policy *p = &info->policy;
-	int i, num;
-	struct page *page;
-	unsigned long offset;
-
-	num = valid_swaphandles(entry, &offset);
-	for (i = 0; i < num; offset++, i++) {
-		page = shmem_swapin_async(p,
-				swp_entry(swp_type(entry), offset), idx);
-		if (!page)
-			break;
-		page_cache_release(page);
-	}
-	lru_add_drain();	/* Push any new pages onto the LRU now */
-	return shmem_swapin_async(p, entry, idx);
-}
-
-static struct page *
-shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info,
-		 unsigned long idx)
+static struct page *shmem_alloc_page(gfp_t gfp,
+			struct shmem_inode_info *info, unsigned long idx)
 {
 	struct vm_area_struct pvma;
 	struct page *page;
 
-	memset(&pvma, 0, sizeof(struct vm_area_struct));
-	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
+	/* Create a pseudo vma that just contains the policy */
+	pvma.vm_start = 0;
 	pvma.vm_pgoff = idx;
-	pvma.vm_end = PAGE_SIZE;
+	pvma.vm_ops = NULL;
+	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
 	page = alloc_page_vma(gfp, &pvma, 0);
 	mpol_free(pvma.vm_policy);
 	return page;
@@ -1083,15 +1149,14 @@
 	return 1;
 }
 
-static inline struct page *
-shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx)
+static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
+			struct shmem_inode_info *info, unsigned long idx)
 {
-	swapin_readahead(entry, 0, NULL);
-	return read_swap_cache_async(entry, NULL, 0);
+	return swapin_readahead(entry, gfp, NULL, 0);
 }
 
-static inline struct page *
-shmem_alloc_page(gfp_t gfp,struct shmem_inode_info *info, unsigned long idx)
+static inline struct page *shmem_alloc_page(gfp_t gfp,
+			struct shmem_inode_info *info, unsigned long idx)
 {
 	return alloc_page(gfp);
 }
@@ -1114,6 +1179,7 @@
 	struct page *swappage;
 	swp_entry_t *entry;
 	swp_entry_t swap;
+	gfp_t gfp;
 	int error;
 
 	if (idx >= SHMEM_MAX_INDEX)
@@ -1126,7 +1192,7 @@
 	 * Normally, filepage is NULL on entry, and either found
 	 * uptodate immediately, or allocated and zeroed, or read
 	 * in under swappage, which is then assigned to filepage.
-	 * But shmem_readpage and shmem_write_begin pass in a locked
+	 * But shmem_readpage (required for splice) passes in a locked
 	 * filepage, which may be found not uptodate by other callers
 	 * too, and may need to be copied from the swappage read in.
 	 */
@@ -1136,8 +1202,17 @@
 	if (filepage && PageUptodate(filepage))
 		goto done;
 	error = 0;
-	if (sgp == SGP_QUICK)
-		goto failed;
+	gfp = mapping_gfp_mask(mapping);
+	if (!filepage) {
+		/*
+		 * Try to preload while we can wait, to not make a habit of
+		 * draining atomic reserves; but don't latch on to this cpu.
+		 */
+		error = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
+		if (error)
+			goto failed;
+		radix_tree_preload_end();
+	}
 
 	spin_lock(&info->lock);
 	shmem_recalc_inode(inode);
@@ -1160,7 +1235,7 @@
 				*type |= VM_FAULT_MAJOR;
 			}
 			spin_unlock(&info->lock);
-			swappage = shmem_swapin(info, swap, idx);
+			swappage = shmem_swapin(swap, gfp, info, idx);
 			if (!swappage) {
 				spin_lock(&info->lock);
 				entry = shmem_swp_alloc(info, idx, sgp);
@@ -1218,23 +1293,21 @@
 			SetPageUptodate(filepage);
 			set_page_dirty(filepage);
 			swap_free(swap);
-		} else if (!(error = move_from_swap_cache(
-				swappage, idx, mapping))) {
+		} else if (!(error = add_to_page_cache(
+				swappage, mapping, idx, GFP_NOWAIT))) {
 			info->flags |= SHMEM_PAGEIN;
 			shmem_swp_set(info, entry, 0);
 			shmem_swp_unmap(entry);
+			delete_from_swap_cache(swappage);
 			spin_unlock(&info->lock);
 			filepage = swappage;
+			set_page_dirty(filepage);
 			swap_free(swap);
 		} else {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
 			unlock_page(swappage);
 			page_cache_release(swappage);
-			if (error == -ENOMEM) {
-				/* let kswapd refresh zone for GFP_ATOMICs */
-				congestion_wait(WRITE, HZ/50);
-			}
 			goto repeat;
 		}
 	} else if (sgp == SGP_READ && !filepage) {
@@ -1272,9 +1345,7 @@
 
 		if (!filepage) {
 			spin_unlock(&info->lock);
-			filepage = shmem_alloc_page(mapping_gfp_mask(mapping),
-						    info,
-						    idx);
+			filepage = shmem_alloc_page(gfp, info, idx);
 			if (!filepage) {
 				shmem_unacct_blocks(info->flags, 1);
 				shmem_free_blocks(inode, 1);
@@ -1291,7 +1362,7 @@
 				shmem_swp_unmap(entry);
 			}
 			if (error || swap.val || 0 != add_to_page_cache_lru(
-					filepage, mapping, idx, GFP_ATOMIC)) {
+					filepage, mapping, idx, GFP_NOWAIT)) {
 				spin_unlock(&info->lock);
 				page_cache_release(filepage);
 				shmem_unacct_blocks(info->flags, 1);
@@ -1309,14 +1380,11 @@
 		clear_highpage(filepage);
 		flush_dcache_page(filepage);
 		SetPageUptodate(filepage);
+		if (sgp == SGP_DIRTY)
+			set_page_dirty(filepage);
 	}
 done:
-	if (*pagep != filepage) {
-		*pagep = filepage;
-		if (sgp != SGP_FAULT)
-			unlock_page(filepage);
-
-	}
+	*pagep = filepage;
 	return 0;
 
 failed:
@@ -1336,7 +1404,7 @@
 	if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
 		return VM_FAULT_SIGBUS;
 
-	error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_FAULT, &ret);
+	error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
 	if (error)
 		return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
 
@@ -1399,15 +1467,8 @@
 	struct shmem_inode_info *info;
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 
-	if (sbinfo->max_inodes) {
-		spin_lock(&sbinfo->stat_lock);
-		if (!sbinfo->free_inodes) {
-			spin_unlock(&sbinfo->stat_lock);
-			return NULL;
-		}
-		sbinfo->free_inodes--;
-		spin_unlock(&sbinfo->stat_lock);
-	}
+	if (shmem_reserve_inode(sb))
+		return NULL;
 
 	inode = new_inode(sb);
 	if (inode) {
@@ -1451,11 +1512,8 @@
 						NULL);
 			break;
 		}
-	} else if (sbinfo->max_inodes) {
-		spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_inodes++;
-		spin_unlock(&sbinfo->stat_lock);
-	}
+	} else
+		shmem_free_inode(sb);
 	return inode;
 }
 
@@ -1494,123 +1552,30 @@
 {
 	struct inode *inode = mapping->host;
 
+	if (pos + copied > inode->i_size)
+		i_size_write(inode, pos + copied);
+
+	unlock_page(page);
 	set_page_dirty(page);
 	page_cache_release(page);
 
-	if (pos+copied > inode->i_size)
-		i_size_write(inode, pos+copied);
-
 	return copied;
 }
 
-static ssize_t
-shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
-{
-	struct inode	*inode = file->f_path.dentry->d_inode;
-	loff_t		pos;
-	unsigned long	written;
-	ssize_t		err;
-
-	if ((ssize_t) count < 0)
-		return -EINVAL;
-
-	if (!access_ok(VERIFY_READ, buf, count))
-		return -EFAULT;
-
-	mutex_lock(&inode->i_mutex);
-
-	pos = *ppos;
-	written = 0;
-
-	err = generic_write_checks(file, &pos, &count, 0);
-	if (err || !count)
-		goto out;
-
-	err = remove_suid(file->f_path.dentry);
-	if (err)
-		goto out;
-
-	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-
-	do {
-		struct page *page = NULL;
-		unsigned long bytes, index, offset;
-		char *kaddr;
-		int left;
-
-		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
-		index = pos >> PAGE_CACHE_SHIFT;
-		bytes = PAGE_CACHE_SIZE - offset;
-		if (bytes > count)
-			bytes = count;
-
-		/*
-		 * We don't hold page lock across copy from user -
-		 * what would it guard against? - so no deadlock here.
-		 * But it still may be a good idea to prefault below.
-		 */
-
-		err = shmem_getpage(inode, index, &page, SGP_WRITE, NULL);
-		if (err)
-			break;
-
-		left = bytes;
-		if (PageHighMem(page)) {
-			volatile unsigned char dummy;
-			__get_user(dummy, buf);
-			__get_user(dummy, buf + bytes - 1);
-
-			kaddr = kmap_atomic(page, KM_USER0);
-			left = __copy_from_user_inatomic(kaddr + offset,
-							buf, bytes);
-			kunmap_atomic(kaddr, KM_USER0);
-		}
-		if (left) {
-			kaddr = kmap(page);
-			left = __copy_from_user(kaddr + offset, buf, bytes);
-			kunmap(page);
-		}
-
-		written += bytes;
-		count -= bytes;
-		pos += bytes;
-		buf += bytes;
-		if (pos > inode->i_size)
-			i_size_write(inode, pos);
-
-		flush_dcache_page(page);
-		set_page_dirty(page);
-		mark_page_accessed(page);
-		page_cache_release(page);
-
-		if (left) {
-			pos -= left;
-			written -= left;
-			err = -EFAULT;
-			break;
-		}
-
-		/*
-		 * Our dirty pages are not counted in nr_dirty,
-		 * and we do not attempt to balance dirty pages.
-		 */
-
-		cond_resched();
-	} while (count);
-
-	*ppos = pos;
-	if (written)
-		err = written;
-out:
-	mutex_unlock(&inode->i_mutex);
-	return err;
-}
-
 static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
 {
 	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
 	unsigned long index, offset;
+	enum sgp_type sgp = SGP_READ;
+
+	/*
+	 * Might this read be for a stacking filesystem?  Then when reading
+	 * holes of a sparse file, we actually need to allocate those pages,
+	 * and even mark them dirty, so it cannot exceed the max_blocks limit.
+	 */
+	if (segment_eq(get_fs(), KERNEL_DS))
+		sgp = SGP_DIRTY;
 
 	index = *ppos >> PAGE_CACHE_SHIFT;
 	offset = *ppos & ~PAGE_CACHE_MASK;
@@ -1629,12 +1594,14 @@
 				break;
 		}
 
-		desc->error = shmem_getpage(inode, index, &page, SGP_READ, NULL);
+		desc->error = shmem_getpage(inode, index, &page, sgp, NULL);
 		if (desc->error) {
 			if (desc->error == -EINVAL)
 				desc->error = 0;
 			break;
 		}
+		if (page)
+			unlock_page(page);
 
 		/*
 		 * We must evaluate after, since reads (unlike writes)
@@ -1798,22 +1765,16 @@
 static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 {
 	struct inode *inode = old_dentry->d_inode;
-	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+	int ret;
 
 	/*
 	 * No ordinary (disk based) filesystem counts links as inodes;
 	 * but each new link needs a new dentry, pinning lowmem, and
 	 * tmpfs dentries cannot be pruned until they are unlinked.
 	 */
-	if (sbinfo->max_inodes) {
-		spin_lock(&sbinfo->stat_lock);
-		if (!sbinfo->free_inodes) {
-			spin_unlock(&sbinfo->stat_lock);
-			return -ENOSPC;
-		}
-		sbinfo->free_inodes--;
-		spin_unlock(&sbinfo->stat_lock);
-	}
+	ret = shmem_reserve_inode(inode->i_sb);
+	if (ret)
+		goto out;
 
 	dir->i_size += BOGO_DIRENT_SIZE;
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
@@ -1821,21 +1782,16 @@
 	atomic_inc(&inode->i_count);	/* New dentry reference */
 	dget(dentry);		/* Extra pinning count for the created dentry */
 	d_instantiate(dentry, inode);
-	return 0;
+out:
+	return ret;
 }
 
 static int shmem_unlink(struct inode *dir, struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
 
-	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) {
-		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
-		if (sbinfo->max_inodes) {
-			spin_lock(&sbinfo->stat_lock);
-			sbinfo->free_inodes++;
-			spin_unlock(&sbinfo->stat_lock);
-		}
-	}
+	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
+		shmem_free_inode(inode->i_sb);
 
 	dir->i_size -= BOGO_DIRENT_SIZE;
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
@@ -1924,6 +1880,7 @@
 			iput(inode);
 			return error;
 		}
+		unlock_page(page);
 		inode->i_op = &shmem_symlink_inode_operations;
 		kaddr = kmap_atomic(page, KM_USER0);
 		memcpy(kaddr, symname, len);
@@ -1951,6 +1908,8 @@
 	struct page *page = NULL;
 	int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
 	nd_set_link(nd, res ? ERR_PTR(res) : kmap(page));
+	if (page)
+		unlock_page(page);
 	return page;
 }
 
@@ -1996,8 +1955,7 @@
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return security_inode_getsecurity(inode, name, buffer, size,
-					  -EOPNOTSUPP);
+	return xattr_getsecurity(inode, name, buffer, size);
 }
 
 static int shmem_xattr_security_set(struct inode *inode, const char *name,
@@ -2138,7 +2096,7 @@
 			}
 			if (*rest)
 				goto bad_val;
-			*blocks = size >> PAGE_CACHE_SHIFT;
+			*blocks = DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
 		} else if (!strcmp(this_char,"nr_blocks")) {
 			*blocks = memparse(value,&rest);
 			if (*rest)
@@ -2375,7 +2333,8 @@
 #ifdef CONFIG_TMPFS
 	.llseek		= generic_file_llseek,
 	.read		= shmem_file_read,
-	.write		= shmem_file_write,
+	.write		= do_sync_write,
+	.aio_write	= generic_file_aio_write,
 	.fsync		= simple_sync_file,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= generic_file_splice_write,
diff --git a/mm/slob.c b/mm/slob.c
index 773a7aa..e2c3c0e 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -12,10 +12,17 @@
  * allocator is as little as 2 bytes, however typically most architectures
  * will require 4 bytes on 32-bit and 8 bytes on 64-bit.
  *
- * The slob heap is a linked list of pages from alloc_pages(), and
- * within each page, there is a singly-linked list of free blocks (slob_t).
- * The heap is grown on demand and allocation from the heap is currently
- * first-fit.
+ * The slob heap is a set of linked lists of pages from alloc_pages(),
+ * and within each page, there is a singly-linked list of free blocks
+ * (slob_t). The heap is grown on demand. To reduce fragmentation,
+ * heap pages are segregated into three lists, with objects less than
+ * 256 bytes, objects less than 1024 bytes, and all other objects.
+ *
+ * Allocation from the heap involves first searching for a page with
+ * sufficient free blocks (using a next-fit-like approach) followed by
+ * a first-fit scan of the page. Deallocation inserts objects back
+ * into the free list in address order, so this is effectively an
+ * address-ordered first fit.
  *
  * Above this is an implementation of kmalloc/kfree. Blocks returned
  * from kmalloc are prepended with a 4-byte header with the kmalloc size.
@@ -110,9 +117,13 @@
 }
 
 /*
- * All (partially) free slob pages go on this list.
+ * All partially free slob pages go on these lists.
  */
-static LIST_HEAD(free_slob_pages);
+#define SLOB_BREAK1 256
+#define SLOB_BREAK2 1024
+static LIST_HEAD(free_slob_small);
+static LIST_HEAD(free_slob_medium);
+static LIST_HEAD(free_slob_large);
 
 /*
  * slob_page: True for all slob pages (false for bigblock pages)
@@ -140,9 +151,9 @@
 	return test_bit(PG_private, &sp->flags);
 }
 
-static inline void set_slob_page_free(struct slob_page *sp)
+static void set_slob_page_free(struct slob_page *sp, struct list_head *list)
 {
-	list_add(&sp->list, &free_slob_pages);
+	list_add(&sp->list, list);
 	__set_bit(PG_private, &sp->flags);
 }
 
@@ -294,12 +305,20 @@
 {
 	struct slob_page *sp;
 	struct list_head *prev;
+	struct list_head *slob_list;
 	slob_t *b = NULL;
 	unsigned long flags;
 
+	if (size < SLOB_BREAK1)
+		slob_list = &free_slob_small;
+	else if (size < SLOB_BREAK2)
+		slob_list = &free_slob_medium;
+	else
+		slob_list = &free_slob_large;
+
 	spin_lock_irqsave(&slob_lock, flags);
 	/* Iterate through each partially free page, try to find room */
-	list_for_each_entry(sp, &free_slob_pages, list) {
+	list_for_each_entry(sp, slob_list, list) {
 #ifdef CONFIG_NUMA
 		/*
 		 * If there's a node specification, search for a partial
@@ -321,9 +340,9 @@
 		/* Improve fragment distribution and reduce our average
 		 * search time by starting our next search here. (see
 		 * Knuth vol 1, sec 2.5, pg 449) */
-		if (prev != free_slob_pages.prev &&
-				free_slob_pages.next != prev->next)
-			list_move_tail(&free_slob_pages, prev->next);
+		if (prev != slob_list->prev &&
+				slob_list->next != prev->next)
+			list_move_tail(slob_list, prev->next);
 		break;
 	}
 	spin_unlock_irqrestore(&slob_lock, flags);
@@ -341,7 +360,7 @@
 		sp->free = b;
 		INIT_LIST_HEAD(&sp->list);
 		set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
-		set_slob_page_free(sp);
+		set_slob_page_free(sp, slob_list);
 		b = slob_page_alloc(sp, size, align);
 		BUG_ON(!b);
 		spin_unlock_irqrestore(&slob_lock, flags);
@@ -387,7 +406,7 @@
 		set_slob(b, units,
 			(void *)((unsigned long)(b +
 					SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
-		set_slob_page_free(sp);
+		set_slob_page_free(sp, &free_slob_small);
 		goto out;
 	}
 
@@ -398,6 +417,10 @@
 	sp->units += units;
 
 	if (b < sp->free) {
+		if (b + units == sp->free) {
+			units += slob_units(sp->free);
+			sp->free = slob_next(sp->free);
+		}
 		set_slob(b, units, sp->free);
 		sp->free = b;
 	} else {
diff --git a/mm/slub.c b/mm/slub.c
index 5cc4b7d..3f05667 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -247,7 +247,10 @@
 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
 							{ return 0; }
-static inline void sysfs_slab_remove(struct kmem_cache *s) {}
+static inline void sysfs_slab_remove(struct kmem_cache *s)
+{
+	kfree(s);
+}
 #endif
 
 /********************************************************************
@@ -354,22 +357,22 @@
 			printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
 			newline = 0;
 		}
-		printk(" %02x", addr[i]);
+		printk(KERN_CONT " %02x", addr[i]);
 		offset = i % 16;
 		ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
 		if (offset == 15) {
-			printk(" %s\n",ascii);
+			printk(KERN_CONT " %s\n", ascii);
 			newline = 1;
 		}
 	}
 	if (!newline) {
 		i %= 16;
 		while (i < 16) {
-			printk("   ");
+			printk(KERN_CONT "   ");
 			ascii[i] = ' ';
 			i++;
 		}
-		printk(" %s\n", ascii);
+		printk(KERN_CONT " %s\n", ascii);
 	}
 }
 
@@ -529,7 +532,7 @@
 
 	if (s->flags & __OBJECT_POISON) {
 		memset(p, POISON_FREE, s->objsize - 1);
-		p[s->objsize -1] = POISON_END;
+		p[s->objsize - 1] = POISON_END;
 	}
 
 	if (s->flags & SLAB_RED_ZONE)
@@ -558,7 +561,7 @@
 
 static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
 			u8 *object, char *what,
-			u8* start, unsigned int value, unsigned int bytes)
+			u8 *start, unsigned int value, unsigned int bytes)
 {
 	u8 *fault;
 	u8 *end;
@@ -692,7 +695,7 @@
 			(!check_bytes_and_report(s, page, p, "Poison", p,
 					POISON_FREE, s->objsize - 1) ||
 			 !check_bytes_and_report(s, page, p, "Poison",
-			 	p + s->objsize -1, POISON_END, 1)))
+				p + s->objsize - 1, POISON_END, 1)))
 			return 0;
 		/*
 		 * check_pad_bytes cleans up on its own.
@@ -900,8 +903,7 @@
 				"SLUB <none>: no slab for object 0x%p.\n",
 						object);
 			dump_stack();
-		}
-		else
+		} else
 			object_err(s, page, object,
 					"page slab pointer corrupt.");
 		goto fail;
@@ -947,7 +949,7 @@
 	/*
 	 * Determine which debug features should be switched on
 	 */
-	for ( ;*str && *str != ','; str++) {
+	for (; *str && *str != ','; str++) {
 		switch (tolower(*str)) {
 		case 'f':
 			slub_debug |= SLAB_DEBUG_FREE;
@@ -966,7 +968,7 @@
 			break;
 		default:
 			printk(KERN_ERR "slub_debug option '%c' "
-				"unknown. skipped\n",*str);
+				"unknown. skipped\n", *str);
 		}
 	}
 
@@ -1039,7 +1041,7 @@
  */
 static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 {
-	struct page * page;
+	struct page *page;
 	int pages = 1 << s->order;
 
 	if (s->order)
@@ -1135,7 +1137,7 @@
 	mod_zone_page_state(page_zone(page),
 		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
 		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
-		- pages);
+		-pages);
 
 	__free_pages(page, s->order);
 }
@@ -1195,19 +1197,15 @@
 /*
  * Management of partially allocated slabs
  */
-static void add_partial_tail(struct kmem_cache_node *n, struct page *page)
+static void add_partial(struct kmem_cache_node *n,
+				struct page *page, int tail)
 {
 	spin_lock(&n->list_lock);
 	n->nr_partial++;
-	list_add_tail(&page->lru, &n->partial);
-	spin_unlock(&n->list_lock);
-}
-
-static void add_partial(struct kmem_cache_node *n, struct page *page)
-{
-	spin_lock(&n->list_lock);
-	n->nr_partial++;
-	list_add(&page->lru, &n->partial);
+	if (tail)
+		list_add_tail(&page->lru, &n->partial);
+	else
+		list_add(&page->lru, &n->partial);
 	spin_unlock(&n->list_lock);
 }
 
@@ -1292,7 +1290,8 @@
 	 * expensive if we do it every time we are trying to find a slab
 	 * with available objects.
 	 */
-	if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio)
+	if (!s->remote_node_defrag_ratio ||
+			get_cycles() % 1024 > s->remote_node_defrag_ratio)
 		return NULL;
 
 	zonelist = &NODE_DATA(slab_node(current->mempolicy))
@@ -1335,7 +1334,7 @@
  *
  * On exit the slab lock will have been dropped.
  */
-static void unfreeze_slab(struct kmem_cache *s, struct page *page)
+static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 {
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
 
@@ -1343,7 +1342,7 @@
 	if (page->inuse) {
 
 		if (page->freelist)
-			add_partial(n, page);
+			add_partial(n, page, tail);
 		else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
 			add_full(n, page);
 		slab_unlock(page);
@@ -1358,7 +1357,7 @@
 			 * partial list stays small. kmem_cache_shrink can
 			 * reclaim empty slabs from the partial list.
 			 */
-			add_partial_tail(n, page);
+			add_partial(n, page, 1);
 			slab_unlock(page);
 		} else {
 			slab_unlock(page);
@@ -1373,6 +1372,7 @@
 static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
 	struct page *page = c->page;
+	int tail = 1;
 	/*
 	 * Merge cpu freelist into freelist. Typically we get here
 	 * because both freelists are empty. So this is unlikely
@@ -1381,6 +1381,8 @@
 	while (unlikely(c->freelist)) {
 		void **object;
 
+		tail = 0;	/* Hot objects. Put the slab first */
+
 		/* Retrieve object from cpu_freelist */
 		object = c->freelist;
 		c->freelist = c->freelist[c->offset];
@@ -1391,7 +1393,7 @@
 		page->inuse--;
 	}
 	c->page = NULL;
-	unfreeze_slab(s, page);
+	unfreeze_slab(s, page, tail);
 }
 
 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
@@ -1539,7 +1541,7 @@
  *
  * Otherwise we can simply pick the next object from the lockless free list.
  */
-static void __always_inline *slab_alloc(struct kmem_cache *s,
+static __always_inline void *slab_alloc(struct kmem_cache *s,
 		gfp_t gfpflags, int node, void *addr)
 {
 	void **object;
@@ -1613,7 +1615,7 @@
 	 * then add it.
 	 */
 	if (unlikely(!prior))
-		add_partial_tail(get_node(s, page_to_nid(page)), page);
+		add_partial(get_node(s, page_to_nid(page)), page, 1);
 
 out_unlock:
 	slab_unlock(page);
@@ -1647,7 +1649,7 @@
  * If fastpath is not possible then fall back to __slab_free where we deal
  * with all sorts of special processing.
  */
-static void __always_inline slab_free(struct kmem_cache *s,
+static __always_inline void slab_free(struct kmem_cache *s,
 			struct page *page, void *x, void *addr)
 {
 	void **object = (void *)x;
@@ -1997,6 +1999,7 @@
 {
 	struct page *page;
 	struct kmem_cache_node *n;
+	unsigned long flags;
 
 	BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
 
@@ -2021,7 +2024,14 @@
 #endif
 	init_kmem_cache_node(n);
 	atomic_long_inc(&n->nr_slabs);
-	add_partial(n, page);
+	/*
+	 * lockdep requires consistent irq usage for each lock
+	 * so even though there cannot be a race this early in
+	 * the boot sequence, we still disable irqs.
+	 */
+	local_irq_save(flags);
+	add_partial(n, page, 0);
+	local_irq_restore(flags);
 	return n;
 }
 
@@ -2206,7 +2216,7 @@
 
 	s->refcount = 1;
 #ifdef CONFIG_NUMA
-	s->defrag_ratio = 100;
+	s->remote_node_defrag_ratio = 100;
 #endif
 	if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
 		goto error;
@@ -2228,7 +2238,7 @@
  */
 int kmem_ptr_validate(struct kmem_cache *s, const void *object)
 {
-	struct page * page;
+	struct page *page;
 
 	page = get_object_page(object);
 
@@ -2322,7 +2332,6 @@
 		if (kmem_cache_close(s))
 			WARN_ON(1);
 		sysfs_slab_remove(s);
-		kfree(s);
 	} else
 		up_write(&slub_lock);
 }
@@ -2341,7 +2350,7 @@
 
 static int __init setup_slub_min_order(char *str)
 {
-	get_option (&str, &slub_min_order);
+	get_option(&str, &slub_min_order);
 
 	return 1;
 }
@@ -2350,7 +2359,7 @@
 
 static int __init setup_slub_max_order(char *str)
 {
-	get_option (&str, &slub_max_order);
+	get_option(&str, &slub_max_order);
 
 	return 1;
 }
@@ -2359,7 +2368,7 @@
 
 static int __init setup_slub_min_objects(char *str)
 {
-	get_option (&str, &slub_min_objects);
+	get_option(&str, &slub_min_objects);
 
 	return 1;
 }
@@ -2605,6 +2614,19 @@
 }
 EXPORT_SYMBOL(kfree);
 
+static unsigned long count_partial(struct kmem_cache_node *n)
+{
+	unsigned long flags;
+	unsigned long x = 0;
+	struct page *page;
+
+	spin_lock_irqsave(&n->list_lock, flags);
+	list_for_each_entry(page, &n->partial, lru)
+		x += page->inuse;
+	spin_unlock_irqrestore(&n->list_lock, flags);
+	return x;
+}
+
 /*
  * kmem_cache_shrink removes empty slabs from the partial lists and sorts
  * the remaining slabs by the number of items in use. The slabs with the
@@ -2931,7 +2953,7 @@
 		 * Check if alignment is compatible.
 		 * Courtesy of Adrian Drzewiecki
 		 */
-		if ((s->size & ~(align -1)) != s->size)
+		if ((s->size & ~(align - 1)) != s->size)
 			continue;
 
 		if (s->size - size >= sizeof(void *))
@@ -3040,8 +3062,9 @@
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata slab_notifier =
-	{ &slab_cpuup_callback, NULL, 0 };
+static struct notifier_block __cpuinitdata slab_notifier = {
+	&slab_cpuup_callback, NULL, 0
+};
 
 #endif
 
@@ -3076,19 +3099,6 @@
 	return slab_alloc(s, gfpflags, node, caller);
 }
 
-static unsigned long count_partial(struct kmem_cache_node *n)
-{
-	unsigned long flags;
-	unsigned long x = 0;
-	struct page *page;
-
-	spin_lock_irqsave(&n->list_lock, flags);
-	list_for_each_entry(page, &n->partial, lru)
-		x += page->inuse;
-	spin_unlock_irqrestore(&n->list_lock, flags);
-	return x;
-}
-
 #if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
 static int validate_slab(struct kmem_cache *s, struct page *page,
 						unsigned long *map)
@@ -3390,7 +3400,7 @@
 static int list_locations(struct kmem_cache *s, char *buf,
 					enum track_item alloc)
 {
-	int n = 0;
+	int len = 0;
 	unsigned long i;
 	struct loc_track t = { 0, 0, NULL };
 	int node;
@@ -3421,54 +3431,54 @@
 	for (i = 0; i < t.count; i++) {
 		struct location *l = &t.loc[i];
 
-		if (n > PAGE_SIZE - 100)
+		if (len > PAGE_SIZE - 100)
 			break;
-		n += sprintf(buf + n, "%7ld ", l->count);
+		len += sprintf(buf + len, "%7ld ", l->count);
 
 		if (l->addr)
-			n += sprint_symbol(buf + n, (unsigned long)l->addr);
+			len += sprint_symbol(buf + len, (unsigned long)l->addr);
 		else
-			n += sprintf(buf + n, "<not-available>");
+			len += sprintf(buf + len, "<not-available>");
 
 		if (l->sum_time != l->min_time) {
 			unsigned long remainder;
 
-			n += sprintf(buf + n, " age=%ld/%ld/%ld",
+			len += sprintf(buf + len, " age=%ld/%ld/%ld",
 			l->min_time,
 			div_long_long_rem(l->sum_time, l->count, &remainder),
 			l->max_time);
 		} else
-			n += sprintf(buf + n, " age=%ld",
+			len += sprintf(buf + len, " age=%ld",
 				l->min_time);
 
 		if (l->min_pid != l->max_pid)
-			n += sprintf(buf + n, " pid=%ld-%ld",
+			len += sprintf(buf + len, " pid=%ld-%ld",
 				l->min_pid, l->max_pid);
 		else
-			n += sprintf(buf + n, " pid=%ld",
+			len += sprintf(buf + len, " pid=%ld",
 				l->min_pid);
 
 		if (num_online_cpus() > 1 && !cpus_empty(l->cpus) &&
-				n < PAGE_SIZE - 60) {
-			n += sprintf(buf + n, " cpus=");
-			n += cpulist_scnprintf(buf + n, PAGE_SIZE - n - 50,
+				len < PAGE_SIZE - 60) {
+			len += sprintf(buf + len, " cpus=");
+			len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
 					l->cpus);
 		}
 
 		if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
-				n < PAGE_SIZE - 60) {
-			n += sprintf(buf + n, " nodes=");
-			n += nodelist_scnprintf(buf + n, PAGE_SIZE - n - 50,
+				len < PAGE_SIZE - 60) {
+			len += sprintf(buf + len, " nodes=");
+			len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
 					l->nodes);
 		}
 
-		n += sprintf(buf + n, "\n");
+		len += sprintf(buf + len, "\n");
 	}
 
 	free_loc_track(&t);
 	if (!t.count)
-		n += sprintf(buf, "No data\n");
-	return n;
+		len += sprintf(buf, "No data\n");
+	return len;
 }
 
 enum slab_stat_type {
@@ -3498,7 +3508,6 @@
 
 	for_each_possible_cpu(cpu) {
 		struct page *page;
-		int node;
 		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
 
 		if (!c)
@@ -3510,8 +3519,6 @@
 			continue;
 		if (page) {
 			if (flags & SO_CPU) {
-				int x = 0;
-
 				if (flags & SO_OBJECTS)
 					x = page->inuse;
 				else
@@ -3848,24 +3855,24 @@
 SLAB_ATTR_RO(free_calls);
 
 #ifdef CONFIG_NUMA
-static ssize_t defrag_ratio_show(struct kmem_cache *s, char *buf)
+static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
 {
-	return sprintf(buf, "%d\n", s->defrag_ratio / 10);
+	return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
 }
 
-static ssize_t defrag_ratio_store(struct kmem_cache *s,
+static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
 				const char *buf, size_t length)
 {
 	int n = simple_strtoul(buf, NULL, 10);
 
 	if (n < 100)
-		s->defrag_ratio = n * 10;
+		s->remote_node_defrag_ratio = n * 10;
 	return length;
 }
-SLAB_ATTR(defrag_ratio);
+SLAB_ATTR(remote_node_defrag_ratio);
 #endif
 
-static struct attribute * slab_attrs[] = {
+static struct attribute *slab_attrs[] = {
 	&slab_size_attr.attr,
 	&object_size_attr.attr,
 	&objs_per_slab_attr.attr,
@@ -3893,7 +3900,7 @@
 	&cache_dma_attr.attr,
 #endif
 #ifdef CONFIG_NUMA
-	&defrag_ratio_attr.attr,
+	&remote_node_defrag_ratio_attr.attr,
 #endif
 	NULL
 };
@@ -3940,6 +3947,13 @@
 	return err;
 }
 
+static void kmem_cache_release(struct kobject *kobj)
+{
+	struct kmem_cache *s = to_slab(kobj);
+
+	kfree(s);
+}
+
 static struct sysfs_ops slab_sysfs_ops = {
 	.show = slab_attr_show,
 	.store = slab_attr_store,
@@ -3947,6 +3961,7 @@
 
 static struct kobj_type slab_ktype = {
 	.sysfs_ops = &slab_sysfs_ops,
+	.release = kmem_cache_release
 };
 
 static int uevent_filter(struct kset *kset, struct kobject *kobj)
@@ -4048,6 +4063,7 @@
 {
 	kobject_uevent(&s->kobj, KOBJ_REMOVE);
 	kobject_del(&s->kobj);
+	kobject_put(&s->kobj);
 }
 
 /*
diff --git a/mm/sparse.c b/mm/sparse.c
index a2183cb..f6a43c0 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -237,7 +237,7 @@
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
-static unsigned long *sparse_early_usemap_alloc(unsigned long pnum)
+static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum)
 {
 	unsigned long *usemap;
 	struct mem_section *ms = __nr_to_section(pnum);
@@ -353,17 +353,9 @@
 	return __kmalloc_section_memmap(nr_pages);
 }
 
-static int vaddr_in_vmalloc_area(void *addr)
-{
-	if (addr >= (void *)VMALLOC_START &&
-	    addr < (void *)VMALLOC_END)
-		return 1;
-	return 0;
-}
-
 static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
 {
-	if (vaddr_in_vmalloc_area(memmap))
+	if (is_vmalloc_addr(memmap))
 		vfree(memmap);
 	else
 		free_pages((unsigned long)memmap,
diff --git a/mm/swap.c b/mm/swap.c
index 9ac8832..57b7e25 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -41,7 +41,7 @@
  * This path almost never happens for VM activity - pages are normally
  * freed via pagevecs.  But it gets used by networking.
  */
-static void fastcall __page_cache_release(struct page *page)
+static void __page_cache_release(struct page *page)
 {
 	if (PageLRU(page)) {
 		unsigned long flags;
@@ -165,7 +165,7 @@
 /*
  * FIXME: speed this up?
  */
-void fastcall activate_page(struct page *page)
+void activate_page(struct page *page)
 {
 	struct zone *zone = page_zone(page);
 
@@ -186,7 +186,7 @@
  * inactive,referenced		->	active,unreferenced
  * active,unreferenced		->	active,referenced
  */
-void fastcall mark_page_accessed(struct page *page)
+void mark_page_accessed(struct page *page)
 {
 	if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) {
 		activate_page(page);
@@ -202,7 +202,7 @@
  * lru_cache_add: add a page to the page lists
  * @page: the page to add
  */
-void fastcall lru_cache_add(struct page *page)
+void lru_cache_add(struct page *page)
 {
 	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
 
@@ -212,7 +212,7 @@
 	put_cpu_var(lru_add_pvecs);
 }
 
-void fastcall lru_cache_add_active(struct page *page)
+void lru_cache_add_active(struct page *page)
 {
 	struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs);
 
diff --git a/mm/swap_state.c b/mm/swap_state.c
index b526356..ec42f01 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -10,6 +10,7 @@
 #include <linux/mm.h>
 #include <linux/kernel_stat.h>
 #include <linux/swap.h>
+#include <linux/swapops.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
@@ -51,26 +52,22 @@
 	unsigned long del_total;
 	unsigned long find_success;
 	unsigned long find_total;
-	unsigned long noent_race;
-	unsigned long exist_race;
 } swap_cache_info;
 
 void show_swap_cache_info(void)
 {
-	printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n",
+	printk("Swap cache: add %lu, delete %lu, find %lu/%lu\n",
 		swap_cache_info.add_total, swap_cache_info.del_total,
-		swap_cache_info.find_success, swap_cache_info.find_total,
-		swap_cache_info.noent_race, swap_cache_info.exist_race);
+		swap_cache_info.find_success, swap_cache_info.find_total);
 	printk("Free swap  = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10));
 	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
 }
 
 /*
- * __add_to_swap_cache resembles add_to_page_cache on swapper_space,
+ * add_to_swap_cache resembles add_to_page_cache on swapper_space,
  * but sets SwapCache flag and private instead of mapping and index.
  */
-static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
-			       gfp_t gfp_mask)
+int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 {
 	int error;
 
@@ -88,6 +85,7 @@
 			set_page_private(page, entry.val);
 			total_swapcache_pages++;
 			__inc_zone_page_state(page, NR_FILE_PAGES);
+			INC_CACHE_INFO(add_total);
 		}
 		write_unlock_irq(&swapper_space.tree_lock);
 		radix_tree_preload_end();
@@ -95,31 +93,6 @@
 	return error;
 }
 
-static int add_to_swap_cache(struct page *page, swp_entry_t entry)
-{
-	int error;
-
-	BUG_ON(PageLocked(page));
-	if (!swap_duplicate(entry)) {
-		INC_CACHE_INFO(noent_race);
-		return -ENOENT;
-	}
-	SetPageLocked(page);
-	error = __add_to_swap_cache(page, entry, GFP_KERNEL);
-	/*
-	 * Anon pages are already on the LRU, we don't run lru_cache_add here.
-	 */
-	if (error) {
-		ClearPageLocked(page);
-		swap_free(entry);
-		if (error == -EEXIST)
-			INC_CACHE_INFO(exist_race);
-		return error;
-	}
-	INC_CACHE_INFO(add_total);
-	return 0;
-}
-
 /*
  * This must be called only on pages that have
  * been verified to be in the swap cache.
@@ -152,6 +125,7 @@
 	int err;
 
 	BUG_ON(!PageLocked(page));
+	BUG_ON(!PageUptodate(page));
 
 	for (;;) {
 		entry = get_swap_page();
@@ -169,18 +143,15 @@
 		/*
 		 * Add it to the swap cache and mark it dirty
 		 */
-		err = __add_to_swap_cache(page, entry,
+		err = add_to_swap_cache(page, entry,
 				gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN);
 
 		switch (err) {
 		case 0:				/* Success */
-			SetPageUptodate(page);
 			SetPageDirty(page);
-			INC_CACHE_INFO(add_total);
 			return 1;
 		case -EEXIST:
 			/* Raced with "speculative" read_swap_cache_async */
-			INC_CACHE_INFO(exist_race);
 			swap_free(entry);
 			continue;
 		default:
@@ -211,40 +182,6 @@
 	page_cache_release(page);
 }
 
-/*
- * Strange swizzling function only for use by shmem_writepage
- */
-int move_to_swap_cache(struct page *page, swp_entry_t entry)
-{
-	int err = __add_to_swap_cache(page, entry, GFP_ATOMIC);
-	if (!err) {
-		remove_from_page_cache(page);
-		page_cache_release(page);	/* pagecache ref */
-		if (!swap_duplicate(entry))
-			BUG();
-		SetPageDirty(page);
-		INC_CACHE_INFO(add_total);
-	} else if (err == -EEXIST)
-		INC_CACHE_INFO(exist_race);
-	return err;
-}
-
-/*
- * Strange swizzling function for shmem_getpage (and shmem_unuse)
- */
-int move_from_swap_cache(struct page *page, unsigned long index,
-		struct address_space *mapping)
-{
-	int err = add_to_page_cache(page, mapping, index, GFP_ATOMIC);
-	if (!err) {
-		delete_from_swap_cache(page);
-		/* shift page from clean_pages to dirty_pages list */
-		ClearPageDirty(page);
-		set_page_dirty(page);
-	}
-	return err;
-}
-
 /* 
  * If we are the only user, then try to free up the swap cache. 
  * 
@@ -317,7 +254,7 @@
  * A failure return means that either the page allocation failed or that
  * the swap entry is no longer in use.
  */
-struct page *read_swap_cache_async(swp_entry_t entry,
+struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *found_page, *new_page = NULL;
@@ -337,23 +274,27 @@
 		 * Get a new page to read into from swap.
 		 */
 		if (!new_page) {
-			new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
-								vma, addr);
+			new_page = alloc_page_vma(gfp_mask, vma, addr);
 			if (!new_page)
 				break;		/* Out of memory */
 		}
 
 		/*
+		 * Swap entry may have been freed since our caller observed it.
+		 */
+		if (!swap_duplicate(entry))
+			break;
+
+		/*
 		 * Associate the page with swap entry in the swap cache.
-		 * May fail (-ENOENT) if swap entry has been freed since
-		 * our caller observed it.  May fail (-EEXIST) if there
-		 * is already a page associated with this entry in the
-		 * swap cache: added by a racing read_swap_cache_async,
-		 * or by try_to_swap_out (or shmem_writepage) re-using
-		 * the just freed swap entry for an existing page.
+		 * May fail (-EEXIST) if there is already a page associated
+		 * with this entry in the swap cache: added by a racing
+		 * read_swap_cache_async, or add_to_swap or shmem_writepage
+		 * re-using the just freed swap entry for an existing page.
 		 * May fail (-ENOMEM) if radix-tree node allocation failed.
 		 */
-		err = add_to_swap_cache(new_page, entry);
+		SetPageLocked(new_page);
+		err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
 		if (!err) {
 			/*
 			 * Initiate read into locked page and return.
@@ -362,9 +303,57 @@
 			swap_readpage(NULL, new_page);
 			return new_page;
 		}
-	} while (err != -ENOENT && err != -ENOMEM);
+		ClearPageLocked(new_page);
+		swap_free(entry);
+	} while (err != -ENOMEM);
 
 	if (new_page)
 		page_cache_release(new_page);
 	return found_page;
 }
+
+/**
+ * swapin_readahead - swap in pages in hope we need them soon
+ * @entry: swap entry of this memory
+ * @gfp_mask: memory allocation flags
+ * @vma: user vma this address belongs to
+ * @addr: target address for mempolicy
+ *
+ * Returns the struct page for entry and addr, after queueing swapin.
+ *
+ * Primitive swap readahead code. We simply read an aligned block of
+ * (1 << page_cluster) entries in the swap area. This method is chosen
+ * because it doesn't cost us any seek time.  We also make sure to queue
+ * the 'original' request together with the readahead ones...
+ *
+ * This uses the NUMA policies from the mm triggering the readahead.
+ *
+ * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
+ */
+struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
+			struct vm_area_struct *vma, unsigned long addr)
+{
+	int nr_pages;
+	struct page *page;
+	unsigned long offset;
+	unsigned long end_offset;
+
+	/*
+	 * Get starting offset for readaround, and number of pages to read.
+	 * Adjust starting address by readbehind (for NUMA interleave case)?
+	 * No, it's very unlikely that swap layout would follow vma layout,
+	 * more likely that neighbouring swap pages came from the same node:
+	 * so use the same "addr" to choose the same node for each swap read.
+	 */
+	nr_pages = valid_swaphandles(entry, &offset);
+	for (end_offset = offset + nr_pages; offset < end_offset; offset++) {
+		/* Ok, do the async read-ahead now */
+		page = read_swap_cache_async(swp_entry(swp_type(entry), offset),
+						gfp_mask, vma, addr);
+		if (!page)
+			break;
+		page_cache_release(page);
+	}
+	lru_add_drain();	/* Push any new pages onto the LRU now */
+	return read_swap_cache_async(entry, gfp_mask, vma, addr);
+}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f071648..eade24d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -506,9 +506,19 @@
  * just let do_wp_page work it out if a write is requested later - to
  * force COW, vm_page_prot omits write permission from any private vma.
  */
-static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
+static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, swp_entry_t entry, struct page *page)
 {
+	spinlock_t *ptl;
+	pte_t *pte;
+	int found = 1;
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
+		found = 0;
+		goto out;
+	}
+
 	inc_mm_counter(vma->vm_mm, anon_rss);
 	get_page(page);
 	set_pte_at(vma->vm_mm, addr, pte,
@@ -520,6 +530,9 @@
 	 * immediately swapped out again after swapon.
 	 */
 	activate_page(page);
+out:
+	pte_unmap_unlock(pte, ptl);
+	return found;
 }
 
 static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
@@ -528,22 +541,33 @@
 {
 	pte_t swp_pte = swp_entry_to_pte(entry);
 	pte_t *pte;
-	spinlock_t *ptl;
 	int found = 0;
 
-	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	/*
+	 * We don't actually need pte lock while scanning for swp_pte: since
+	 * we hold page lock and mmap_sem, swp_pte cannot be inserted into the
+	 * page table while we're scanning; though it could get zapped, and on
+	 * some architectures (e.g. x86_32 with PAE) we might catch a glimpse
+	 * of unmatched parts which look like swp_pte, so unuse_pte must
+	 * recheck under pte lock.  Scanning without pte lock lets it be
+	 * preemptible whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE.
+	 */
+	pte = pte_offset_map(pmd, addr);
 	do {
 		/*
 		 * swapoff spends a _lot_ of time in this loop!
 		 * Test inline before going to call unuse_pte.
 		 */
 		if (unlikely(pte_same(*pte, swp_pte))) {
-			unuse_pte(vma, pte++, addr, entry, page);
-			found = 1;
-			break;
+			pte_unmap(pte);
+			found = unuse_pte(vma, pmd, addr, entry, page);
+			if (found)
+				goto out;
+			pte = pte_offset_map(pmd, addr);
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
-	pte_unmap_unlock(pte - 1, ptl);
+	pte_unmap(pte - 1);
+out:
 	return found;
 }
 
@@ -730,7 +754,8 @@
 		 */
 		swap_map = &si->swap_map[i];
 		entry = swp_entry(type, i);
-		page = read_swap_cache_async(entry, NULL, 0);
+		page = read_swap_cache_async(entry,
+					GFP_HIGHUSER_MOVABLE, NULL, 0);
 		if (!page) {
 			/*
 			 * Either swap_duplicate() failed because entry
@@ -789,7 +814,7 @@
 			atomic_inc(&new_start_mm->mm_users);
 			atomic_inc(&prev_mm->mm_users);
 			spin_lock(&mmlist_lock);
-			while (*swap_map > 1 && !retval &&
+			while (*swap_map > 1 && !retval && !shmem &&
 					(p = p->next) != &start_mm->mmlist) {
 				mm = list_entry(p, struct mm_struct, mmlist);
 				if (!atomic_inc_not_zero(&mm->mm_users))
@@ -821,6 +846,13 @@
 			mmput(start_mm);
 			start_mm = new_start_mm;
 		}
+		if (shmem) {
+			/* page has already been unlocked and released */
+			if (shmem > 0)
+				continue;
+			retval = shmem;
+			break;
+		}
 		if (retval) {
 			unlock_page(page);
 			page_cache_release(page);
@@ -859,12 +891,6 @@
 		 * read from disk into another page.  Splitting into two
 		 * pages would be incorrect if swap supported "shared
 		 * private" pages, but they are handled by tmpfs files.
-		 *
-		 * Note shmem_unuse already deleted a swappage from
-		 * the swap cache, unless the move to filepage failed:
-		 * in which case it left swappage in cache, lowered its
-		 * swap count to pass quickly through the loops above,
-		 * and now we must reincrement count to try again later.
 		 */
 		if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
 			struct writeback_control wbc = {
@@ -875,12 +901,8 @@
 			lock_page(page);
 			wait_on_page_writeback(page);
 		}
-		if (PageSwapCache(page)) {
-			if (shmem)
-				swap_duplicate(entry);
-			else
-				delete_from_swap_cache(page);
-		}
+		if (PageSwapCache(page))
+			delete_from_swap_cache(page);
 
 		/*
 		 * So we could skip searching mms once swap count went
@@ -1768,31 +1790,48 @@
  */
 int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
 {
+	struct swap_info_struct *si;
 	int our_page_cluster = page_cluster;
-	int ret = 0, i = 1 << our_page_cluster;
-	unsigned long toff;
-	struct swap_info_struct *swapdev = swp_type(entry) + swap_info;
+	pgoff_t target, toff;
+	pgoff_t base, end;
+	int nr_pages = 0;
 
 	if (!our_page_cluster)	/* no readahead */
 		return 0;
-	toff = (swp_offset(entry) >> our_page_cluster) << our_page_cluster;
-	if (!toff)		/* first page is swap header */
-		toff++, i--;
-	*offset = toff;
+
+	si = &swap_info[swp_type(entry)];
+	target = swp_offset(entry);
+	base = (target >> our_page_cluster) << our_page_cluster;
+	end = base + (1 << our_page_cluster);
+	if (!base)		/* first page is swap header */
+		base++;
 
 	spin_lock(&swap_lock);
-	do {
-		/* Don't read-ahead past the end of the swap area */
-		if (toff >= swapdev->max)
-			break;
+	if (end > si->max)	/* don't go beyond end of map */
+		end = si->max;
+
+	/* Count contiguous allocated slots above our target */
+	for (toff = target; ++toff < end; nr_pages++) {
 		/* Don't read in free or bad pages */
-		if (!swapdev->swap_map[toff])
+		if (!si->swap_map[toff])
 			break;
-		if (swapdev->swap_map[toff] == SWAP_MAP_BAD)
+		if (si->swap_map[toff] == SWAP_MAP_BAD)
 			break;
-		toff++;
-		ret++;
-	} while (--i);
+	}
+	/* Count contiguous allocated slots below our target */
+	for (toff = target; --toff >= base; nr_pages++) {
+		/* Don't read in free or bad pages */
+		if (!si->swap_map[toff])
+			break;
+		if (si->swap_map[toff] == SWAP_MAP_BAD)
+			break;
+	}
 	spin_unlock(&swap_lock);
-	return ret;
+
+	/*
+	 * Indicate starting offset, and return number of pages to get:
+	 * if only 1, say 0, since there's then no readahead to be done.
+	 */
+	*offset = ++toff;
+	return nr_pages? ++nr_pages: 0;
 }
diff --git a/mm/tiny-shmem.c b/mm/tiny-shmem.c
index d436a9c..7020836 100644
--- a/mm/tiny-shmem.c
+++ b/mm/tiny-shmem.c
@@ -121,18 +121,6 @@
 	return 0;
 }
 
-#if 0
-int shmem_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	file_accessed(file);
-#ifndef CONFIG_MMU
-	return ramfs_nommu_mmap(file, vma);
-#else
-	return 0;
-#endif
-}
-#endif  /*  0  */
-
 #ifndef CONFIG_MMU
 unsigned long shmem_get_unmapped_area(struct file *file,
 				      unsigned long addr,
diff --git a/mm/truncate.c b/mm/truncate.c
index c3123b0..c35c49e 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -48,7 +48,7 @@
 
 static inline void truncate_partial_page(struct page *page, unsigned partial)
 {
-	zero_user_page(page, partial, PAGE_CACHE_SIZE - partial, KM_USER0);
+	zero_user_segment(page, partial, PAGE_CACHE_SIZE);
 	if (PagePrivate(page))
 		do_invalidatepage(page, partial);
 }
@@ -84,7 +84,7 @@
 
 /*
  * If truncate cannot remove the fs-private metadata from the page, the page
- * becomes anonymous.  It will be left on the LRU and may even be mapped into
+ * becomes orphaned.  It will be left on the LRU and may even be mapped into
  * user pagetables if we're racing with filemap_fault().
  *
  * We need to bale out if page->mapping is no longer equal to the original
@@ -98,11 +98,11 @@
 	if (page->mapping != mapping)
 		return;
 
-	cancel_dirty_page(page, PAGE_CACHE_SIZE);
-
 	if (PagePrivate(page))
 		do_invalidatepage(page, 0);
 
+	cancel_dirty_page(page, PAGE_CACHE_SIZE);
+
 	remove_from_page_cache(page);
 	ClearPageUptodate(page);
 	ClearPageMappedToDisk(page);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index af77e17..0536dde 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -166,6 +166,44 @@
 }
 EXPORT_SYMBOL_GPL(map_vm_area);
 
+/*
+ * Map a vmalloc()-space virtual address to the physical page.
+ */
+struct page *vmalloc_to_page(const void *vmalloc_addr)
+{
+	unsigned long addr = (unsigned long) vmalloc_addr;
+	struct page *page = NULL;
+	pgd_t *pgd = pgd_offset_k(addr);
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+
+	if (!pgd_none(*pgd)) {
+		pud = pud_offset(pgd, addr);
+		if (!pud_none(*pud)) {
+			pmd = pmd_offset(pud, addr);
+			if (!pmd_none(*pmd)) {
+				ptep = pte_offset_map(pmd, addr);
+				pte = *ptep;
+				if (pte_present(pte))
+					page = pte_page(pte);
+				pte_unmap(ptep);
+			}
+		}
+	}
+	return page;
+}
+EXPORT_SYMBOL(vmalloc_to_page);
+
+/*
+ * Map a vmalloc()-space virtual address to the physical page frame number.
+ */
+unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
+{
+	return page_to_pfn(vmalloc_to_page(vmalloc_addr));
+}
+EXPORT_SYMBOL(vmalloc_to_pfn);
+
 static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags,
 					    unsigned long start, unsigned long end,
 					    int node, gfp_t gfp_mask)
@@ -216,6 +254,10 @@
 		if (addr > end - size)
 			goto out;
 	}
+	if ((size + addr) < addr)
+		goto out;
+	if (addr > end - size)
+		goto out;
 
 found:
 	area->next = *p;
@@ -268,7 +310,7 @@
 }
 
 /* Caller must hold vmlist_lock */
-static struct vm_struct *__find_vm_area(void *addr)
+static struct vm_struct *__find_vm_area(const void *addr)
 {
 	struct vm_struct *tmp;
 
@@ -281,7 +323,7 @@
 }
 
 /* Caller must hold vmlist_lock */
-static struct vm_struct *__remove_vm_area(void *addr)
+static struct vm_struct *__remove_vm_area(const void *addr)
 {
 	struct vm_struct **p, *tmp;
 
@@ -310,7 +352,7 @@
  *	This function returns the found VM area, but using it is NOT safe
  *	on SMP machines, except for its size or flags.
  */
-struct vm_struct *remove_vm_area(void *addr)
+struct vm_struct *remove_vm_area(const void *addr)
 {
 	struct vm_struct *v;
 	write_lock(&vmlist_lock);
@@ -319,7 +361,7 @@
 	return v;
 }
 
-static void __vunmap(void *addr, int deallocate_pages)
+static void __vunmap(const void *addr, int deallocate_pages)
 {
 	struct vm_struct *area;
 
@@ -346,8 +388,10 @@
 		int i;
 
 		for (i = 0; i < area->nr_pages; i++) {
-			BUG_ON(!area->pages[i]);
-			__free_page(area->pages[i]);
+			struct page *page = area->pages[i];
+
+			BUG_ON(!page);
+			__free_page(page);
 		}
 
 		if (area->flags & VM_VPAGES)
@@ -370,7 +414,7 @@
  *
  *	Must not be called in interrupt context.
  */
-void vfree(void *addr)
+void vfree(const void *addr)
 {
 	BUG_ON(in_interrupt());
 	__vunmap(addr, 1);
@@ -386,7 +430,7 @@
  *
  *	Must not be called in interrupt context.
  */
-void vunmap(void *addr)
+void vunmap(const void *addr)
 {
 	BUG_ON(in_interrupt());
 	__vunmap(addr, 0);
@@ -423,8 +467,8 @@
 }
 EXPORT_SYMBOL(vmap);
 
-void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
-				pgprot_t prot, int node)
+static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
+				 pgprot_t prot, int node)
 {
 	struct page **pages;
 	unsigned int nr_pages, array_size, i;
@@ -451,15 +495,19 @@
 	}
 
 	for (i = 0; i < area->nr_pages; i++) {
+		struct page *page;
+
 		if (node < 0)
-			area->pages[i] = alloc_page(gfp_mask);
+			page = alloc_page(gfp_mask);
 		else
-			area->pages[i] = alloc_pages_node(node, gfp_mask, 0);
-		if (unlikely(!area->pages[i])) {
+			page = alloc_pages_node(node, gfp_mask, 0);
+
+		if (unlikely(!page)) {
 			/* Successfully allocated i pages, free them in __vunmap() */
 			area->nr_pages = i;
 			goto fail;
 		}
+		area->pages[i] = page;
 	}
 
 	if (map_vm_area(area, prot, &pages))
diff --git a/mm/vmstat.c b/mm/vmstat.c
index e8d846f..422d960 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -21,21 +21,14 @@
 
 static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
 {
-	int cpu = 0;
+	int cpu;
 	int i;
 
 	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
 
-	cpu = first_cpu(*cpumask);
-	while (cpu < NR_CPUS) {
+	for_each_cpu_mask(cpu, *cpumask) {
 		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
 
-		cpu = next_cpu(cpu, *cpumask);
-
-		if (cpu < NR_CPUS)
-			prefetch(&per_cpu(vm_event_states, cpu));
-
-
 		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
 			ret[i] += this->event[i];
 	}
@@ -284,6 +277,10 @@
 /*
  * Update the zone counters for one cpu.
  *
+ * The cpu specified must be either the current cpu or a processor that
+ * is not online. If it is the current cpu then the execution thread must
+ * be pinned to the current cpu.
+ *
  * Note that refresh_cpu_vm_stats strives to only access
  * node local memory. The per cpu pagesets on remote zones are placed
  * in the memory local to the processor using that pageset. So the
@@ -299,7 +296,7 @@
 {
 	struct zone *zone;
 	int i;
-	unsigned long flags;
+	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
 
 	for_each_zone(zone) {
 		struct per_cpu_pageset *p;
@@ -311,15 +308,19 @@
 
 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
 			if (p->vm_stat_diff[i]) {
+				unsigned long flags;
+				int v;
+
 				local_irq_save(flags);
-				zone_page_state_add(p->vm_stat_diff[i],
-					zone, i);
+				v = p->vm_stat_diff[i];
 				p->vm_stat_diff[i] = 0;
+				local_irq_restore(flags);
+				atomic_long_add(v, &zone->vm_stat[i]);
+				global_diff[i] += v;
 #ifdef CONFIG_NUMA
 				/* 3 seconds idle till flush */
 				p->expire = 3;
 #endif
-				local_irq_restore(flags);
 			}
 #ifdef CONFIG_NUMA
 		/*
@@ -329,7 +330,7 @@
 		 * Check if there are pages remaining in this pageset
 		 * if not then there is nothing to expire.
 		 */
-		if (!p->expire || (!p->pcp[0].count && !p->pcp[1].count))
+		if (!p->expire || !p->pcp.count)
 			continue;
 
 		/*
@@ -344,13 +345,14 @@
 		if (p->expire)
 			continue;
 
-		if (p->pcp[0].count)
-			drain_zone_pages(zone, p->pcp + 0);
-
-		if (p->pcp[1].count)
-			drain_zone_pages(zone, p->pcp + 1);
+		if (p->pcp.count)
+			drain_zone_pages(zone, &p->pcp);
 #endif
 	}
+
+	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+		if (global_diff[i])
+			atomic_long_add(global_diff[i], &vm_stat[i]);
 }
 
 #endif
@@ -681,20 +683,17 @@
 		   "\n  pagesets");
 	for_each_online_cpu(i) {
 		struct per_cpu_pageset *pageset;
-		int j;
 
 		pageset = zone_pcp(zone, i);
-		for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
-			seq_printf(m,
-				   "\n    cpu: %i pcp: %i"
-				   "\n              count: %i"
-				   "\n              high:  %i"
-				   "\n              batch: %i",
-				   i, j,
-				   pageset->pcp[j].count,
-				   pageset->pcp[j].high,
-				   pageset->pcp[j].batch);
-			}
+		seq_printf(m,
+			   "\n    cpu: %i"
+			   "\n              count: %i"
+			   "\n              high:  %i"
+			   "\n              batch: %i",
+			   i,
+			   pageset->pcp.count,
+			   pageset->pcp.high,
+			   pageset->pcp.batch);
 #ifdef CONFIG_SMP
 		seq_printf(m, "\n  vm stats threshold: %d",
 				pageset->stat_threshold);
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 782a226..519cdb9 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -135,8 +135,8 @@
 	}
 }
 
-static inline int hidp_queue_event(struct hidp_session *session, struct input_dev *dev,
-					unsigned int type, unsigned int code, int value)
+static int hidp_queue_event(struct hidp_session *session, struct input_dev *dev,
+				unsigned int type, unsigned int code, int value)
 {
 	unsigned char newleds;
 	struct sk_buff *skb;
@@ -243,7 +243,8 @@
 	input_sync(dev);
 }
 
-static inline int hidp_queue_report(struct hidp_session *session, unsigned char *data, int size)
+static int hidp_queue_report(struct hidp_session *session,
+				unsigned char *data, int size)
 {
 	struct sk_buff *skb;
 
@@ -287,7 +288,7 @@
 	hidp_schedule(session);
 }
 
-static inline void hidp_set_timer(struct hidp_session *session)
+static void hidp_set_timer(struct hidp_session *session)
 {
 	if (session->idle_to > 0)
 		mod_timer(&session->timer, jiffies + HZ * session->idle_to);
@@ -332,7 +333,8 @@
 	return err;
 }
 
-static inline void hidp_process_handshake(struct hidp_session *session, unsigned char param)
+static void hidp_process_handshake(struct hidp_session *session,
+					unsigned char param)
 {
 	BT_DBG("session %p param 0x%02x", session, param);
 
@@ -365,38 +367,23 @@
 	}
 }
 
-static inline void hidp_process_hid_control(struct hidp_session *session, unsigned char param)
+static void hidp_process_hid_control(struct hidp_session *session,
+					unsigned char param)
 {
 	BT_DBG("session %p param 0x%02x", session, param);
 
-	switch (param) {
-	case HIDP_CTRL_NOP:
-		break;
-
-	case HIDP_CTRL_VIRTUAL_CABLE_UNPLUG:
+	if (param == HIDP_CTRL_VIRTUAL_CABLE_UNPLUG) {
 		/* Flush the transmit queues */
 		skb_queue_purge(&session->ctrl_transmit);
 		skb_queue_purge(&session->intr_transmit);
 
 		/* Kill session thread */
 		atomic_inc(&session->terminate);
-		break;
-
-	case HIDP_CTRL_HARD_RESET:
-	case HIDP_CTRL_SOFT_RESET:
-	case HIDP_CTRL_SUSPEND:
-	case HIDP_CTRL_EXIT_SUSPEND:
-		/* FIXME: We have to parse these and return no error */
-		break;
-
-	default:
-		__hidp_send_ctrl_message(session,
-			HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0);
-		break;
 	}
 }
 
-static inline void hidp_process_data(struct hidp_session *session, struct sk_buff *skb, unsigned char param)
+static void hidp_process_data(struct hidp_session *session, struct sk_buff *skb,
+				unsigned char param)
 {
 	BT_DBG("session %p skb %p len %d param 0x%02x", session, skb, skb->len, param);
 
@@ -423,7 +410,8 @@
 	}
 }
 
-static inline void hidp_recv_ctrl_frame(struct hidp_session *session, struct sk_buff *skb)
+static void hidp_recv_ctrl_frame(struct hidp_session *session,
+					struct sk_buff *skb)
 {
 	unsigned char hdr, type, param;
 
@@ -457,7 +445,8 @@
 	kfree_skb(skb);
 }
 
-static inline void hidp_recv_intr_frame(struct hidp_session *session, struct sk_buff *skb)
+static void hidp_recv_intr_frame(struct hidp_session *session,
+				struct sk_buff *skb)
 {
 	unsigned char hdr;
 
@@ -625,7 +614,8 @@
 	return conn ? &conn->dev : NULL;
 }
 
-static inline int hidp_setup_input(struct hidp_session *session, struct hidp_connadd_req *req)
+static int hidp_setup_input(struct hidp_session *session,
+				struct hidp_connadd_req *req)
 {
 	struct input_dev *input = session->input;
 	int i;
@@ -702,7 +692,8 @@
 			hid->quirks = hidp_blacklist[n].quirks;
 }
 
-static inline void hidp_setup_hid(struct hidp_session *session, struct hidp_connadd_req *req)
+static void hidp_setup_hid(struct hidp_session *session,
+				struct hidp_connadd_req *req)
 {
 	struct hid_device *hid = session->hid;
 	struct hid_report *report;
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 788c703..e4c779b 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -429,7 +429,8 @@
 	if (dev->tty)
 		tty_vhangup(dev->tty);
 
-	rfcomm_dev_del(dev);
+	if (!test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags))
+		rfcomm_dev_del(dev);
 	rfcomm_dev_put(dev);
 	return 0;
 }
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ddbdde8..61ac8d0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -82,32 +82,6 @@
 	return mutex_trylock(&rtnl_mutex);
 }
 
-int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
-{
-	memset(tb, 0, sizeof(struct rtattr*)*maxattr);
-
-	while (RTA_OK(rta, len)) {
-		unsigned flavor = rta->rta_type;
-		if (flavor && flavor <= maxattr)
-			tb[flavor-1] = rta;
-		rta = RTA_NEXT(rta, len);
-	}
-	return 0;
-}
-
-int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr,
-				 struct rtattr *rta, int len)
-{
-	if (RTA_PAYLOAD(rta) < len)
-		return -1;
-	if (RTA_PAYLOAD(rta) >= RTA_ALIGN(len) + sizeof(struct rtattr)) {
-		rta = RTA_DATA(rta) + RTA_ALIGN(len);
-		return rtattr_parse_nested(tb, maxattr, rta);
-	}
-	memset(tb, 0, sizeof(struct rtattr *) * maxattr);
-	return 0;
-}
-
 static struct rtnl_link *rtnl_msg_handlers[NPROTO];
 
 static inline int rtm_msgindex(int msgtype)
@@ -442,21 +416,6 @@
 	memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size);
 }
 
-size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size)
-{
-	size_t ret = RTA_PAYLOAD(rta);
-	char *src = RTA_DATA(rta);
-
-	if (ret > 0 && src[ret - 1] == '\0')
-		ret--;
-	if (size > 0) {
-		size_t len = (ret >= size) ? size - 1 : ret;
-		memset(dest, 0, size);
-		memcpy(dest, src, len);
-	}
-	return ret;
-}
-
 int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo)
 {
 	struct sock *rtnl = net->rtnl;
@@ -1411,9 +1370,6 @@
 }
 
 EXPORT_SYMBOL(__rta_fill);
-EXPORT_SYMBOL(rtattr_strlcpy);
-EXPORT_SYMBOL(rtattr_parse);
-EXPORT_SYMBOL(__rtattr_parse_nested_compat);
 EXPORT_SYMBOL(rtnetlink_put_metrics);
 EXPORT_SYMBOL(rtnl_lock);
 EXPORT_SYMBOL(rtnl_trylock);
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index a224106..8cd357f 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -547,8 +547,8 @@
 		rcu_read_lock();
 		list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list)
 			if (dom_iter->valid)
-				netlbl_domhsh_remove(dom_iter->domain,
-						     audit_info);
+				netlbl_cfg_map_del(dom_iter->domain,
+						   audit_info);
 		rcu_read_unlock();
 		cipso_v4_cache_invalidate();
 		call_rcu(&doi_def->rcu, callback);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 35851c9..f5fba3f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2431,8 +2431,7 @@
 					   rtn_type(buf2, sizeof(buf2),
 						    fa->fa_type));
 				if (fa->fa_tos)
-					seq_printf(seq, "tos =%d\n",
-						   fa->fa_tos);
+					seq_printf(seq, " tos=%d", fa->fa_tos);
 				seq_putc(seq, '\n');
 			}
 		}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index a7321a8..a13c074 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1015,7 +1015,8 @@
 			goto error;
 	}
 
-	__skb_pull(skb, sizeof(*icmph));
+	if (!pskb_pull(skb, sizeof(*icmph)))
+		goto error;
 
 	icmph = icmp_hdr(skb);
 
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 90f422c..9cac6c0 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -398,7 +398,7 @@
 EXPORT_SYMBOL_GPL(inet_unhash);
 
 int __inet_hash_connect(struct inet_timewait_death_row *death_row,
-		struct sock *sk,
+		struct sock *sk, u32 port_offset,
 		int (*check_established)(struct inet_timewait_death_row *,
 			struct sock *, __u16, struct inet_timewait_sock **),
 		void (*hash)(struct sock *sk))
@@ -413,7 +413,7 @@
 	if (!snum) {
 		int i, remaining, low, high, port;
 		static u32 hint;
-		u32 offset = hint + inet_sk_port_offset(sk);
+		u32 offset = hint + port_offset;
 		struct hlist_node *node;
 		struct inet_timewait_sock *tw = NULL;
 
@@ -502,7 +502,7 @@
 int inet_hash_connect(struct inet_timewait_death_row *death_row,
 		      struct sock *sk)
 {
-	return __inet_hash_connect(death_row, sk,
+	return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
 			__inet_check_established, __inet_hash_nolisten);
 }
 
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index e093a7b..b47030b 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -102,7 +102,7 @@
 
 		XFRM_MODE_SKB_CB(skb)->protocol = ph->nexthdr;
 
-		if (!pskb_may_pull(skb, phlen));
+		if (!pskb_may_pull(skb, phlen))
 			goto out;
 		__skb_pull(skb, phlen);
 	}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index cbb5b9c..121d517 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -683,7 +683,8 @@
 		}
 	}
 
-	__skb_pull(skb, sizeof(*hdr));
+	if (!pskb_pull(skb, sizeof(*hdr)))
+		goto discard_it;
 
 	hdr = icmp6_hdr(skb);
 
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 43f3993..99fd25f 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -236,7 +236,7 @@
 int inet6_hash_connect(struct inet_timewait_death_row *death_row,
 		       struct sock *sk)
 {
-	return __inet_hash_connect(death_row, sk,
+	return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk),
 			__inet6_check_established, __inet6_hash);
 }
 
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index becf91a..c7ad64d 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -90,7 +90,7 @@
  * safely.
  *
  */
-static void netlbl_cipsov4_doi_free(struct rcu_head *entry)
+void netlbl_cipsov4_doi_free(struct rcu_head *entry)
 {
 	struct cipso_v4_doi *ptr;
 
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
index f03cf9b..220cb9d 100644
--- a/net/netlabel/netlabel_cipso_v4.h
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -163,4 +163,7 @@
 /* NetLabel protocol functions */
 int netlbl_cipsov4_genl_init(void);
 
+/* Free the memory associated with a CIPSOv4 DOI definition */
+void netlbl_cipsov4_doi_free(struct rcu_head *entry);
+
 #endif
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 3689956..8220990 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -61,6 +61,7 @@
 		      struct netlbl_audit *audit_info);
 int netlbl_domhsh_add_default(struct netlbl_dom_map *entry,
 			      struct netlbl_audit *audit_info);
+int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info);
 int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info);
 struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
 int netlbl_domhsh_walk(u32 *skip_bkt,
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index c69e3e1..39793a1 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -30,6 +30,7 @@
 
 #include <linux/init.h>
 #include <linux/types.h>
+#include <linux/audit.h>
 #include <net/ip.h>
 #include <net/netlabel.h>
 #include <net/cipso_ipv4.h>
@@ -38,10 +39,186 @@
 
 #include "netlabel_domainhash.h"
 #include "netlabel_unlabeled.h"
+#include "netlabel_cipso_v4.h"
 #include "netlabel_user.h"
 #include "netlabel_mgmt.h"
 
 /*
+ * Configuration Functions
+ */
+
+/**
+ * netlbl_cfg_map_del - Remove a NetLabel/LSM domain mapping
+ * @domain: the domain mapping to remove
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Removes a NetLabel/LSM domain mapping.  A @domain value of NULL causes the
+ * default domain mapping to be removed.  Returns zero on success, negative
+ * values on failure.
+ *
+ */
+int netlbl_cfg_map_del(const char *domain, struct netlbl_audit *audit_info)
+{
+	return netlbl_domhsh_remove(domain, audit_info);
+}
+
+/**
+ * netlbl_cfg_unlbl_add_map - Add an unlabeled NetLabel/LSM domain mapping
+ * @domain: the domain mapping to add
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Adds a new unlabeled NetLabel/LSM domain mapping.  A @domain value of NULL
+ * causes a new default domain mapping to be added.  Returns zero on success,
+ * negative values on failure.
+ *
+ */
+int netlbl_cfg_unlbl_add_map(const char *domain,
+			     struct netlbl_audit *audit_info)
+{
+	int ret_val = -ENOMEM;
+	struct netlbl_dom_map *entry;
+
+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+	if (entry == NULL)
+		goto cfg_unlbl_add_map_failure;
+	if (domain != NULL) {
+		entry->domain = kstrdup(domain, GFP_ATOMIC);
+		if (entry->domain == NULL)
+			goto cfg_unlbl_add_map_failure;
+	}
+	entry->type = NETLBL_NLTYPE_UNLABELED;
+
+	ret_val = netlbl_domhsh_add(entry, audit_info);
+	if (ret_val != 0)
+		goto cfg_unlbl_add_map_failure;
+
+	return 0;
+
+cfg_unlbl_add_map_failure:
+	if (entry != NULL)
+		kfree(entry->domain);
+	kfree(entry);
+	return ret_val;
+}
+
+/**
+ * netlbl_cfg_cipsov4_add - Add a new CIPSOv4 DOI definition
+ * @doi_def: the DOI definition
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Add a new CIPSOv4 DOI definition to the NetLabel subsystem.  Returns zero on
+ * success, negative values on failure.
+ *
+ */
+int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def,
+			   struct netlbl_audit *audit_info)
+{
+	int ret_val;
+	const char *type_str;
+	struct audit_buffer *audit_buf;
+
+	ret_val = cipso_v4_doi_add(doi_def);
+
+	audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
+					      audit_info);
+	if (audit_buf != NULL) {
+		switch (doi_def->type) {
+		case CIPSO_V4_MAP_STD:
+			type_str = "std";
+			break;
+		case CIPSO_V4_MAP_PASS:
+			type_str = "pass";
+			break;
+		default:
+			type_str = "(unknown)";
+		}
+		audit_log_format(audit_buf,
+				 " cipso_doi=%u cipso_type=%s res=%u",
+				 doi_def->doi,
+				 type_str,
+				 ret_val == 0 ? 1 : 0);
+		audit_log_end(audit_buf);
+	}
+
+	return ret_val;
+}
+
+/**
+ * netlbl_cfg_cipsov4_add_map - Add a new CIPSOv4 DOI definition and mapping
+ * @doi_def: the DOI definition
+ * @domain: the domain mapping to add
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Add a new CIPSOv4 DOI definition and NetLabel/LSM domain mapping for this
+ * new DOI definition to the NetLabel subsystem.  A @domain value of NULL adds
+ * a new default domain mapping.  Returns zero on success, negative values on
+ * failure.
+ *
+ */
+int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def,
+			       const char *domain,
+			       struct netlbl_audit *audit_info)
+{
+	int ret_val = -ENOMEM;
+	struct netlbl_dom_map *entry;
+
+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+	if (entry == NULL)
+		goto cfg_cipsov4_add_map_failure;
+	if (domain != NULL) {
+		entry->domain = kstrdup(domain, GFP_ATOMIC);
+		if (entry->domain == NULL)
+			goto cfg_cipsov4_add_map_failure;
+	}
+	entry->type = NETLBL_NLTYPE_CIPSOV4;
+	entry->type_def.cipsov4 = doi_def;
+
+	/* Grab an RCU read lock here so nothing happens to the doi_def variable
+	 * between adding it to the CIPSOv4 protocol engine and adding a
+	 * domain mapping for it. */
+
+	rcu_read_lock();
+	ret_val = netlbl_cfg_cipsov4_add(doi_def, audit_info);
+	if (ret_val != 0)
+		goto cfg_cipsov4_add_map_failure_unlock;
+	ret_val = netlbl_domhsh_add(entry, audit_info);
+	if (ret_val != 0)
+		goto cfg_cipsov4_add_map_failure_remove_doi;
+	rcu_read_unlock();
+
+	return 0;
+
+cfg_cipsov4_add_map_failure_remove_doi:
+	cipso_v4_doi_remove(doi_def->doi, audit_info, netlbl_cipsov4_doi_free);
+cfg_cipsov4_add_map_failure_unlock:
+	rcu_read_unlock();
+cfg_cipsov4_add_map_failure:
+	if (entry != NULL)
+		kfree(entry->domain);
+	kfree(entry);
+	return ret_val;
+}
+
+/**
+ * netlbl_cfg_cipsov4_del - Remove an existing CIPSOv4 DOI definition
+ * @doi: the CIPSO DOI value
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Removes an existing CIPSOv4 DOI definition from the NetLabel subsystem.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info)
+{
+	return cipso_v4_doi_remove(doi, audit_info, netlbl_cipsov4_doi_free);
+}
+
+/*
  * Security Attribute Functions
  */
 
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 5a7f6a3..8d76986 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -594,11 +594,11 @@
 
 	if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
 		goto nla_put_failure;
-
+#ifdef CONFIG_NET_EMATCH
 	if (f->ematches.hdr.nmatches &&
 	    tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0)
 		goto nla_put_failure;
-
+#endif
 	nla_nest_end(skb, nest);
 
 	if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0)
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index a1e5619..9c2ec19 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -65,6 +65,7 @@
 #include <linux/string.h>
 #include <linux/skbuff.h>
 #include <linux/random.h>
+#include <linux/if_vlan.h>
 #include <linux/tc_ematch/tc_em_meta.h>
 #include <net/dst.h>
 #include <net/route.h>
@@ -170,6 +171,21 @@
 }
 
 /**************************************************************************
+ * vlan tag
+ **************************************************************************/
+
+META_COLLECTOR(int_vlan_tag)
+{
+	unsigned short tag;
+	if (vlan_get_tag(skb, &tag) < 0)
+		*err = -1;
+	else
+		dst->value = tag;
+}
+
+
+
+/**************************************************************************
  * skb attributes
  **************************************************************************/
 
@@ -520,6 +536,7 @@
 		[META_ID(SK_SNDTIMEO)]		= META_FUNC(int_sk_sndtimeo),
 		[META_ID(SK_SENDMSG_OFF)]	= META_FUNC(int_sk_sendmsg_off),
 		[META_ID(SK_WRITE_PENDING)]	= META_FUNC(int_sk_write_pend),
+		[META_ID(VLAN_TAG)]		= META_FUNC(int_vlan_tag),
 	}
 };
 
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 97e6ebd..ae367c8 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -420,15 +420,15 @@
 				const struct sctp_association *asoc,
 				__u16 key_id)
 {
-	struct sctp_shared_key *key = NULL;
+	struct sctp_shared_key *key;
 
 	/* First search associations set of endpoint pair shared keys */
 	key_for_each(key, &asoc->endpoint_shared_keys) {
 		if (key->key_id == key_id)
-			break;
+			return key;
 	}
 
-	return key;
+	return NULL;
 }
 
 /*
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 5df0c4b..f986587 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3865,6 +3865,10 @@
 	struct sctp_chunk *err_chunk;
 	sctp_ierror_t error;
 
+	/* Make sure that the peer is AUTH capable */
+	if (!asoc->peer.auth_capable)
+		return sctp_sf_unk_chunk(ep, asoc, type, arg, commands);
+
 	if (!sctp_vtag_verify(chunk, asoc)) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
 				SCTP_NULL());
diff --git a/security/Kconfig b/security/Kconfig
index 389e151..25ffe1b 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -105,6 +105,7 @@
 	  If you are unsure how to answer this question, answer N.
 
 source security/selinux/Kconfig
+source security/smack/Kconfig
 
 endmenu
 
diff --git a/security/Makefile b/security/Makefile
index ef87df2..9e8b025 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -4,6 +4,7 @@
 
 obj-$(CONFIG_KEYS)			+= keys/
 subdir-$(CONFIG_SECURITY_SELINUX)	+= selinux
+subdir-$(CONFIG_SECURITY_SMACK)		+= smack
 
 # if we don't select a security model, use the default capabilities
 ifneq ($(CONFIG_SECURITY),y)
@@ -14,5 +15,6 @@
 obj-$(CONFIG_SECURITY)			+= security.o dummy.o inode.o
 # Must precede capability.o in order to stack properly.
 obj-$(CONFIG_SECURITY_SELINUX)		+= selinux/built-in.o
+obj-$(CONFIG_SECURITY_SMACK)		+= commoncap.o smack/built-in.o
 obj-$(CONFIG_SECURITY_CAPABILITIES)	+= commoncap.o capability.o
 obj-$(CONFIG_SECURITY_ROOTPLUG)		+= commoncap.o root_plug.o
diff --git a/security/commoncap.c b/security/commoncap.c
index ea61bc7..5aba826 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -1,4 +1,4 @@
-/* Common capabilities, needed by capability.o and root_plug.o 
+/* Common capabilities, needed by capability.o and root_plug.o
  *
  *	This program is free software; you can redistribute it and/or modify
  *	it under the terms of the GNU General Public License as published by
@@ -25,20 +25,6 @@
 #include <linux/mount.h>
 #include <linux/sched.h>
 
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
-/*
- * Because of the reduced scope of CAP_SETPCAP when filesystem
- * capabilities are in effect, it is safe to allow this capability to
- * be available in the default configuration.
- */
-# define CAP_INIT_BSET  CAP_FULL_SET
-#else /* ie. ndef CONFIG_SECURITY_FILE_CAPABILITIES */
-# define CAP_INIT_BSET  CAP_INIT_EFF_SET
-#endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */
-
-kernel_cap_t cap_bset = CAP_INIT_BSET;    /* systemwide capability bound */
-EXPORT_SYMBOL(cap_bset);
-
 /* Global security state */
 
 unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
@@ -93,9 +79,9 @@
 		kernel_cap_t *inheritable, kernel_cap_t *permitted)
 {
 	/* Derived from kernel/capability.c:sys_capget. */
-	*effective = cap_t (target->cap_effective);
-	*inheritable = cap_t (target->cap_inheritable);
-	*permitted = cap_t (target->cap_permitted);
+	*effective = target->cap_effective;
+	*inheritable = target->cap_inheritable;
+	*permitted = target->cap_permitted;
 	return 0;
 }
 
@@ -140,6 +126,12 @@
 		/* incapable of using this inheritable set */
 		return -EPERM;
 	}
+	if (!cap_issubset(*inheritable,
+			   cap_combine(target->cap_inheritable,
+				       current->cap_bset))) {
+		/* no new pI capabilities outside bounding set */
+		return -EPERM;
+	}
 
 	/* verify restrictions on target's new Permitted set */
 	if (!cap_issubset (*permitted,
@@ -198,28 +190,50 @@
 }
 
 static inline int cap_from_disk(struct vfs_cap_data *caps,
-				struct linux_binprm *bprm,
-				int size)
+				struct linux_binprm *bprm, unsigned size)
 {
 	__u32 magic_etc;
+	unsigned tocopy, i;
 
-	if (size != XATTR_CAPS_SZ)
+	if (size < sizeof(magic_etc))
 		return -EINVAL;
 
 	magic_etc = le32_to_cpu(caps->magic_etc);
 
 	switch ((magic_etc & VFS_CAP_REVISION_MASK)) {
-	case VFS_CAP_REVISION:
-		if (magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
-			bprm->cap_effective = true;
-		else
-			bprm->cap_effective = false;
-		bprm->cap_permitted = to_cap_t(le32_to_cpu(caps->permitted));
-		bprm->cap_inheritable = to_cap_t(le32_to_cpu(caps->inheritable));
-		return 0;
+	case VFS_CAP_REVISION_1:
+		if (size != XATTR_CAPS_SZ_1)
+			return -EINVAL;
+		tocopy = VFS_CAP_U32_1;
+		break;
+	case VFS_CAP_REVISION_2:
+		if (size != XATTR_CAPS_SZ_2)
+			return -EINVAL;
+		tocopy = VFS_CAP_U32_2;
+		break;
 	default:
 		return -EINVAL;
 	}
+
+	if (magic_etc & VFS_CAP_FLAGS_EFFECTIVE) {
+		bprm->cap_effective = true;
+	} else {
+		bprm->cap_effective = false;
+	}
+
+	for (i = 0; i < tocopy; ++i) {
+		bprm->cap_permitted.cap[i] =
+			le32_to_cpu(caps->data[i].permitted);
+		bprm->cap_inheritable.cap[i] =
+			le32_to_cpu(caps->data[i].inheritable);
+	}
+	while (i < VFS_CAP_U32) {
+		bprm->cap_permitted.cap[i] = 0;
+		bprm->cap_inheritable.cap[i] = 0;
+		i++;
+	}
+
+	return 0;
 }
 
 /* Locate any VFS capabilities: */
@@ -227,7 +241,7 @@
 {
 	struct dentry *dentry;
 	int rc = 0;
-	struct vfs_cap_data incaps;
+	struct vfs_cap_data vcaps;
 	struct inode *inode;
 
 	if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) {
@@ -240,14 +254,8 @@
 	if (!inode->i_op || !inode->i_op->getxattr)
 		goto out;
 
-	rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0);
-	if (rc > 0) {
-		if (rc == XATTR_CAPS_SZ)
-			rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS,
-						&incaps, XATTR_CAPS_SZ);
-		else
-			rc = -EINVAL;
-	}
+	rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, &vcaps,
+				   XATTR_CAPS_SZ);
 	if (rc == -ENODATA || rc == -EOPNOTSUPP) {
 		/* no data, that's ok */
 		rc = 0;
@@ -256,7 +264,7 @@
 	if (rc < 0)
 		goto out;
 
-	rc = cap_from_disk(&incaps, bprm, rc);
+	rc = cap_from_disk(&vcaps, bprm, rc);
 	if (rc)
 		printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n",
 			__FUNCTION__, rc, bprm->filename);
@@ -321,10 +329,11 @@
 	/* Derived from fs/exec.c:compute_creds. */
 	kernel_cap_t new_permitted, working;
 
-	new_permitted = cap_intersect (bprm->cap_permitted, cap_bset);
-	working = cap_intersect (bprm->cap_inheritable,
+	new_permitted = cap_intersect(bprm->cap_permitted,
+				 current->cap_bset);
+	working = cap_intersect(bprm->cap_inheritable,
 				 current->cap_inheritable);
-	new_permitted = cap_combine (new_permitted, working);
+	new_permitted = cap_combine(new_permitted, working);
 
 	if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
 	    !cap_issubset (new_permitted, current->cap_permitted)) {
@@ -351,8 +360,10 @@
 	 * capability rules */
 	if (!is_global_init(current)) {
 		current->cap_permitted = new_permitted;
-		current->cap_effective = bprm->cap_effective ?
-				new_permitted : 0;
+		if (bprm->cap_effective)
+			current->cap_effective = new_permitted;
+		else
+			cap_clear(current->cap_effective);
 	}
 
 	/* AUD: Audit candidate if current->cap_effective is set */
@@ -474,13 +485,15 @@
 
 			if (!issecure (SECURE_NO_SETUID_FIXUP)) {
 				if (old_fsuid == 0 && current->fsuid != 0) {
-					cap_t (current->cap_effective) &=
-					    ~CAP_FS_MASK;
+					current->cap_effective =
+						cap_drop_fs_set(
+						    current->cap_effective);
 				}
 				if (old_fsuid != 0 && current->fsuid == 0) {
-					cap_t (current->cap_effective) |=
-					    (cap_t (current->cap_permitted) &
-					     CAP_FS_MASK);
+					current->cap_effective =
+						cap_raise_fs_set(
+						    current->cap_effective,
+						    current->cap_permitted);
 				}
 			}
 			break;
@@ -561,6 +574,23 @@
 
 	return -EPERM;
 }
+
+/*
+ * Called from kernel/sys.c for prctl(PR_CAPBSET_DROP): requires
+ * CAP_SETPCAP (else -EPERM) and a valid @cap (else -EINVAL), then
+ * lowers @cap in current->cap_bset.  Done without
+ * task_capability_lock() since only a capget() on this task could see
+ * inconsistent info; a task can only change its own caps.
+ */
+long cap_prctl_drop(unsigned long cap)
+{
+	if (!capable(CAP_SETPCAP))
+		return -EPERM;
+	if (!cap_valid(cap))
+		return -EINVAL;
+	cap_lower(current->cap_bset, cap);
+	return 0;
+}
 #else
 int cap_task_setscheduler (struct task_struct *p, int policy,
 			   struct sched_param *lp)
@@ -584,9 +614,9 @@
 
 void cap_task_reparent_to_init (struct task_struct *p)
 {
-	p->cap_effective = CAP_INIT_EFF_SET;
-	p->cap_inheritable = CAP_INIT_INH_SET;
-	p->cap_permitted = CAP_FULL_SET;
+	cap_set_init_eff(p->cap_effective);
+	cap_clear(p->cap_inheritable);
+	cap_set_full(p->cap_permitted);
 	p->keep_capabilities = 0;
 	return;
 }
diff --git a/security/dummy.c b/security/dummy.c
index 48d4b0a..649326b 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -36,14 +36,19 @@
 static int dummy_capget (struct task_struct *target, kernel_cap_t * effective,
 			 kernel_cap_t * inheritable, kernel_cap_t * permitted)
 {
-	*effective = *inheritable = *permitted = 0;
 	if (target->euid == 0) {
-		*permitted |= (~0 & ~CAP_FS_MASK);
-		*effective |= (~0 & ~CAP_TO_MASK(CAP_SETPCAP) & ~CAP_FS_MASK);
+		cap_set_full(*permitted);
+		cap_set_init_eff(*effective);
+	} else {
+		cap_clear(*permitted);
+		cap_clear(*effective);
 	}
-	if (target->fsuid == 0) {
-		*permitted |= CAP_FS_MASK;
-		*effective |= CAP_FS_MASK;
+
+	cap_clear(*inheritable);
+
+	if (target->fsuid != 0) {
+		*permitted = cap_drop_fs_set(*permitted);
+		*effective = cap_drop_fs_set(*effective);
 	}
 	return 0;
 }
@@ -402,7 +407,7 @@
 	return 0;
 }
 
-static int dummy_inode_getsecurity(const struct inode *inode, const char *name, void *buffer, size_t size, int err)
+static int dummy_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/security/security.c b/security/security.c
index ca475ca..b6c57a6 100644
--- a/security/security.c
+++ b/security/security.c
@@ -493,11 +493,11 @@
 	return security_ops->inode_killpriv(dentry);
 }
 
-int security_inode_getsecurity(const struct inode *inode, const char *name, void *buffer, size_t size, int err)
+int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc)
 {
 	if (unlikely(IS_PRIVATE(inode)))
 		return 0;
-	return security_ops->inode_getsecurity(inode, name, buffer, size, err);
+	return security_ops->inode_getsecurity(inode, name, buffer, alloc);
 }
 
 int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index be6de0b..e5ed075 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -136,32 +136,6 @@
 
 static struct kmem_cache *sel_inode_cache;
 
-/* Return security context for a given sid or just the context 
-   length if the buffer is null or length is 0 */
-static int selinux_getsecurity(u32 sid, void *buffer, size_t size)
-{
-	char *context;
-	unsigned len;
-	int rc;
-
-	rc = security_sid_to_context(sid, &context, &len);
-	if (rc)
-		return rc;
-
-	if (!buffer || !size)
-		goto getsecurity_exit;
-
-	if (size < len) {
-		len = -ERANGE;
-		goto getsecurity_exit;
-	}
-	memcpy(buffer, context, len);
-
-getsecurity_exit:
-	kfree(context);
-	return len;
-}
-
 /**
  * selinux_secmark_enabled - Check to see if SECMARK is currently enabled
  *
@@ -2675,14 +2649,27 @@
  *
  * Permission check is handled by selinux_inode_getxattr hook.
  */
-static int selinux_inode_getsecurity(const struct inode *inode, const char *name, void *buffer, size_t size, int err)
+static int selinux_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc)
 {
+	u32 size;
+	int error;
+	char *context = NULL;
 	struct inode_security_struct *isec = inode->i_security;
 
 	if (strcmp(name, XATTR_SELINUX_SUFFIX))
 		return -EOPNOTSUPP;
 
-	return selinux_getsecurity(isec->sid, buffer, size);
+	error = security_sid_to_context(isec->sid, &context, &size);
+	if (error)
+		return error;
+	error = size;
+	if (alloc) {
+		*buffer = context;
+		goto out_nofree;
+	}
+	kfree(context);
+out_nofree:
+	return error;
 }
 
 static int selinux_inode_setsecurity(struct inode *inode, const char *name,
diff --git a/security/smack/Kconfig b/security/smack/Kconfig
new file mode 100644
index 0000000..603b087
--- /dev/null
+++ b/security/smack/Kconfig
@@ -0,0 +1,10 @@
+config SECURITY_SMACK
+	bool "Simplified Mandatory Access Control Kernel Support"
+	depends on NETLABEL && SECURITY_NETWORK
+	default n
+	help
+	  This selects the Simplified Mandatory Access Control Kernel.
+	  Smack is useful for sensitivity, integrity, and a variety
+	  of other mandatory security schemes.
+	  If you are unsure how to answer this question, answer N.
+
diff --git a/security/smack/Makefile b/security/smack/Makefile
new file mode 100644
index 0000000..67a63aa
--- /dev/null
+++ b/security/smack/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the SMACK LSM
+#
+
+obj-$(CONFIG_SECURITY_SMACK) := smack.o
+
+smack-y := smack_lsm.o smack_access.o smackfs.o
diff --git a/security/smack/smack.h b/security/smack/smack.h
new file mode 100644
index 0000000..a21a0e9
--- /dev/null
+++ b/security/smack/smack.h
@@ -0,0 +1,220 @@
+/*
+ * Copyright (C) 2007 Casey Schaufler <casey@schaufler-ca.com>
+ *
+ *      This program is free software; you can redistribute it and/or modify
+ *      it under the terms of the GNU General Public License as published by
+ *      the Free Software Foundation, version 2.
+ *
+ * Author:
+ *      Casey Schaufler <casey@schaufler-ca.com>
+ *
+ */
+
+#ifndef _SECURITY_SMACK_H
+#define _SECURITY_SMACK_H
+
+#include <linux/capability.h>
+#include <linux/spinlock.h>
+#include <net/netlabel.h>
+
+/*
+ * Why 23? CIPSO is constrained to 30, so a 32 byte buffer is
+ * bigger than can be used, and 24 is the next lower multiple
+ * of 8, and there are too many issues if there isn't space set
+ * aside for the terminating null byte.
+ */
+#define SMK_MAXLEN	23
+#define SMK_LABELLEN	(SMK_MAXLEN+1)
+
+/*
+ * How many kinds of access are there?
+ * Here's your answer.
+ */
+#define SMK_ACCESSDASH	'-'
+#define SMK_ACCESSLOW	"rwxa"
+#define SMK_ACCESSKINDS	(sizeof(SMK_ACCESSLOW) - 1)
+
+struct superblock_smack {
+	char		*smk_root;	/* label set on the root inode at mount */
+	char		*smk_floor;	/* label checked on statfs/mount/umount */
+	char		*smk_hat;	/* from the "smackfshat=" mount option */
+	char		*smk_default;	/* from the "smackfsdef=" mount option */
+	int		smk_initialized;	/* nonzero once mount options were parsed */
+	spinlock_t	smk_sblock;	/* for initialization */
+};
+
+struct socket_smack {
+	char		*smk_out;			/* outbound label */
+	char		*smk_in;			/* inbound label */
+	char		smk_packet[SMK_LABELLEN];	/* TCP peer label */
+};
+
+/*
+ * Inode smack data
+ */
+struct inode_smack {
+	char		*smk_inode;	/* label of the fso */
+	struct mutex	smk_lock;	/* initialization lock */
+	int		smk_flags;	/* smack inode flags */
+};
+
+#define	SMK_INODE_INSTANT	0x01	/* inode is instantiated */
+
+/*
+ * A label access rule.
+ */
+struct smack_rule {
+	char	*smk_subject;
+	char	*smk_object;
+	int	smk_access;
+};
+
+/*
+ * An entry in the table of permitted label accesses.
+ */
+struct smk_list_entry {
+	struct smk_list_entry	*smk_next;
+	struct smack_rule	smk_rule;
+};
+
+/*
+ * An entry in the table mapping smack values to
+ * CIPSO level/category-set values.
+ */
+struct smack_cipso {
+	int	smk_level;
+	char	smk_catset[SMK_LABELLEN];
+};
+
+/*
+ * This is the repository for labels seen so that it is
+ * not necessary to keep allocating tiny chunks of memory
+ * and so that they can be shared.
+ *
+ * Labels are never modified in place. Anytime a label
+ * is imported (e.g. xattrset on a file) the list is checked
+ * for it and it is added if it doesn't exist. The address
+ * is passed out in either case. Entries are added, but
+ * never deleted.
+ *
+ * Since labels are hanging around anyway it doesn't
+ * hurt to maintain a secid for those awkward situations
+ * where kernel components that ought to use LSM independent
+ * interfaces don't. The secid should go away when all of
+ * these components have been repaired.
+ *
+ * If there is a cipso value associated with the label it
+ * gets stored here, too. This will most likely be rare as
+ * the cipso direct mapping is used internally.
+ */
+struct smack_known {
+	struct smack_known	*smk_next;
+	char			smk_known[SMK_LABELLEN];
+	u32			smk_secid;
+	struct smack_cipso	*smk_cipso;
+	spinlock_t		smk_cipsolock; /* for changing cipso map */
+};
+
+/*
+ * Mount options
+ */
+#define SMK_FSDEFAULT	"smackfsdef="
+#define SMK_FSFLOOR	"smackfsfloor="
+#define SMK_FSHAT	"smackfshat="
+#define SMK_FSROOT	"smackfsroot="
+
+/*
+ * xattr names
+ */
+#define XATTR_SMACK_SUFFIX	"SMACK64"
+#define XATTR_SMACK_IPIN	"SMACK64IPIN"
+#define XATTR_SMACK_IPOUT	"SMACK64IPOUT"
+#define XATTR_NAME_SMACK	XATTR_SECURITY_PREFIX XATTR_SMACK_SUFFIX
+#define XATTR_NAME_SMACKIPIN	XATTR_SECURITY_PREFIX XATTR_SMACK_IPIN
+#define XATTR_NAME_SMACKIPOUT	XATTR_SECURITY_PREFIX XATTR_SMACK_IPOUT
+
+/*
+ * smackfs magic number
+ */
+#define SMACK_MAGIC	0x43415d53 /* "SMAC" */
+
+/*
+ * A limit on the number of entries in the lists
+ * makes some of the list administration easier.
+ */
+#define SMACK_LIST_MAX	10000
+
+/*
+ * CIPSO defaults.
+ */
+#define SMACK_CIPSO_DOI_DEFAULT		3	/* Historical */
+#define SMACK_CIPSO_DIRECT_DEFAULT	250	/* Arbitrary */
+#define SMACK_CIPSO_MAXCATVAL		63	/* Bigger gets harder */
+#define SMACK_CIPSO_MAXLEVEL            255     /* CIPSO 2.2 standard */
+#define SMACK_CIPSO_MAXCATNUM           239     /* CIPSO 2.2 standard */
+
+/*
+ * Just to make the common cases easier to deal with
+ */
+#define MAY_ANY		(MAY_READ | MAY_WRITE | MAY_APPEND | MAY_EXEC)
+#define MAY_ANYREAD	(MAY_READ | MAY_EXEC)
+#define MAY_ANYWRITE	(MAY_WRITE | MAY_APPEND)
+#define MAY_READWRITE	(MAY_READ | MAY_WRITE)
+#define MAY_NOT		0
+
+/*
+ * These functions are in smack_lsm.c
+ */
+struct inode_smack *new_inode_smack(char *);
+
+/*
+ * These functions are in smack_access.c
+ */
+int smk_access(char *, char *, int);
+int smk_curacc(char *, u32);
+int smack_to_cipso(const char *, struct smack_cipso *);
+void smack_from_cipso(u32, char *, char *);
+char *smack_from_secid(const u32);
+char *smk_import(const char *, int);
+struct smack_known *smk_import_entry(const char *, int);
+u32 smack_to_secid(const char *);
+
+/*
+ * Shared data.
+ */
+extern int smack_cipso_direct;
+extern int smack_net_nltype;
+extern char *smack_net_ambient;
+
+extern struct smack_known *smack_known;
+extern struct smack_known smack_known_floor;
+extern struct smack_known smack_known_hat;
+extern struct smack_known smack_known_huh;
+extern struct smack_known smack_known_invalid;
+extern struct smack_known smack_known_star;
+extern struct smack_known smack_known_unset;
+
+extern struct smk_list_entry *smack_list;
+
+/*
+ * Strictly for CIPSO level manipulation.  Set category bit @cat (1-based,
+ * MSB first) in a smack label sized buffer; out-of-range @cat is ignored.
+ */
+static inline void smack_catset_bit(int cat, char *catsetp)
+{
+	if (cat < 1 || cat > SMK_LABELLEN * 8)
+		return;
+
+	catsetp[(cat - 1) / 8] |= 0x80 >> ((cat - 1) % 8);
+}
+
+/*
+ * Present a pointer to the smack label in an inode blob.
+ */
+static inline char *smk_of_inode(const struct inode *isp)
+{
+	struct inode_smack *sip = isp->i_security;
+	return sip->smk_inode;
+}
+
+#endif  /* _SECURITY_SMACK_H */
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
new file mode 100644
index 0000000..f6b5f6e
--- /dev/null
+++ b/security/smack/smack_access.c
@@ -0,0 +1,356 @@
+/*
+ * Copyright (C) 2007 Casey Schaufler <casey@schaufler-ca.com>
+ *
+ *      This program is free software; you can redistribute it and/or modify
+ *      it under the terms of the GNU General Public License as published by
+ *      the Free Software Foundation, version 2.
+ *
+ * Author:
+ *      Casey Schaufler <casey@schaufler-ca.com>
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include "smack.h"
+
+struct smack_known smack_known_unset = {
+	.smk_next	= NULL,
+	.smk_known	= "UNSET",
+	.smk_secid	= 1,
+	.smk_cipso	= NULL,
+};
+
+struct smack_known smack_known_huh = {
+	.smk_next	= &smack_known_unset,
+	.smk_known	= "?",
+	.smk_secid	= 2,
+	.smk_cipso	= NULL,
+};
+
+struct smack_known smack_known_hat = {
+	.smk_next	= &smack_known_huh,
+	.smk_known	= "^",
+	.smk_secid	= 3,
+	.smk_cipso	= NULL,
+};
+
+struct smack_known smack_known_star = {
+	.smk_next	= &smack_known_hat,
+	.smk_known	= "*",
+	.smk_secid	= 4,
+	.smk_cipso	= NULL,
+};
+
+struct smack_known smack_known_floor = {
+	.smk_next	= &smack_known_star,
+	.smk_known	= "_",
+	.smk_secid	= 5,
+	.smk_cipso	= NULL,
+};
+
+struct smack_known smack_known_invalid = {
+	.smk_next	= &smack_known_floor,
+	.smk_known	= "",
+	.smk_secid	= 6,
+	.smk_cipso	= NULL,
+};
+
+struct smack_known *smack_known = &smack_known_invalid;
+
+/*
+ * The initial value needs to be bigger than any of the
+ * known values above.
+ */
+static u32 smack_next_secid = 10;
+
+/**
+ * smk_access - determine if a subject has a specific access to an object
+ * @subject_label: a pointer to the subject's Smack label
+ * @object_label: a pointer to the object's Smack label
+ * @request: the access requested, in "MAY" format
+ *
+ * This function looks up the subject/object pair in the
+ * access rule list and returns 0 if the access is permitted,
+ * non zero otherwise.
+ *
+ * Even though Smack labels are usually shared on smack_list
+ * labels that come in off the network can't be imported
+ * and added to the list for locking reasons.
+ *
+ * Therefore, it is necessary to check the contents of the labels,
+ * not just the pointer values. Of course, in most cases the labels
+ * will be on the list, so checking the pointers may be a worthwhile
+ * optimization.
+ */
+int smk_access(char *subject_label, char *object_label, int request)
+{
+	u32 may = MAY_NOT;
+	struct smk_list_entry *sp;
+	struct smack_rule *srp;
+
+	/*
+	 * Hardcoded comparisons.
+	 *
+	 * A star subject can't access any object.
+	 */
+	if (subject_label == smack_known_star.smk_known ||
+	    strcmp(subject_label, smack_known_star.smk_known) == 0)
+		return -EACCES;
+	/*
+	 * A star object can be accessed by any subject.
+	 */
+	if (object_label == smack_known_star.smk_known ||
+	    strcmp(object_label, smack_known_star.smk_known) == 0)
+		return 0;
+	/*
+	 * An object can be accessed in any way by a subject
+	 * with the same label.
+	 */
+	if (subject_label == object_label ||
+	    strcmp(subject_label, object_label) == 0)
+		return 0;
+	/*
+	 * A hat subject can read any object.
+	 * A floor object can be read by any subject.
+	 */
+	if ((request & MAY_ANYREAD) == request) {
+		if (object_label == smack_known_floor.smk_known ||
+		    strcmp(object_label, smack_known_floor.smk_known) == 0)
+			return 0;
+		if (subject_label == smack_known_hat.smk_known ||
+		    strcmp(subject_label, smack_known_hat.smk_known) == 0)
+			return 0;
+	}
+	/*
+	 * Beyond here an explicit relationship is required.
+	 * If the requested access is contained in the available
+	 * access (e.g. read is included in readwrite) it's
+	 * good.
+	 */
+	for (sp = smack_list; sp != NULL; sp = sp->smk_next) {
+		srp = &sp->smk_rule;
+
+		if (srp->smk_subject == subject_label ||
+		    strcmp(srp->smk_subject, subject_label) == 0) {
+			if (srp->smk_object == object_label ||
+			    strcmp(srp->smk_object, object_label) == 0) {
+				may = srp->smk_access;
+				break;
+			}
+		}
+	}
+	/*
+	 * This is a bit map operation.
+	 */
+	if ((request & may) == request)
+		return 0;
+
+	return -EACCES;
+}
+
+/**
+ * smk_curacc - determine if current has a specific access to an object
+ * @obj_label: a pointer to the object's Smack label
+ * @mode: the access requested, in "MAY" format
+ *
+ * This function checks the current subject label/object label pair
+ * in the access rule list and returns 0 if the access is permitted,
+ * non zero otherwise. It allows that current may have the capability
+ * (CAP_MAC_OVERRIDE) to override the rules.
+ */
+int smk_curacc(char *obj_label, u32 mode)
+{
+	int rc;
+
+	rc = smk_access(current->security, obj_label, mode);
+	if (rc == 0)
+		return 0;
+
+	if (capable(CAP_MAC_OVERRIDE))
+		return 0;
+
+	return rc;
+}
+
+static DEFINE_MUTEX(smack_known_lock);
+
+/**
+ * smk_import_entry - import a label, return the list entry
+ * @string: a text string that might be a Smack label
+ * @len: the maximum size, or zero if it is NUL terminated.
+ *
+ * Returns a pointer to the entry in the label list that
+ * matches the passed string, adding it if necessary.
+ */
+struct smack_known *smk_import_entry(const char *string, int len)
+{
+	struct smack_known *skp;
+	char smack[SMK_LABELLEN];
+	int found;
+	int i;
+
+	if (len <= 0 || len > SMK_MAXLEN)
+		len = SMK_MAXLEN;
+
+	for (i = 0, found = 0; i < SMK_LABELLEN; i++) {
+		if (found)
+			smack[i] = '\0';
+		else if (i >= len || string[i] > '~' || string[i] <= ' ' ||
+			 string[i] == '/') {
+			smack[i] = '\0';
+			found = 1;
+		} else
+			smack[i] = string[i];
+	}
+
+	if (smack[0] == '\0')
+		return NULL;
+
+	mutex_lock(&smack_known_lock);
+
+	for (skp = smack_known; skp != NULL; skp = skp->smk_next)
+		if (strncmp(skp->smk_known, smack, SMK_MAXLEN) == 0)
+			break;
+
+	if (skp == NULL) {
+		skp = kzalloc(sizeof(struct smack_known), GFP_KERNEL);
+		if (skp != NULL) {
+			skp->smk_next = smack_known;
+			strncpy(skp->smk_known, smack, SMK_MAXLEN);
+			skp->smk_secid = smack_next_secid++;
+			skp->smk_cipso = NULL;
+			spin_lock_init(&skp->smk_cipsolock);
+			/*
+			 * Make sure that the entry is actually
+			 * filled before putting it on the list.
+			 */
+			smp_mb();
+			smack_known = skp;
+		}
+	}
+
+	mutex_unlock(&smack_known_lock);
+
+	return skp;
+}
+
+/**
+ * smk_import - import a smack label
+ * @string: a text string that might be a Smack label
+ * @len: the maximum size, or zero if it is NUL terminated.
+ *
+ * Returns a pointer to the label in the label list that
+ * matches the passed string, adding it if necessary.
+ */
+char *smk_import(const char *string, int len)
+{
+	struct smack_known *skp;
+
+	skp = smk_import_entry(string, len);
+	if (skp == NULL)
+		return NULL;
+	return skp->smk_known;
+}
+
+/**
+ * smack_from_secid - find the Smack label associated with a secid
+ * @secid: an integer that might be associated with a Smack label
+ *
+ * Returns a pointer to the appropriate Smack label if there is one,
+ * otherwise a pointer to the invalid Smack label.
+ */
+char *smack_from_secid(const u32 secid)
+{
+	struct smack_known *skp;
+
+	for (skp = smack_known; skp != NULL; skp = skp->smk_next)
+		if (skp->smk_secid == secid)
+			return skp->smk_known;
+
+	/*
+	 * If we got this far someone asked for the translation
+	 * of a secid that is not on the list.
+	 */
+	return smack_known_invalid.smk_known;
+}
+
+/**
+ * smack_to_secid - find the secid associated with a Smack label
+ * @smack: the Smack label
+ *
+ * Returns the appropriate secid if there is one,
+ * otherwise 0
+ */
+u32 smack_to_secid(const char *smack)
+{
+	struct smack_known *skp;
+
+	for (skp = smack_known; skp != NULL; skp = skp->smk_next)
+		if (strncmp(skp->smk_known, smack, SMK_MAXLEN) == 0)
+			return skp->smk_secid;
+	return 0;
+}
+
+/**
+ * smack_from_cipso - find the Smack label associated with a CIPSO option
+ * @level: Bell & LaPadula level from the network
+ * @cp: Bell & LaPadula categories from the network (SMK_LABELLEN bytes)
+ * @result: where to put the Smack value (SMK_MAXLEN bytes are written)
+ *
+ * This is a simple lookup in the label table; when no entry matches,
+ * the smack_known_huh label is copied out instead.
+ *
+ * Unlike other smack handling this sends back a copy of the label, not
+ * a pointer into the master list: a foreign host may send a label that
+ * is new to this machine, and adding an entry to the master list can't
+ * be done at that point.
+ * NOTE(review): strncpy() never writes result[SMK_MAXLEN]; presumably
+ * callers pass a zeroed SMK_LABELLEN buffer - confirm.
+ */
+void smack_from_cipso(u32 level, char *cp, char *result)
+{
+	struct smack_known *kp;
+	char *final = NULL;
+
+	for (kp = smack_known; final == NULL && kp != NULL; kp = kp->smk_next) {
+		if (kp->smk_cipso == NULL)
+			continue;
+
+		spin_lock_bh(&kp->smk_cipsolock);
+
+		if (kp->smk_cipso->smk_level == level &&
+		    memcmp(kp->smk_cipso->smk_catset, cp, SMK_LABELLEN) == 0)
+			final = kp->smk_known;
+
+		spin_unlock_bh(&kp->smk_cipsolock);
+	}
+	if (final == NULL)
+		final = smack_known_huh.smk_known;
+	strncpy(result, final, SMK_MAXLEN);
+	return;
+}
+
+/**
+ * smack_to_cipso - find the CIPSO option to go with a Smack label
+ * @smack: a pointer to the smack label in question
+ * @cp: where to put the result
+ *
+ * Returns zero if a value is available, non-zero otherwise.
+ */
+int smack_to_cipso(const char *smack, struct smack_cipso *cp)
+{
+	struct smack_known *kp;
+
+	for (kp = smack_known; kp != NULL; kp = kp->smk_next)
+		if (kp->smk_known == smack ||
+		    strcmp(kp->smk_known, smack) == 0)
+			break;
+
+	if (kp == NULL || kp->smk_cipso == NULL)
+		return -ENOENT;
+
+	memcpy(cp, kp->smk_cipso, sizeof(struct smack_cipso));
+	return 0;
+}
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
new file mode 100644
index 0000000..1c11e42
--- /dev/null
+++ b/security/smack/smack_lsm.c
@@ -0,0 +1,2518 @@
+/*
+ *  Simplified MAC Kernel (smack) security module
+ *
+ *  This file contains the smack hook function implementations.
+ *
+ *  Author:
+ *	Casey Schaufler <casey@schaufler-ca.com>
+ *
+ *  Copyright (C) 2007 Casey Schaufler <casey@schaufler-ca.com>
+ *
+ *	This program is free software; you can redistribute it and/or modify
+ *	it under the terms of the GNU General Public License version 2,
+ *      as published by the Free Software Foundation.
+ */
+
+#include <linux/xattr.h>
+#include <linux/pagemap.h>
+#include <linux/mount.h>
+#include <linux/stat.h>
+#include <linux/ext2_fs.h>
+#include <linux/kd.h>
+#include <asm/ioctls.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/mutex.h>
+#include <linux/pipe_fs_i.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+
+#include "smack.h"
+
+/*
+ * I hope these are the hokeyist lines of code in the module. Casey.
+ */
+#define DEVPTS_SUPER_MAGIC	0x1cd1
+#define SOCKFS_MAGIC		0x534F434B
+#define TMPFS_MAGIC		0x01021994
+
+/**
+ * smk_fetch - Fetch the smack label from a file.
+ * @ip: a pointer to the inode
+ * @dp: a pointer to the dentry
+ *
+ * Returns a pointer to the master list entry for the Smack label
+ * or NULL if there was no (or only an empty) label to fetch.
+ */
+static char *smk_fetch(struct inode *ip, struct dentry *dp)
+{
+	int rc;
+	char in[SMK_LABELLEN];
+
+	if (ip->i_op->getxattr == NULL)
+		return NULL;
+
+	rc = ip->i_op->getxattr(dp, XATTR_NAME_SMACK, in, SMK_LABELLEN);
+	if (rc <= 0)	/* error or empty value: "in" holds nothing usable */
+		return NULL;
+
+	return smk_import(in, rc);
+}
+
+/**
+ * new_inode_smack - allocate an inode security blob
+ * @smack: a pointer to the Smack label to use in the blob
+ *
+ * Returns the new blob or NULL if there's no memory available
+ */
+struct inode_smack *new_inode_smack(char *smack)
+{
+	struct inode_smack *isp;
+
+	isp = kzalloc(sizeof(struct inode_smack), GFP_KERNEL);
+	if (isp == NULL)
+		return NULL;
+
+	isp->smk_inode = smack;
+	isp->smk_flags = 0;
+	mutex_init(&isp->smk_lock);
+
+	return isp;
+}
+
+/*
+ * LSM hooks.
+ * We he, that is fun!
+ */
+
+/**
+ * smack_ptrace - Smack approval on ptrace
+ * @ptp: parent task pointer
+ * @ctp: child task pointer
+ *
+ * Returns 0 if access is OK, an error code otherwise
+ *
+ * Do the capability checks, and require read and write.
+ */
+static int smack_ptrace(struct task_struct *ptp, struct task_struct *ctp)
+{
+	int rc;
+
+	rc = cap_ptrace(ptp, ctp);
+	if (rc != 0)
+		return rc;
+
+	rc = smk_access(ptp->security, ctp->security, MAY_READWRITE);
+	if (rc != 0 && __capable(ptp, CAP_MAC_OVERRIDE))
+		return 0;
+
+	return rc;
+}
+
+/**
+ * smack_syslog - decide whether current may use syslog
+ * @type: message type, handed on to cap_syslog()
+ *
+ * Beyond the capability check, only a task carrying the floor label
+ * or holding CAP_MAC_OVERRIDE is let through.
+ * Returns 0 if allowed, an error code otherwise.
+ */
+static int smack_syslog(int type)
+{
+	char *label = current->security;
+	int err = cap_syslog(type);
+
+	if (err != 0)
+		return err;
+
+	/* privilege overrides the label requirement */
+	if (capable(CAP_MAC_OVERRIDE))
+		return 0;
+
+	/* anything but the floor label is refused */
+	if (label == smack_known_floor.smk_known)
+		return 0;
+	return -EACCES;
+}
+
+
+/*
+ * Superblock Hooks.
+ */
+
+/**
+ * smack_sb_alloc_security - allocate a superblock blob
+ * @sb: the superblock getting the blob
+ *
+ * Returns 0 on success or -ENOMEM on error.
+ */
+static int smack_sb_alloc_security(struct super_block *sb)
+{
+	struct superblock_smack *sbsp;
+
+	sbsp = kzalloc(sizeof(struct superblock_smack), GFP_KERNEL);
+
+	if (sbsp == NULL)
+		return -ENOMEM;
+
+	sbsp->smk_root = smack_known_floor.smk_known;
+	sbsp->smk_default = smack_known_floor.smk_known;
+	sbsp->smk_floor = smack_known_floor.smk_known;
+	sbsp->smk_hat = smack_known_hat.smk_known;
+	sbsp->smk_initialized = 0;
+	spin_lock_init(&sbsp->smk_sblock);
+
+	sb->s_security = sbsp;
+
+	return 0;
+}
+
+/**
+ * smack_sb_free_security - free a superblock blob
+ * @sb: the superblock getting the blob
+ *
+ */
+static void smack_sb_free_security(struct super_block *sb)
+{
+	kfree(sb->s_security);
+	sb->s_security = NULL;
+}
+
+/**
+ * smack_sb_copy_data - copy mount options data for processing
+ * @type: file system type
+ * @orig: where to start
+ * @smackopts: buffer that receives the Smack specific options
+ *
+ * Returns 0 on success or -ENOMEM on error.
+ *
+ * Copy the Smack specific mount options out of the mount
+ * options list.
+ */
+static int smack_sb_copy_data(struct file_system_type *type, void *orig,
+			      void *smackopts)
+{
+	char *cp, *commap, *otheropts, *dp;
+
+	/* Binary mount data: just copy */
+	if (type->fs_flags & FS_BINARY_MOUNTDATA) {
+		copy_page(smackopts, orig);
+		return 0;
+	}
+
+	otheropts = (char *)get_zeroed_page(GFP_KERNEL);
+	if (otheropts == NULL)
+		return -ENOMEM;
+
+	for (cp = orig, commap = orig; commap != NULL; cp = commap + 1) {
+		if (strstr(cp, SMK_FSDEFAULT) == cp)
+			dp = smackopts;
+		else if (strstr(cp, SMK_FSFLOOR) == cp)
+			dp = smackopts;
+		else if (strstr(cp, SMK_FSHAT) == cp)
+			dp = smackopts;
+		else if (strstr(cp, SMK_FSROOT) == cp)
+			dp = smackopts;
+		else
+			dp = otheropts;
+
+		commap = strchr(cp, ',');
+		if (commap != NULL)
+			*commap = '\0';
+
+		if (*dp != '\0')
+			strcat(dp, ",");
+		strcat(dp, cp);
+	}
+
+	strcpy(orig, otheropts);
+	free_page((unsigned long)otheropts);
+
+	return 0;
+}
+
+/**
+ * smack_sb_kern_mount - Smack specific mount processing
+ * @sb: the file system superblock
+ * @data: the smack mount options
+ *
+ * Returns 0 on success, -ENOMEM if the root inode blob can't be allocated
+ */
+static int smack_sb_kern_mount(struct super_block *sb, void *data)
+{
+	struct dentry *root = sb->s_root;
+	struct inode *inode = root->d_inode;
+	struct superblock_smack *sp = sb->s_security;
+	struct inode_smack *isp;
+	char *op;
+	char *commap;
+	char *nsp;
+
+	spin_lock(&sp->smk_sblock);
+	if (sp->smk_initialized != 0) {
+		spin_unlock(&sp->smk_sblock);
+		return 0;
+	}
+	sp->smk_initialized = 1;
+	spin_unlock(&sp->smk_sblock);
+
+	for (op = data; op != NULL; op = commap) {
+		commap = strchr(op, ',');
+		if (commap != NULL)
+			*commap++ = '\0';
+
+		if (strncmp(op, SMK_FSHAT, strlen(SMK_FSHAT)) == 0) {
+			op += strlen(SMK_FSHAT);
+			nsp = smk_import(op, 0);
+			if (nsp != NULL)
+				sp->smk_hat = nsp;
+		} else if (strncmp(op, SMK_FSFLOOR, strlen(SMK_FSFLOOR)) == 0) {
+			op += strlen(SMK_FSFLOOR);
+			nsp = smk_import(op, 0);
+			if (nsp != NULL)
+				sp->smk_floor = nsp;
+		} else if (strncmp(op, SMK_FSDEFAULT,
+				   strlen(SMK_FSDEFAULT)) == 0) {
+			op += strlen(SMK_FSDEFAULT);
+			nsp = smk_import(op, 0);
+			if (nsp != NULL)
+				sp->smk_default = nsp;
+		} else if (strncmp(op, SMK_FSROOT, strlen(SMK_FSROOT)) == 0) {
+			op += strlen(SMK_FSROOT);
+			nsp = smk_import(op, 0);
+			if (nsp != NULL)
+				sp->smk_root = nsp;
+		}
+	}
+
+	/*
+	 * Initialize the root inode; fail the mount if no blob can be had.
+	 */
+	isp = inode->i_security;
+	if (isp != NULL)
+		isp->smk_inode = sp->smk_root;
+	else
+		inode->i_security = new_inode_smack(sp->smk_root);
+
+	return inode->i_security == NULL ? -ENOMEM : 0;
+}
+
+/**
+ * smack_sb_statfs - Smack check on statfs
+ * @dentry: identifies the file system in question
+ *
+ * Returns 0 if current can read the floor label of the filesystem,
+ * an error code otherwise
+ */
+static int smack_sb_statfs(struct dentry *dentry)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct superblock_smack *sbsp = sb->s_security;
+
+	return smk_curacc(sbsp->smk_floor, MAY_READ);
+}
+
+/**
+ * smack_sb_mount - Smack check for mounting
+ * @dev_name: unused
+ * @nd: mount point
+ * @type: unused
+ * @flags: unused
+ * @data: unused
+ *
+ * Returns 0 if current can write the floor label of the filesystem
+ * being mounted on, an error code otherwise.
+ */
+static int smack_sb_mount(char *dev_name, struct nameidata *nd,
+			  char *type, unsigned long flags, void *data)
+{
+	struct super_block *sb = nd->mnt->mnt_sb;
+	struct superblock_smack *sbsp = sb->s_security;
+
+	return smk_curacc(sbsp->smk_floor, MAY_WRITE);
+}
+
+/**
+ * smack_sb_umount - Smack check for unmounting
+ * @mnt: file system to unmount
+ * @flags: unused
+ *
+ * Returns 0 if current can write the floor label of the filesystem
+ * being unmounted, an error code otherwise.
+ */
+static int smack_sb_umount(struct vfsmount *mnt, int flags)
+{
+	struct superblock_smack *sbsp = mnt->mnt_sb->s_security;
+
+	return smk_curacc(sbsp->smk_floor, MAY_WRITE);
+}
+
+/*
+ * Inode hooks
+ */
+
+/**
+ * smack_inode_alloc_security - allocate an inode blob
+ * @inode: the inode in need of a blob
+ *
+ * The blob is built by new_inode_smack() from current->security.
+ *
+ * Returns 0 if it gets a blob, -ENOMEM otherwise
+ */
+static int smack_inode_alloc_security(struct inode *inode)
+{
+	inode->i_security = new_inode_smack(current->security);
+	if (inode->i_security == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+/**
+ * smack_inode_free_security - free an inode blob
+ * @inode: the inode with a blob
+ *
+ * Frees the blob and clears the blob pointer in inode
+ */
+static void smack_inode_free_security(struct inode *inode)
+{
+	kfree(inode->i_security);
+	inode->i_security = NULL;
+}
+
+/**
+ * smack_inode_init_security - copy out the smack from an inode
+ * @inode: the inode
+ * @dir: unused
+ * @name: where to put the attribute name, may be NULL
+ * @value: where to put the attribute value, may be NULL
+ * @len: where to put the length of the attribute, may be NULL
+ *
+ * The strings stored through @name and @value are allocated with
+ * kstrdup(). On failure nothing allocated here is left behind.
+ *
+ * Returns 0 if it all works out, -ENOMEM if there's no memory
+ */
+static int smack_inode_init_security(struct inode *inode, struct inode *dir,
+				     char **name, void **value, size_t *len)
+{
+	char *isp = smk_of_inode(inode);
+
+	if (name) {
+		*name = kstrdup(XATTR_SMACK_SUFFIX, GFP_KERNEL);
+		if (*name == NULL)
+			return -ENOMEM;
+	}
+
+	if (value) {
+		*value = kstrdup(isp, GFP_KERNEL);
+		if (*value == NULL) {
+			/*
+			 * Do not leak the name duplicated above.
+			 */
+			if (name) {
+				kfree(*name);
+				*name = NULL;
+			}
+			return -ENOMEM;
+		}
+	}
+
+	/*
+	 * The length includes the terminating NUL.
+	 */
+	if (len)
+		*len = strlen(isp) + 1;
+
+	return 0;
+}
+
+/**
+ * smack_inode_link - Smack check on link
+ * @old_dentry: the existing object
+ * @dir: unused
+ * @new_dentry: the new object
+ *
+ * Write access is needed to the existing object and, if the new
+ * dentry already has an inode, to that inode as well.
+ *
+ * Returns 0 if access is permitted, an error code otherwise
+ */
+static int smack_inode_link(struct dentry *old_dentry, struct inode *dir,
+			    struct dentry *new_dentry)
+{
+	int rc;
+
+	rc = smk_curacc(smk_of_inode(old_dentry->d_inode), MAY_WRITE);
+	if (rc != 0)
+		return rc;
+
+	if (new_dentry->d_inode == NULL)
+		return 0;
+
+	return smk_curacc(smk_of_inode(new_dentry->d_inode), MAY_WRITE);
+}
+
+/**
+ * smack_inode_unlink - Smack check on inode deletion
+ * @dir: containing directory object
+ * @dentry: file to unlink
+ *
+ * Returns 0 if current can write the object and the
+ * containing directory, error code otherwise
+ */
+static int smack_inode_unlink(struct inode *dir, struct dentry *dentry)
+{
+	int rc;
+
+	/*
+	 * Write access to the thing being unlinked is checked first
+	 */
+	rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE);
+	if (rc != 0)
+		return rc;
+
+	/*
+	 * Then write access to the directory that contains it
+	 */
+	return smk_curacc(smk_of_inode(dir), MAY_WRITE);
+}
+
+/**
+ * smack_inode_rmdir - Smack check on directory deletion
+ * @dir: containing directory object
+ * @dentry: directory to unlink
+ *
+ * Returns 0 if current can write the directory being removed
+ * and the containing directory, error code otherwise
+ */
+static int smack_inode_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *victim = dentry->d_inode;
+	int rc;
+
+	/*
+	 * Write access to the directory being removed is checked first
+	 */
+	rc = smk_curacc(smk_of_inode(victim), MAY_WRITE);
+	if (rc != 0)
+		return rc;
+
+	/*
+	 * Then write access to the directory that contains it
+	 */
+	return smk_curacc(smk_of_inode(dir), MAY_WRITE);
+}
+
+/**
+ * smack_inode_rename - Smack check on rename
+ * @old_inode: the old directory (unused)
+ * @old_dentry: the object being renamed
+ * @new_inode: the new directory (unused)
+ * @new_dentry: the new name; its inode, if any, is checked too
+ *
+ * Read and write access is required on the object being renamed
+ * and, when the new dentry already has an inode, on that inode.
+ * The directories themselves are not checked here.
+ *
+ * Returns 0 if access is permitted, an error code otherwise
+ */
+static int smack_inode_rename(struct inode *old_inode,
+			      struct dentry *old_dentry,
+			      struct inode *new_inode,
+			      struct dentry *new_dentry)
+{
+	int rc;
+	char *isp;
+
+	isp = smk_of_inode(old_dentry->d_inode);
+	rc = smk_curacc(isp, MAY_READWRITE);
+
+	if (rc == 0 && new_dentry->d_inode != NULL) {
+		isp = smk_of_inode(new_dentry->d_inode);
+		rc = smk_curacc(isp, MAY_READWRITE);
+	}
+
+	return rc;
+}
+
+/**
+ * smack_inode_permission - Smack version of permission()
+ * @inode: the inode in question
+ * @mask: the access requested
+ * @nd: unused
+ *
+ * This is the important Smack hook.
+ *
+ * Returns 0 if access is permitted, an error code otherwise
+ */
+static int smack_inode_permission(struct inode *inode, int mask,
+				  struct nameidata *nd)
+{
+	/*
+	 * A zero mask asks for no permission at all: it is an
+	 * existence test, and the inode is there.
+	 */
+	if (mask != 0)
+		return smk_curacc(smk_of_inode(inode), mask);
+
+	return 0;
+}
+
+/**
+ * smack_inode_setattr - Smack check for setting attributes
+ * @dentry: the object
+ * @iattr: for the force flag
+ *
+ * Returns 0 if access is permitted, an error code otherwise
+ */
+static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+	/*
+	 * Forced changes skip the check so that the setuid bit
+	 * can be cleared.
+	 */
+	if (!(iattr->ia_valid & ATTR_FORCE))
+		return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE);
+
+	return 0;
+}
+
+/**
+ * smack_inode_getattr - Smack check for getting attributes
+ * @mnt: unused
+ * @dentry: the object
+ *
+ * Returns 0 if current can read the object, an error code otherwise
+ */
+static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+{
+	char *isp = smk_of_inode(dentry->d_inode);
+
+	return smk_curacc(isp, MAY_READ);
+}
+
+/**
+ * smack_inode_setxattr - Smack check for setting xattrs
+ * @dentry: the object
+ * @name: name of the attribute
+ * @value: unused
+ * @size: unused
+ * @flags: unused
+ *
+ * This protects the Smack attributes explicitly: without
+ * CAP_MAC_ADMIN none of them may be set.
+ *
+ * Returns 0 if access is permitted, an error code otherwise
+ */
+static int smack_inode_setxattr(struct dentry *dentry, char *name,
+				void *value, size_t size, int flags)
+{
+	/*
+	 * The capability is tested first, the attribute name second.
+	 */
+	if (!capable(CAP_MAC_ADMIN) &&
+	    (strcmp(name, XATTR_NAME_SMACK) == 0 ||
+	     strcmp(name, XATTR_NAME_SMACKIPIN) == 0 ||
+	     strcmp(name, XATTR_NAME_SMACKIPOUT) == 0))
+		return -EPERM;
+
+	return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE);
+}
+
+/**
+ * smack_inode_post_setxattr - Apply the Smack update approved above
+ * @dentry: object
+ * @name: attribute name
+ * @value: attribute value
+ * @size: attribute size
+ * @flags: unused
+ *
+ * Set the pointer in the inode blob to the entry found
+ * in the master label list. A value that cannot be imported
+ * results in the invalid label being set. Attributes other than
+ * the Smack label, and values of SMK_LABELLEN bytes or more,
+ * are ignored.
+ */
+static void smack_inode_post_setxattr(struct dentry *dentry, char *name,
+				      void *value, size_t size, int flags)
+{
+	struct inode_smack *isp;
+	char *nsp;
+
+	/*
+	 * Not SMACK
+	 */
+	if (strcmp(name, XATTR_NAME_SMACK) != 0)
+		return;
+
+	if (size >= SMK_LABELLEN)
+		return;
+
+	isp = dentry->d_inode->i_security;
+
+	nsp = smk_import(value, size);
+	if (nsp == NULL)
+		nsp = smack_known_invalid.smk_known;
+
+	/*
+	 * No locking is done here. This is a pointer
+	 * assignment.
+	 */
+	isp->smk_inode = nsp;
+}
+
+/**
+ * smack_inode_getxattr - Smack check on getxattr
+ * @dentry: the object
+ * @name: unused
+ *
+ * Returns 0 if current can read the object, an error code otherwise
+ */
+static int smack_inode_getxattr(struct dentry *dentry, char *name)
+{
+	return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ);
+}
+
+/**
+ * smack_inode_removexattr - Smack check on removexattr
+ * @dentry: the object
+ * @name: name of the attribute
+ *
+ * Removing any of the Smack attributes requires CAP_MAC_ADMIN.
+ * The same set of attributes is protected in smack_inode_setxattr().
+ *
+ * Returns 0 if access is permitted, an error code otherwise
+ */
+static int smack_inode_removexattr(struct dentry *dentry, char *name)
+{
+	if (strcmp(name, XATTR_NAME_SMACK) == 0 ||
+	    strcmp(name, XATTR_NAME_SMACKIPIN) == 0 ||
+	    strcmp(name, XATTR_NAME_SMACKIPOUT) == 0) {
+		if (!capable(CAP_MAC_ADMIN))
+			return -EPERM;
+	}
+
+	return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE);
+}
+
+/**
+ * smack_inode_getsecurity - get smack xattrs
+ * @inode: the object
+ * @name: attribute name
+ * @buffer: where to put a pointer to the result
+ * @alloc: unused
+ *
+ * The pointer stored in @buffer refers to the label held by the
+ * inode or socket blob; nothing is copied or allocated here.
+ *
+ * Returns the size of the attribute, including the terminating NUL,
+ * or -EOPNOTSUPP if the attribute does not apply to this inode
+ */
+static int smack_inode_getsecurity(const struct inode *inode,
+				   const char *name, void **buffer,
+				   bool alloc)
+{
+	struct socket_smack *ssp;
+	struct socket *sock;
+	struct inode *ip = (struct inode *)inode;
+	char *isp;
+
+	if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) {
+		isp = smk_of_inode(inode);
+		*buffer = isp;
+		return strlen(isp) + 1;
+	}
+
+	/*
+	 * The rest of the Smack xattrs are only on sockets.
+	 */
+	if (ip->i_sb->s_magic != SOCKFS_MAGIC)
+		return -EOPNOTSUPP;
+
+	sock = SOCKET_I(ip);
+	if (sock == NULL)
+		return -EOPNOTSUPP;
+
+	ssp = sock->sk->sk_security;
+
+	if (strcmp(name, XATTR_SMACK_IPIN) == 0)
+		isp = ssp->smk_in;
+	else if (strcmp(name, XATTR_SMACK_IPOUT) == 0)
+		isp = ssp->smk_out;
+	else
+		return -EOPNOTSUPP;
+
+	*buffer = isp;
+	return strlen(isp) + 1;
+}
+
+
+
+/**
+ * smack_inode_listsecurity - list the Smack attributes
+ * @inode: the object
+ * @buffer: where they go, or NULL to ask only for the size
+ * @buffer_size: size of buffer
+ *
+ * The attribute name is copied together with its terminating NUL,
+ * as entries in an xattr name list are NUL terminated. When @buffer
+ * is NULL or too small nothing is copied.
+ *
+ * Returns the number of bytes the list occupies. The caller is
+ * expected to compare this with @buffer_size to detect a buffer
+ * that was too small.
+ */
+static int smack_inode_listsecurity(struct inode *inode, char *buffer,
+				    size_t buffer_size)
+{
+	int len = strlen(XATTR_NAME_SMACK) + 1;
+
+	if (buffer != NULL && len <= buffer_size)
+		memcpy(buffer, XATTR_NAME_SMACK, len);
+
+	return len;
+}
+
+/*
+ * File Hooks
+ */
+
+/**
+ * smack_file_permission - Smack check on file operations
+ * @file: unused
+ * @mask: unused
+ *
+ * Always returns 0: no check is made on individual file operations.
+ *
+ * Should access checks be done on each read or write?
+ * UNICOS and SELinux say yes.
+ * Trusted Solaris, Trusted Irix, and just about everyone else says no.
+ *
+ * I'll say no for now. Smack does not do the frequent
+ * label changing that SELinux does.
+ */
+static int smack_file_permission(struct file *file, int mask)
+{
+	return 0;
+}
+
+/**
+ * smack_file_alloc_security - assign a file security blob
+ * @file: the object
+ *
+ * The security blob for a file is a pointer to the master
+ * label list, so no allocation is done. The file takes the
+ * value held in current->security.
+ *
+ * Returns 0
+ */
+static int smack_file_alloc_security(struct file *file)
+{
+	char *csp = current->security;
+
+	file->f_security = csp;
+	return 0;
+}
+
+/**
+ * smack_file_free_security - clear a file security blob
+ * @file: the object
+ *
+ * The security blob for a file is a pointer to the master
+ * label list, so no memory is freed. Only the pointer is cleared.
+ */
+static void smack_file_free_security(struct file *file)
+{
+	file->f_security = NULL;
+}
+
+/**
+ * smack_file_ioctl - Smack check on ioctls
+ * @file: the object
+ * @cmd: what to do
+ * @arg: unused
+ *
+ * Relies heavily on the correct use of the ioctl command conventions:
+ * the direction bits of @cmd decide which accesses are checked.
+ *
+ * Returns 0 if allowed, error code otherwise
+ */
+static int smack_file_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	int rc;
+
+	if (_IOC_DIR(cmd) & _IOC_WRITE) {
+		rc = smk_curacc(file->f_security, MAY_WRITE);
+		if (rc != 0)
+			return rc;
+	}
+
+	if (_IOC_DIR(cmd) & _IOC_READ)
+		return smk_curacc(file->f_security, MAY_READ);
+
+	return 0;
+}
+
+/**
+ * smack_file_lock - Smack check on file locking
+ * @file: the object
+ * @cmd: unused
+ *
+ * Returns 0 if current has write access, error code otherwise
+ */
+static int smack_file_lock(struct file *file, unsigned int cmd)
+{
+	return smk_curacc(file->f_security, MAY_WRITE);
+}
+
+/**
+ * smack_file_fcntl - Smack check on fcntl
+ * @file: the object
+ * @cmd: what action to check
+ * @arg: unused
+ *
+ * The "get" commands need read access, the "set" commands need
+ * write access and anything else needs both.
+ *
+ * Returns 0 if current has access, error code otherwise
+ */
+static int smack_file_fcntl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	int may;
+
+	switch (cmd) {
+	case F_DUPFD:
+	case F_GETFD:
+	case F_GETFL:
+	case F_GETLK:
+	case F_GETOWN:
+	case F_GETSIG:
+		may = MAY_READ;
+		break;
+	case F_SETFD:
+	case F_SETFL:
+	case F_SETLK:
+	case F_SETLKW:
+	case F_SETOWN:
+	case F_SETSIG:
+		may = MAY_WRITE;
+		break;
+	default:
+		may = MAY_READWRITE;
+		break;
+	}
+
+	return smk_curacc(file->f_security, may);
+}
+
+/**
+ * smack_file_set_fowner - set the file security blob value
+ * @file: object in question
+ *
+ * The file blob is pointed at the value in current->security.
+ *
+ * Returns 0
+ * Further research may be required on this one.
+ */
+static int smack_file_set_fowner(struct file *file)
+{
+	char *csp = current->security;
+
+	file->f_security = csp;
+	return 0;
+}
+
+/**
+ * smack_file_send_sigiotask - Smack on sigio
+ * @tsk: The target task
+ * @fown: the object the signal come from
+ * @signum: unused
+ *
+ * Allow a privileged task to get signals even if it shouldn't
+ *
+ * Returns 0 if a subject with the object's smack could
+ * write to the task, or if the task has CAP_MAC_OVERRIDE,
+ * an error code otherwise.
+ */
+static int smack_file_send_sigiotask(struct task_struct *tsk,
+				     struct fown_struct *fown, int signum)
+{
+	/*
+	 * struct fown_struct is never outside the context of a struct file
+	 */
+	struct file *file = container_of(fown, struct file, f_owner);
+	int rc;
+
+	rc = smk_access(file->f_security, tsk->security, MAY_WRITE);
+	if (rc == 0)
+		return 0;
+
+	if (__capable(tsk, CAP_MAC_OVERRIDE))
+		return 0;
+
+	return rc;
+}
+
+/**
+ * smack_file_receive - Smack file receive check
+ * @file: the object
+ *
+ * The access asked for follows the mode the file was opened with.
+ *
+ * Returns 0 if current has access, error code otherwise
+ */
+static int smack_file_receive(struct file *file)
+{
+	int may = 0;
+
+	/*
+	 * This code relies on bitmasks.
+	 */
+	if (file->f_mode & FMODE_READ)
+		may |= MAY_READ;
+	if (file->f_mode & FMODE_WRITE)
+		may |= MAY_WRITE;
+
+	return smk_curacc(file->f_security, may);
+}
+
+/*
+ * Task hooks
+ */
+
+/**
+ * smack_task_alloc_security - "allocate" a task blob
+ * @tsk: the task in need of a blob
+ *
+ * Smack isn't using copies of blobs. Everyone
+ * points to an immutable list. No alloc required.
+ * No data copy required. The new task simply gets the
+ * pointer held in current->security.
+ *
+ * Always returns 0
+ */
+static int smack_task_alloc_security(struct task_struct *tsk)
+{
+	tsk->security = current->security;
+
+	return 0;
+}
+
+/**
+ * smack_task_free_security - "free" a task blob
+ * @task: the task with the blob
+ *
+ * Smack isn't using copies of blobs. Everyone
+ * points to an immutable list. The blobs never go away.
+ * There is no leak here. Only the pointer is cleared.
+ */
+static void smack_task_free_security(struct task_struct *task)
+{
+	task->security = NULL;
+}
+
+/**
+ * smack_task_setpgid - Smack check on setting pgid
+ * @p: the task object
+ * @pgid: unused
+ *
+ * Returns 0 if current can write the task object, error code otherwise
+ */
+static int smack_task_setpgid(struct task_struct *p, pid_t pgid)
+{
+	char *tsp = p->security;
+
+	return smk_curacc(tsp, MAY_WRITE);
+}
+
+/**
+ * smack_task_getpgid - Smack access check for getpgid
+ * @p: the object task
+ *
+ * Returns 0 if current can read the object task, error code otherwise
+ */
+static int smack_task_getpgid(struct task_struct *p)
+{
+	char *tsp = p->security;
+
+	return smk_curacc(tsp, MAY_READ);
+}
+
+/**
+ * smack_task_getsid - Smack access check for getsid
+ * @p: the object task
+ *
+ * Returns 0 if current can read the object task, error code otherwise
+ */
+static int smack_task_getsid(struct task_struct *p)
+{
+	char *tsp = p->security;
+
+	return smk_curacc(tsp, MAY_READ);
+}
+
+/**
+ * smack_task_getsecid - get the secid of the task
+ * @p: the object task
+ * @secid: where to put the result
+ *
+ * Sets the secid to contain a u32 version of the smack label.
+ */
+static void smack_task_getsecid(struct task_struct *p, u32 *secid)
+{
+	char *tsp = p->security;
+
+	*secid = smack_to_secid(tsp);
+}
+
+/**
+ * smack_task_setnice - Smack check on setting nice
+ * @p: the task object
+ * @nice: unused
+ *
+ * Returns 0 if current can write the task object, error code otherwise
+ */
+static int smack_task_setnice(struct task_struct *p, int nice)
+{
+	char *tsp = p->security;
+
+	return smk_curacc(tsp, MAY_WRITE);
+}
+
+/**
+ * smack_task_setioprio - Smack check on setting ioprio
+ * @p: the task object
+ * @ioprio: unused
+ *
+ * Returns 0 if current can write the task object, error code otherwise
+ */
+static int smack_task_setioprio(struct task_struct *p, int ioprio)
+{
+	char *tsp = p->security;
+
+	return smk_curacc(tsp, MAY_WRITE);
+}
+
+/**
+ * smack_task_getioprio - Smack check on reading ioprio
+ * @p: the task object
+ *
+ * Returns 0 if current can read the task object, error code otherwise
+ */
+static int smack_task_getioprio(struct task_struct *p)
+{
+	char *tsp = p->security;
+
+	return smk_curacc(tsp, MAY_READ);
+}
+
+/**
+ * smack_task_setscheduler - Smack check on setting scheduler
+ * @p: the task object
+ * @policy: unused
+ * @lp: unused
+ *
+ * Return 0 if write access is permitted
+ */
+static int smack_task_setscheduler(struct task_struct *p, int policy,
+				   struct sched_param *lp)
+{
+	return smk_curacc(p->security, MAY_WRITE);
+}
+
+/**
+ * smack_task_getscheduler - Smack check on reading scheduler
+ * @p: the task object
+ *
+ * Returns 0 if current can read the task object, error code otherwise
+ */
+static int smack_task_getscheduler(struct task_struct *p)
+{
+	char *tsp = p->security;
+
+	return smk_curacc(tsp, MAY_READ);
+}
+
+/**
+ * smack_task_movememory - Smack check on moving memory
+ * @p: the task object
+ *
+ * Returns 0 if current can write the task object, error code otherwise
+ */
+static int smack_task_movememory(struct task_struct *p)
+{
+	char *tsp = p->security;
+
+	return smk_curacc(tsp, MAY_WRITE);
+}
+
+/**
+ * smack_task_kill - Smack check on signal delivery
+ * @p: the task object
+ * @info: used only to recognize signals that bypass the check
+ * @sig: unused
+ * @secid: identifies the smack to use in lieu of current's
+ *
+ * Return 0 if write access is permitted
+ *
+ * The secid behavior is an artifact of an SELinux hack
+ * in the USB code. Someday it may go away.
+ */
+static int smack_task_kill(struct task_struct *p, struct siginfo *info,
+			   int sig, u32 secid)
+{
+	/*
+	 * Special cases where signals really ought to go through
+	 * in spite of policy. Stephen Smalley suggests it may
+	 * make sense to change the caller so that it doesn't
+	 * bother with the LSM hook in these cases.
+	 */
+	if (info != SEND_SIG_NOINFO &&
+	    (is_si_special(info) || SI_FROMKERNEL(info)))
+		return 0;
+
+	/*
+	 * If the secid isn't 0 we're dealing with some USB IO
+	 * specific behavior. This is not clean. For one thing
+	 * we can't take privilege into account.
+	 */
+	if (secid != 0)
+		return smk_access(smack_from_secid(secid), p->security,
+				  MAY_WRITE);
+
+	/*
+	 * Sending a signal requires that the sender
+	 * can write the receiver.
+	 */
+	return smk_curacc(p->security, MAY_WRITE);
+}
+
+/**
+ * smack_task_wait - Smack access check for waiting
+ * @p: task to wait for
+ *
+ * Returns 0 if current can wait for p, error code otherwise
+ */
+static int smack_task_wait(struct task_struct *p)
+{
+	int rc;
+
+	rc = smk_access(current->security, p->security, MAY_WRITE);
+	if (rc == 0)
+		return 0;
+
+	/*
+	 * Allow the operation to succeed if either task
+	 * has privilege to perform operations that might
+	 * account for the smack labels having gotten to
+	 * be different in the first place.
+	 *
+	 * This breaks the strict subject/object access
+	 * control ideal, taking the object's privilege
+	 * state into account in the decision as well as
+	 * the smack value.
+	 */
+	if (capable(CAP_MAC_OVERRIDE) || __capable(p, CAP_MAC_OVERRIDE))
+		return 0;
+
+	return rc;
+}
+
+/**
+ * smack_task_to_inode - copy task smack into the inode blob
+ * @p: task to copy from
+ * @inode: inode to copy to
+ *
+ * Sets the smack pointer in the inode security blob
+ */
+static void smack_task_to_inode(struct task_struct *p, struct inode *inode)
+{
+	struct inode_smack *isp = inode->i_security;
+	isp->smk_inode = p->security;
+}
+
+/*
+ * Socket hooks.
+ */
+
+/**
+ * smack_sk_alloc_security - Allocate a socket blob
+ * @sk: the socket
+ * @family: unused
+ * @gfp_flags: memory allocation flags
+ *
+ * Both the inbound and outbound Smack pointers start out as
+ * the value in current->security.
+ *
+ * Returns 0 on success, -ENOMEM if there's no memory
+ */
+static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags)
+{
+	char *csp = current->security;
+	struct socket_smack *ssp;
+
+	ssp = kzalloc(sizeof(struct socket_smack), gfp_flags);
+	if (ssp == NULL)
+		return -ENOMEM;
+
+	ssp->smk_in = csp;
+	ssp->smk_out = csp;
+	ssp->smk_packet[0] = '\0';
+
+	sk->sk_security = ssp;
+
+	return 0;
+}
+
+/**
+ * smack_sk_free_security - Free a socket blob
+ * @sk: the socket
+ *
+ * Frees the blob and clears the blob pointer, so that no
+ * dangling pointer is left behind in the socket.
+ */
+static void smack_sk_free_security(struct sock *sk)
+{
+	kfree(sk->sk_security);
+	sk->sk_security = NULL;
+}
+
+/**
+ * smack_set_catset - convert a capset to netlabel mls categories
+ * @catset: the Smack categories, SMK_LABELLEN bytes of bitmap, or NULL
+ * @sap: where to put the netlabel categories
+ *
+ * Allocates and fills attr.mls.cat. Bits are numbered from 1,
+ * starting with the most significant bit of the first byte.
+ *
+ * If the category map cannot be allocated @sap is left without
+ * categories and NETLBL_SECATTR_MLS_CAT is not set.
+ */
+static void smack_set_catset(char *catset, struct netlbl_lsm_secattr *sap)
+{
+	unsigned char *cp;
+	unsigned char m;
+	int cat;
+	int byte;
+
+	if (catset == NULL)
+		return;
+
+	/*
+	 * This is an atomic allocation and it can fail. Do not touch
+	 * the map, or advertise it in the flags, unless it exists.
+	 */
+	sap->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC);
+	if (sap->attr.mls.cat == NULL)
+		return;
+
+	sap->attr.mls.cat->startbit = 0;
+	sap->flags |= NETLBL_SECATTR_MLS_CAT;
+
+	cp = (unsigned char *)catset;
+	for (cat = 1, byte = 0; byte < SMK_LABELLEN; cp++, byte++)
+		for (m = 0x80; m != 0; m >>= 1, cat++) {
+			if ((m & *cp) == 0)
+				continue;
+			/*
+			 * Best effort: there is no way to report a
+			 * failure to set an individual bit from here.
+			 */
+			netlbl_secattr_catmap_setbit(sap->attr.mls.cat,
+						     cat, GFP_ATOMIC);
+		}
+}
+
+/**
+ * smack_to_secattr - fill a secattr from a smack value
+ * @smack: the smack value
+ * @nlsp: where the result goes
+ *
+ * Casey says that CIPSO is good enough for now.
+ * It can be used to effect.
+ * It can also be abused to effect when necessary.
+ * Apologies to the TSIG group in general and GW in particular.
+ */
+static void smack_to_secattr(char *smack, struct netlbl_lsm_secattr *nlsp)
+{
+	struct smack_cipso cipso;
+
+	/*
+	 * Only CIPSOv4 is handled. For any other type
+	 * the secattr is left as it was.
+	 */
+	if (smack_net_nltype != NETLBL_NLTYPE_CIPSOV4)
+		return;
+
+	nlsp->domain = NULL;
+	nlsp->flags = NETLBL_SECATTR_DOMAIN;
+	nlsp->flags |= NETLBL_SECATTR_MLS_LVL;
+
+	if (smack_to_cipso(smack, &cipso) == 0) {
+		nlsp->attr.mls.lvl = cipso.smk_level;
+		smack_set_catset(cipso.smk_catset, nlsp);
+		return;
+	}
+
+	/*
+	 * No CIPSO mapping was found for this smack value. Use the
+	 * direct level and the smack value itself as the categories.
+	 */
+	nlsp->attr.mls.lvl = smack_cipso_direct;
+	smack_set_catset(smack, nlsp);
+}
+
+/**
+ * smack_netlabel - Set the secattr on a socket
+ * @sk: the socket
+ *
+ * Convert the outbound smack value (smk_out) to a
+ * secattr and attach it to the socket. Nothing is attached
+ * when the conversion leaves the secattr flags empty.
+ *
+ * Returns 0 on success or an error code
+ */
+static int smack_netlabel(struct sock *sk)
+{
+	struct netlbl_lsm_secattr secattr;
+	struct socket_smack *ssp = sk->sk_security;
+	int rc;
+
+	netlbl_secattr_init(&secattr);
+	smack_to_secattr(ssp->smk_out, &secattr);
+
+	if (secattr.flags == NETLBL_SECATTR_NONE)
+		rc = 0;
+	else
+		rc = netlbl_sock_setattr(sk, &secattr);
+
+	netlbl_secattr_destroy(&secattr);
+	return rc;
+}
+
+/**
+ * smack_inode_setsecurity - set smack xattrs
+ * @inode: the object
+ * @name: attribute name
+ * @value: attribute value
+ * @size: size of the attribute
+ * @flags: unused
+ *
+ * Sets the named attribute in the appropriate blob: the inode blob
+ * for the Smack label itself, the socket blob for the inbound and
+ * outbound labels. Setting the outbound label also updates the
+ * netlabel information on the socket.
+ *
+ * Returns 0 on success, -EACCES if @value is NULL or longer than
+ * SMK_LABELLEN, -EINVAL if the value cannot be imported, -EOPNOTSUPP
+ * if the attribute does not apply to this inode, or the error code
+ * from smack_netlabel()
+ */
+static int smack_inode_setsecurity(struct inode *inode, const char *name,
+				   const void *value, size_t size, int flags)
+{
+	char *sp;
+	struct inode_smack *nsp = inode->i_security;
+	struct socket_smack *ssp;
+	struct socket *sock;
+
+	if (value == NULL || size > SMK_LABELLEN)
+		return -EACCES;
+
+	/*
+	 * The value is imported before the attribute name is looked at.
+	 */
+	sp = smk_import(value, size);
+	if (sp == NULL)
+		return -EINVAL;
+
+	if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) {
+		nsp->smk_inode = sp;
+		return 0;
+	}
+	/*
+	 * The rest of the Smack xattrs are only on sockets.
+	 */
+	if (inode->i_sb->s_magic != SOCKFS_MAGIC)
+		return -EOPNOTSUPP;
+
+	sock = SOCKET_I(inode);
+	if (sock == NULL)
+		return -EOPNOTSUPP;
+
+	ssp = sock->sk->sk_security;
+
+	if (strcmp(name, XATTR_SMACK_IPIN) == 0)
+		ssp->smk_in = sp;
+	else if (strcmp(name, XATTR_SMACK_IPOUT) == 0) {
+		ssp->smk_out = sp;
+		return smack_netlabel(sock->sk);
+	} else
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/**
+ * smack_socket_post_create - finish socket setup
+ * @sock: the socket
+ * @family: protocol family
+ * @type: unused
+ * @protocol: unused
+ * @kern: unused
+ *
+ * Sets the netlabel information on the socket. Only PF_INET
+ * sockets are labeled; every other family is left alone.
+ *
+ * Returns 0 on success, and error code otherwise
+ */
+static int smack_socket_post_create(struct socket *sock, int family,
+				    int type, int protocol, int kern)
+{
+	/*
+	 * Set the outbound netlbl.
+	 */
+	if (family == PF_INET)
+		return smack_netlabel(sock->sk);
+
+	return 0;
+}
+
+/**
+ * smack_flags_to_may - convert S_ to MAY_ values
+ * @flags: the S_ value
+ *
+ * A permission bit set for user, group or other is enough
+ * to ask for the matching MAY_ access.
+ *
+ * Returns the equivalent MAY_ value
+ */
+static int smack_flags_to_may(int flags)
+{
+	int may;
+
+	may = (flags & S_IRUGO) ? MAY_READ : 0;
+	may |= (flags & S_IWUGO) ? MAY_WRITE : 0;
+	may |= (flags & S_IXUGO) ? MAY_EXEC : 0;
+
+	return may;
+}
+
+/**
+ * smack_msg_msg_alloc_security - Set the security blob for msg_msg
+ * @msg: the object
+ *
+ * The message gets the pointer held in current->security;
+ * nothing is allocated.
+ *
+ * Returns 0
+ */
+static int smack_msg_msg_alloc_security(struct msg_msg *msg)
+{
+	msg->security = current->security;
+	return 0;
+}
+
+/**
+ * smack_msg_msg_free_security - Clear the security blob for msg_msg
+ * @msg: the object
+ *
+ * Clears the blob pointer; no memory is freed
+ */
+static void smack_msg_msg_free_security(struct msg_msg *msg)
+{
+	msg->security = NULL;
+}
+
+/**
+ * smack_of_shm - the smack pointer for the shm
+ * @shp: the object
+ *
+ * Returns a pointer to the smack value
+ */
+static char *smack_of_shm(struct shmid_kernel *shp)
+{
+	struct kern_ipc_perm *isp = &shp->shm_perm;
+
+	return (char *)isp->security;
+}
+
+/**
+ * smack_shm_alloc_security - Set the security blob for shm
+ * @shp: the object
+ *
+ * The segment gets the pointer held in current->security.
+ *
+ * Returns 0
+ */
+static int smack_shm_alloc_security(struct shmid_kernel *shp)
+{
+	shp->shm_perm.security = current->security;
+
+	return 0;
+}
+
+/**
+ * smack_shm_free_security - Clear the security blob for shm
+ * @shp: the object
+ *
+ * Clears the blob pointer; no memory is freed
+ */
+static void smack_shm_free_security(struct shmid_kernel *shp)
+{
+	shp->shm_perm.security = NULL;
+}
+
+/**
+ * smack_shm_associate - Smack access check for shm
+ * @shp: the object
+ * @shmflg: access requested
+ *
+ * Returns 0 if current has the requested access, error code otherwise
+ */
+static int smack_shm_associate(struct shmid_kernel *shp, int shmflg)
+{
+	int may = smack_flags_to_may(shmflg);
+
+	return smk_curacc(smack_of_shm(shp), may);
+}
+
+/**
+ * smack_shm_shmctl - Smack access check for shm
+ * @shp: the object
+ * @cmd: what it wants to do
+ *
+ * Returns 0 if current has the requested access, -EINVAL for a
+ * command that is not recognized, another error code otherwise
+ */
+static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd)
+{
+	char *ssp = smack_of_shm(shp);
+
+	switch (cmd) {
+	case IPC_STAT:
+	case SHM_STAT:
+		return smk_curacc(ssp, MAY_READ);
+	case IPC_SET:
+	case SHM_LOCK:
+	case SHM_UNLOCK:
+	case IPC_RMID:
+		return smk_curacc(ssp, MAY_READWRITE);
+	case IPC_INFO:
+	case SHM_INFO:
+		/*
+		 * System level information.
+		 */
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+/**
+ * smack_shm_shmat - Smack access for shmat
+ * @shp: the object
+ * @shmaddr: unused
+ * @shmflg: access requested
+ *
+ * Returns 0 if current has the requested access, error code otherwise
+ */
+static int smack_shm_shmat(struct shmid_kernel *shp, char __user *shmaddr,
+			   int shmflg)
+{
+	int may = smack_flags_to_may(shmflg);
+
+	return smk_curacc(smack_of_shm(shp), may);
+}
+
+/**
+ * smack_of_sem - the smack pointer for the sem
+ * @sma: the object
+ *
+ * Returns a pointer to the smack value
+ */
+static char *smack_of_sem(struct sem_array *sma)
+{
+	struct kern_ipc_perm *isp = &sma->sem_perm;
+
+	return (char *)isp->security;
+}
+
+/**
+ * smack_sem_alloc_security - Set the security blob for sem
+ * @sma: the object
+ *
+ * The semaphore set gets the pointer held in current->security.
+ *
+ * Returns 0
+ */
+static int smack_sem_alloc_security(struct sem_array *sma)
+{
+	sma->sem_perm.security = current->security;
+
+	return 0;
+}
+
+/**
+ * smack_sem_free_security - Clear the security blob for sem
+ * @sma: the object
+ *
+ * Clears the blob pointer; no memory is freed
+ */
+static void smack_sem_free_security(struct sem_array *sma)
+{
+	sma->sem_perm.security = NULL;
+}
+
+/**
+ * smack_sem_associate - Smack access check for sem
+ * @sma: the object
+ * @semflg: access requested
+ *
+ * Returns 0 if current has the requested access, error code otherwise
+ */
+static int smack_sem_associate(struct sem_array *sma, int semflg)
+{
+	int may = smack_flags_to_may(semflg);
+
+	return smk_curacc(smack_of_sem(sma), may);
+}
+
+/**
+ * smack_sem_semctl - Smack access check for sem
+ * @sma: the object
+ * @cmd: what it wants to do
+ *
+ * Returns 0 if current has the requested access, -EINVAL for a
+ * command that is not recognized, another error code otherwise
+ */
+static int smack_sem_semctl(struct sem_array *sma, int cmd)
+{
+	char *ssp = smack_of_sem(sma);
+	int may;
+
+	switch (cmd) {
+	case GETPID:
+	case GETNCNT:
+	case GETZCNT:
+	case GETVAL:
+	case GETALL:
+	case IPC_STAT:
+	case SEM_STAT:
+		may = MAY_READ;
+		break;
+	case SETVAL:
+	case SETALL:
+	case IPC_RMID:
+	case IPC_SET:
+		may = MAY_READWRITE;
+		break;
+	case IPC_INFO:
+	case SEM_INFO:
+		/*
+		 * System level information
+		 */
+		return 0;
+	default:
+		return -EINVAL;
+	}
+
+	return smk_curacc(ssp, may);
+}
+
+/**
+ * smack_sem_semop - Smack checks of semaphore operations
+ * @sma: the object
+ * @sops: unused
+ * @nsops: unused
+ * @alter: unused
+ *
+ * Treated as read and write in all cases.
+ *
+ * Returns 0 if access is allowed, error code otherwise
+ */
+static int smack_sem_semop(struct sem_array *sma, struct sembuf *sops,
+			   unsigned nsops, int alter)
+{
+	return smk_curacc(smack_of_sem(sma), MAY_READWRITE);
+}
+
+/**
+ * smack_msg_queue_alloc_security - Set the security blob for msg
+ * @msq: the object
+ *
+ * The queue gets the pointer held in current->security.
+ *
+ * Returns 0
+ */
+static int smack_msg_queue_alloc_security(struct msg_queue *msq)
+{
+	struct kern_ipc_perm *kisp = &msq->q_perm;
+
+	kisp->security = current->security;
+	return 0;
+}
+
+/**
+ * smack_msg_queue_free_security - Clear the security blob for msg
+ * @msq: the object
+ *
+ * Clears the blob pointer; no memory is freed
+ */
+static void smack_msg_queue_free_security(struct msg_queue *msq)
+{
+	struct kern_ipc_perm *kisp = &msq->q_perm;
+
+	kisp->security = NULL;
+}
+
+/**
+ * smack_of_msq - the smack pointer for the msq
+ * @msq: the object
+ *
+ * Returns a pointer to the smack value
+ */
+static char *smack_of_msq(struct msg_queue *msq)
+{
+	struct kern_ipc_perm *kisp = &msq->q_perm;
+
+	return (char *)kisp->security;
+}
+
+/**
+ * smack_msg_queue_associate - Smack access check for msg_queue
+ * @msq: the object
+ * @msqflg: access requested
+ *
+ * Returns 0 if current has the requested access, error code otherwise
+ */
+static int smack_msg_queue_associate(struct msg_queue *msq, int msqflg)
+{
+	int may = smack_flags_to_may(msqflg);
+
+	return smk_curacc(smack_of_msq(msq), may);
+}
+
+/**
+ * smack_msg_queue_msgctl - Smack access check for msgctl() commands
+ * @msq: the object
+ * @cmd: what it wants to do
+ *
+ * Returns 0 if current has the access @cmd needs, error code otherwise
+ */
+static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd)
+{
+	char *label = smack_of_msq(msq);
+	int rc;
+
+	switch (cmd) {
+	case IPC_INFO:
+	case MSG_INFO:
+		/* System level information, no queue label check */
+		rc = 0;
+		break;
+	case IPC_STAT:
+	case MSG_STAT:
+		rc = smk_curacc(label, MAY_READ);
+		break;
+	case IPC_SET:
+	case IPC_RMID:
+		rc = smk_curacc(label, MAY_READWRITE);
+		break;
+	default:
+		rc = -EINVAL;
+		break;
+	}
+
+	return rc;
+}
+
+/**
+ * smack_msg_queue_msgsnd - Smack access check for sending on a msg_queue
+ * @msq: the object
+ * @msg: unused
+ * @msqflg: flags describing the access requested
+ *
+ * Returns 0 if current has the requested access, error code otherwise
+ */
+static int smack_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg,
+				  int msqflg)
+{
+	char *label = smack_of_msq(msq);
+	int access = smack_flags_to_may(msqflg);
+
+	/* The flags, not the message itself, decide the access needed. */
+	return smk_curacc(label, access);
+}
+
+/**
+ * smack_msg_queue_msgrcv - Smack access check for receiving from a msg_queue
+ * @msq: the object
+ * @msg: unused
+ * @target: unused
+ * @type: unused
+ * @mode: unused
+ *
+ * Returns 0 if current has read and write access, error code otherwise
+ */
+static int smack_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
+			struct task_struct *target, long type, int mode)
+{
+	char *label = smack_of_msq(msq);
+
+	return smk_curacc(label, MAY_READWRITE);
+}
+
+/**
+ * smack_ipc_permission - Smack access for ipc_permission()
+ * @ipp: the object permissions
+ * @flag: flags describing the access requested
+ *
+ * Returns 0 if current has the requested access, error code otherwise
+ */
+static int smack_ipc_permission(struct kern_ipc_perm *ipp, short flag)
+{
+	char *label = ipp->security;
+	int access = smack_flags_to_may(flag);
+
+	/* The label is whatever was stored in the IPC object's blob. */
+	return smk_curacc(label, access);
+}
+
+/**
+ * smack_d_instantiate - Make sure the blob is correct on an inode
+ * @opt_dentry: dentry to use for the xattr lookup, may be NULL
+ * @inode: the object
+ *
+ * Set the inode's security blob if it hasn't been done already.
+ */
+static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
+{
+	struct super_block *sbp;
+	struct superblock_smack *sbsp;
+	struct inode_smack *isp;
+	char *csp = current->security;
+	char *fetched;
+	char *final;
+	struct dentry *dp;
+
+	if (inode == NULL)
+		return;
+
+	isp = inode->i_security;
+
+	mutex_lock(&isp->smk_lock);
+	/*
+	 * If the inode is already instantiated
+	 * take the quick way out
+	 */
+	if (isp->smk_flags & SMK_INODE_INSTANT)
+		goto unlockandout;
+
+	sbp = inode->i_sb;
+	sbsp = sbp->s_security;
+	/*
+	 * We're going to use the superblock default label
+	 * if there's no label on the file.
+	 */
+	final = sbsp->smk_default;
+
+	/*
+	 * This is pretty hackish.
+	 * Casey says that we shouldn't have to do
+	 * file system specific code, but it does help
+	 * with keeping it simple.
+	 */
+	switch (sbp->s_magic) {
+	case SMACK_MAGIC:
+		/*
+		 * Casey says that it's a little embarrassing
+		 * that the smack file system doesn't do
+		 * extended attributes.
+		 */
+		final = smack_known_star.smk_known;
+		break;
+	case PIPEFS_MAGIC:
+		/*
+		 * Casey says pipes are easy (?)
+		 */
+		final = smack_known_star.smk_known;
+		break;
+	case DEVPTS_SUPER_MAGIC:
+		/*
+		 * devpts seems content with the label of the task.
+		 * Programs that change smack have to treat the
+		 * pty with respect.
+		 */
+		final = csp;
+		break;
+	case SOCKFS_MAGIC:
+		/*
+		 * Casey says sockets get the smack of the task.
+		 */
+		final = csp;
+		break;
+	case PROC_SUPER_MAGIC:
+		/*
+		 * Casey says procfs appears not to care.
+		 * The superblock default suffices.
+		 */
+		break;
+	case TMPFS_MAGIC:
+		/*
+		 * Device labels should come from the filesystem,
+		 * but watch out, because they're volatile,
+		 * getting recreated on every reboot.
+		 */
+		final = smack_known_star.smk_known;
+		/*
+		 * No break: deliberate fall through to the xattr lookup.
+		 *
+		 * If a smack value has been set we want to use it,
+		 * but since tmpfs isn't giving us the opportunity
+		 * to set mount options simulate setting the
+		 * superblock default.
+		 */
+	default:
+		/*
+		 * This isn't an understood special case.
+		 * Get the value from the xattr.
+		 *
+		 * No xattr support means, alas, no SMACK label.
+		 * Use the aforeapplied default.
+		 * It would be curious if the label of the task
+		 * does not match that assigned.
+		 */
+		if (inode->i_op->getxattr == NULL)
+			break;
+		/*
+		 * Get the dentry for xattr; the reference is dropped below.
+		 */
+		if (opt_dentry == NULL) {
+			dp = d_find_alias(inode);
+			if (dp == NULL)
+				break;
+		} else {
+			dp = dget(opt_dentry);
+			if (dp == NULL)
+				break;
+		}
+
+		fetched = smk_fetch(inode, dp);
+		if (fetched != NULL)
+			final = fetched;
+
+		dput(dp);
+		break;
+	}
+
+	if (final == NULL)
+		isp->smk_inode = csp;
+	else
+		isp->smk_inode = final;
+
+	isp->smk_flags |= SMK_INODE_INSTANT;
+
+unlockandout:
+	mutex_unlock(&isp->smk_lock);
+	return;
+}
+
+/**
+ * smack_getprocattr - Smack process attribute access
+ * @p: the object task
+ * @name: the name of the attribute in /proc/.../attr
+ * @value: where to put the result
+ *
+ * Hands back a kstrdup() copy of the task's Smack label in @value
+ *
+ * Returns the length of the smack label or an error code
+ */
+static int smack_getprocattr(struct task_struct *p, char *name, char **value)
+{
+	char *copy;
+
+	/* Only the "current" attribute is supported. */
+	if (strcmp(name, "current"))
+		return -EINVAL;
+
+	copy = kstrdup(p->security, GFP_KERNEL);
+	if (!copy)
+		return -ENOMEM;
+
+	/* The caller receives the copy along with its length. */
+	*value = copy;
+	return strlen(copy);
+}
+
+/**
+ * smack_setprocattr - Smack process attribute setting
+ * @p: the object task
+ * @name: the name of the attribute in /proc/.../attr
+ * @value: the value to set
+ * @size: the size of the value
+ *
+ * Sets the Smack value of the task. Only setting self
+ * is permitted and only with privilege
+ *
+ * Returns @size on success or an error code
+ */
+static int smack_setprocattr(struct task_struct *p, char *name,
+			     void *value, size_t size)
+{
+	char *label;
+
+	/*
+	 * The task needs CAP_MAC_ADMIN and may only relabel itself:
+	 * changing another process' Smack value is too dangerous
+	 * and supports no sane use case.
+	 */
+	if (!__capable(p, CAP_MAC_ADMIN) || p != current)
+		return -EPERM;
+
+	/* A value must be present and shorter than SMK_LABELLEN. */
+	if (value == NULL || size == 0 || size >= SMK_LABELLEN)
+		return -EINVAL;
+
+	/* "current" is the only attribute that can be set. */
+	if (strcmp(name, "current") != 0)
+		return -EINVAL;
+
+	label = smk_import(value, size);
+	if (label == NULL)
+		return -EINVAL;
+
+	p->security = label;
+	return size;
+}
+
+/**
+ * smack_unix_stream_connect - Smack access check for connecting a UDS
+ * @sock: the connecting socket
+ * @other: the socket being connected to
+ * @newsk: unused
+ *
+ * Return 0 if a subject with the smack of sock could read and write
+ * an object with the smack of other, otherwise an error code
+ */
+static int smack_unix_stream_connect(struct socket *sock,
+				     struct socket *other, struct sock *newsk)
+{
+	struct inode *from = SOCK_INODE(sock);
+	struct inode *to = SOCK_INODE(other);
+
+	return smk_access(smk_of_inode(from), smk_of_inode(to), MAY_READWRITE);
+}
+
+/**
+ * smack_unix_may_send - Smack access check for sending on a UDS
+ * @sock: the sending socket
+ * @other: the receiving socket
+ *
+ * Return 0 if a subject with the smack of sock could write to
+ * an object with the smack of other, otherwise an error code
+ */
+static int smack_unix_may_send(struct socket *sock, struct socket *other)
+{
+	struct inode *from = SOCK_INODE(sock);
+	struct inode *to = SOCK_INODE(other);
+
+	return smk_access(smk_of_inode(from), smk_of_inode(to), MAY_WRITE);
+}
+
+/**
+ * smack_from_secattr - Convert a netlabel attr.mls.lvl/attr.mls.cat
+ *	pair to a smack label
+ * @sap: netlabel secattr
+ * @sip: where to put the result
+ *
+ * Copies at most SMK_MAXLEN bytes of smack label into sip
+ */
+static void smack_from_secattr(struct netlbl_lsm_secattr *sap, char *sip)
+{
+	char catset[SMK_LABELLEN];
+	int cat;
+
+	if (!(sap->flags & NETLBL_SECATTR_MLS_LVL)) {
+		/*
+		 * If there are flags but no level netlabel isn't
+		 * behaving the way we expect it to.
+		 *
+		 * Without guidance regarding the smack value
+		 * for the packet fall back on the network
+		 * ambient value.
+		 */
+		strncpy(sip, smack_net_ambient, SMK_MAXLEN);
+		return;
+	}
+	/*
+	 * Collect the categories, if any, into a bit set
+	 */
+	memset(catset, '\0', sizeof(catset));
+	if (sap->flags & NETLBL_SECATTR_MLS_CAT) {
+		cat = netlbl_secattr_catmap_walk(sap->attr.mls.cat, 0);
+		while (cat >= 0) {
+			smack_catset_bit(cat, catset);
+			cat = netlbl_secattr_catmap_walk(sap->attr.mls.cat,
+							 cat + 1);
+		}
+	}
+	/*
+	 * If it is CIPSO using smack direct mapping the
+	 * category bits are the label itself. WeeHee.
+	 */
+	if (sap->attr.mls.lvl == smack_cipso_direct) {
+		memcpy(sip, catset, SMK_MAXLEN);
+		return;
+	}
+	/*
+	 * Look it up in the supplied table if it is not a direct mapping.
+	 */
+	smack_from_cipso(sap->attr.mls.lvl, catset, sip);
+	return;
+}
+
+/**
+ * smack_socket_sock_rcv_skb - Smack packet delivery access check
+ * @sk: the receiving socket
+ * @skb: the incoming packet
+ *
+ * Returns 0 if the packet should be delivered, an error code otherwise
+ */
+static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	struct netlbl_lsm_secattr secattr;
+	struct socket_smack *ssp = sk->sk_security;
+	char sender[SMK_LABELLEN];
+
+	/* Only IPv4 and IPv6 sockets are subject to the check. */
+	if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+		return 0;
+
+	/*
+	 * Translate what netlabel gave us, falling back on the
+	 * ambient label when netlabel cannot supply attributes.
+	 */
+	memset(sender, '\0', sizeof(sender));
+	netlbl_secattr_init(&secattr);
+	if (netlbl_skbuff_getattr(skb, sk->sk_family, &secattr) == 0)
+		smack_from_secattr(&secattr, sender);
+	else
+		strncpy(sender, smack_net_ambient, SMK_MAXLEN);
+	netlbl_secattr_destroy(&secattr);
+	/*
+	 * Receiving a packet requires that the other end
+	 * be able to write here. Read access is not required.
+	 * This is the simplest possible security model
+	 * for networking.
+	 */
+	return smk_access(sender, ssp->smk_in, MAY_WRITE);
+}
+
+/**
+ * smack_socket_getpeersec_stream - pull in packet label
+ * @sock: the socket
+ * @optval: user's destination
+ * @optlen: where the length of the label, NUL included, is stored
+ * @len: size of the user's destination
+ *
+ * returns zero on success, an error code otherwise
+ */
+static int smack_socket_getpeersec_stream(struct socket *sock,
+					  char __user *optval,
+					  int __user *optlen, unsigned len)
+{
+	struct socket_smack *ssp = sock->sk->sk_security;
+	int slen;
+	int rc = 0;
+
+	/* The label plus its terminating NUL must fit in the buffer. */
+	slen = strlen(ssp->smk_packet) + 1;
+	if (slen > len)
+		rc = -ERANGE;
+	else if (copy_to_user(optval, ssp->smk_packet, slen))
+		rc = -EFAULT;
+
+	/* The length needed is reported even if the copy did not happen. */
+	if (put_user(slen, optlen))
+		rc = -EFAULT;
+
+	return rc;
+}
+
+
+/**
+ * smack_socket_getpeersec_dgram - pull in packet label
+ * @sock: the socket
+ * @skb: packet data
+ * @secid: pointer to where to put the secid of the packet
+ *
+ * Returns 0 on success (or for a non-IP socket), an error code otherwise
+ */
+static int smack_socket_getpeersec_dgram(struct socket *sock,
+					 struct sk_buff *skb, u32 *secid)
+{
+	struct netlbl_lsm_secattr secattr;
+	char label[SMK_LABELLEN];
+	int family = PF_INET;
+	u32 id;
+	int rc;
+
+	/*
+	 * Only works for families with packets.
+	 * Without a socket the family defaults to PF_INET.
+	 */
+	if (sock != NULL) {
+		family = sock->sk->sk_family;
+		if (family != PF_INET && family != PF_INET6)
+			return 0;
+	}
+
+	/*
+	 * Translate what netlabel gave us.
+	 */
+	memset(label, '\0', sizeof(label));
+	netlbl_secattr_init(&secattr);
+	rc = netlbl_skbuff_getattr(skb, family, &secattr);
+	if (rc == 0)
+		smack_from_secattr(&secattr, label);
+	netlbl_secattr_destroy(&secattr);
+
+	/*
+	 * Give up if we couldn't get anything
+	 */
+	if (rc != 0)
+		return rc;
+
+	/* A secid of zero is treated as a failed lookup. */
+	id = smack_to_secid(label);
+	if (id == 0)
+		return -EINVAL;
+
+	*secid = id;
+	return 0;
+}
+
+/**
+ * smack_sock_graft - set up the Smack state of a grafted sock
+ * @sk: fresh sock
+ * @parent: donor socket, not examined here
+ *
+ * Labels sk with the smack of the current task, then calls smack_netlabel()
+ */
+static void smack_sock_graft(struct sock *sk, struct socket *parent)
+{
+	struct socket_smack *ssp;
+
+	if (sk == NULL ||
+	    (sk->sk_family != PF_INET && sk->sk_family != PF_INET6))
+		return;
+
+	ssp = sk->sk_security;
+	ssp->smk_in = current->security;
+	ssp->smk_out = current->security;
+	ssp->smk_packet[0] = '\0';
+
+	/*
+	 * This hook returns void, so the result cannot be passed on.
+	 */
+	(void)smack_netlabel(sk);
+}
+
+/**
+ * smack_inet_conn_request - Smack access check on connect
+ * @sk: socket involved
+ * @skb: packet
+ * @req: unused
+ *
+ * Returns 0 if a task with the packet label could write to
+ * the socket, otherwise an error code
+ */
+static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb,
+				   struct request_sock *req)
+{
+	struct netlbl_lsm_secattr secattr;
+	struct socket_smack *ssp = sk->sk_security;
+	char peer[SMK_LABELLEN];
+	int rc;
+
+	if (!skb)
+		return -EACCES;
+
+	memset(peer, '\0', sizeof(peer));
+	netlbl_secattr_init(&secattr);
+	rc = netlbl_skbuff_getattr(skb, sk->sk_family, &secattr);
+	if (rc != 0)
+		strncpy(peer, smack_known_huh.smk_known, SMK_MAXLEN);
+	else
+		smack_from_secattr(&secattr, peer);
+	netlbl_secattr_destroy(&secattr);
+	/*
+	 * Receiving a packet requires that the other end
+	 * be able to write here. Read access is not required.
+	 *
+	 * If the request is successful save the peer's label so
+	 * that smack_socket_getpeersec_stream() can report it.
+	 */
+	rc = smk_access(peer, ssp->smk_in, MAY_WRITE);
+	if (rc == 0)
+		strncpy(ssp->smk_packet, peer, SMK_MAXLEN);
+
+	return rc;
+}
+
+/*
+ * Key management security hooks
+ *
+ * Casey has not tested key support very heavily.
+ * The permission check is most likely too restrictive.
+ * If you care about keys please have a look.
+ */
+#ifdef CONFIG_KEYS
+
+/**
+ * smack_key_alloc - Set the key security blob
+ * @key: object
+ * @tsk: the task associated with the key
+ * @flags: unused
+ *
+ * No allocation required, the key shares the task's blob pointer
+ *
+ * Returns 0
+ */
+static int smack_key_alloc(struct key *key, struct task_struct *tsk,
+			   unsigned long flags)
+{
+	key->security = tsk->security;	/* pointer shared, nothing copied */
+	return 0;
+}
+
+/**
+ * smack_key_free - Clear the key security blob
+ * @key: the object
+ *
+ * Resets the blob pointer to NULL; no memory is released here
+ */
+static void smack_key_free(struct key *key)
+{
+	key->security = NULL;	/* only the pointer is dropped */
+}
+
+/**
+ * smack_key_permission - Smack access on a key
+ * @key_ref: gets to the object
+ * @context: task involved
+ * @perm: unused
+ *
+ * Return 0 if the task has read and write to the object,
+ * an error code otherwise
+ */
+static int smack_key_permission(key_ref_t key_ref,
+				struct task_struct *context, key_perm_t perm)
+{
+	struct key *keyp = key_ref_to_ptr(key_ref);
+
+	/* A reference that yields no key cannot be checked. */
+	if (keyp == NULL)
+		return -EINVAL;
+	/*
+	 * A key that carries no label yet is given access
+	 * so that it may be initialized.
+	 */
+	if (keyp->security == NULL)
+		return 0;
+	/*
+	 * This should not occur
+	 */
+	if (context->security == NULL)
+		return -EACCES;
+
+	return smk_access(context->security, keyp->security, MAY_READWRITE);
+}
+#endif /* CONFIG_KEYS */
+
+/**
+ * smack_secid_to_secctx - return the smack label for a secid
+ * @secid: incoming integer
+ * @secdata: where the pointer to the label is stored
+ * @seclen: where the length of the label is stored
+ *
+ * Exists for networking code. Returns 0.
+ */
+static int smack_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
+{
+	char *label = smack_from_secid(secid);
+
+	*seclen = strlen(label);
+	*secdata = label;
+	return 0;
+}
+
+/**
+ * smack_release_secctx - don't do anything.
+ * @secdata: unused
+ * @seclen: unused
+ *
+ * smack_secid_to_secctx() allocates nothing, so there is
+ * nothing to release. Exists to make sure nothing gets done.
+ */
+static void smack_release_secctx(char *secdata, u32 seclen)
+{
+}
+
+static struct security_operations smack_ops = {
+	.ptrace = 			smack_ptrace,
+	.capget = 			cap_capget,
+	.capset_check = 		cap_capset_check,
+	.capset_set = 			cap_capset_set,
+	.capable = 			cap_capable,
+	.syslog = 			smack_syslog,
+	.settime = 			cap_settime,
+	.vm_enough_memory = 		cap_vm_enough_memory,
+
+	.bprm_apply_creds = 		cap_bprm_apply_creds,
+	.bprm_set_security = 		cap_bprm_set_security,
+	.bprm_secureexec = 		cap_bprm_secureexec,
+
+	.sb_alloc_security = 		smack_sb_alloc_security,
+	.sb_free_security = 		smack_sb_free_security,
+	.sb_copy_data = 		smack_sb_copy_data,
+	.sb_kern_mount = 		smack_sb_kern_mount,
+	.sb_statfs = 			smack_sb_statfs,
+	.sb_mount = 			smack_sb_mount,
+	.sb_umount = 			smack_sb_umount,
+
+	.inode_alloc_security = 	smack_inode_alloc_security,
+	.inode_free_security = 		smack_inode_free_security,
+	.inode_init_security = 		smack_inode_init_security,
+	.inode_link = 			smack_inode_link,
+	.inode_unlink = 		smack_inode_unlink,
+	.inode_rmdir = 			smack_inode_rmdir,
+	.inode_rename = 		smack_inode_rename,
+	.inode_permission = 		smack_inode_permission,
+	.inode_setattr = 		smack_inode_setattr,
+	.inode_getattr = 		smack_inode_getattr,
+	.inode_setxattr = 		smack_inode_setxattr,
+	.inode_post_setxattr = 		smack_inode_post_setxattr,
+	.inode_getxattr = 		smack_inode_getxattr,
+	.inode_removexattr = 		smack_inode_removexattr,
+	.inode_getsecurity = 		smack_inode_getsecurity,
+	.inode_setsecurity = 		smack_inode_setsecurity,
+	.inode_listsecurity = 		smack_inode_listsecurity,
+
+	.file_permission = 		smack_file_permission,
+	.file_alloc_security = 		smack_file_alloc_security,
+	.file_free_security = 		smack_file_free_security,
+	.file_ioctl = 			smack_file_ioctl,
+	.file_lock = 			smack_file_lock,
+	.file_fcntl = 			smack_file_fcntl,
+	.file_set_fowner = 		smack_file_set_fowner,
+	.file_send_sigiotask = 		smack_file_send_sigiotask,
+	.file_receive = 		smack_file_receive,
+
+	.task_alloc_security = 		smack_task_alloc_security,
+	.task_free_security = 		smack_task_free_security,
+	.task_post_setuid =		cap_task_post_setuid,
+	.task_setpgid = 		smack_task_setpgid,
+	.task_getpgid = 		smack_task_getpgid,
+	.task_getsid = 			smack_task_getsid,
+	.task_getsecid = 		smack_task_getsecid,
+	.task_setnice = 		smack_task_setnice,
+	.task_setioprio = 		smack_task_setioprio,
+	.task_getioprio = 		smack_task_getioprio,
+	.task_setscheduler = 		smack_task_setscheduler,
+	.task_getscheduler = 		smack_task_getscheduler,
+	.task_movememory = 		smack_task_movememory,
+	.task_kill = 			smack_task_kill,
+	.task_wait = 			smack_task_wait,
+	.task_reparent_to_init =	cap_task_reparent_to_init,
+	.task_to_inode = 		smack_task_to_inode,
+
+	.ipc_permission = 		smack_ipc_permission,
+
+	.msg_msg_alloc_security = 	smack_msg_msg_alloc_security,
+	.msg_msg_free_security = 	smack_msg_msg_free_security,
+
+	.msg_queue_alloc_security = 	smack_msg_queue_alloc_security,
+	.msg_queue_free_security = 	smack_msg_queue_free_security,
+	.msg_queue_associate = 		smack_msg_queue_associate,
+	.msg_queue_msgctl = 		smack_msg_queue_msgctl,
+	.msg_queue_msgsnd = 		smack_msg_queue_msgsnd,
+	.msg_queue_msgrcv = 		smack_msg_queue_msgrcv,
+
+	.shm_alloc_security = 		smack_shm_alloc_security,
+	.shm_free_security = 		smack_shm_free_security,
+	.shm_associate = 		smack_shm_associate,
+	.shm_shmctl = 			smack_shm_shmctl,
+	.shm_shmat = 			smack_shm_shmat,
+
+	.sem_alloc_security = 		smack_sem_alloc_security,
+	.sem_free_security = 		smack_sem_free_security,
+	.sem_associate = 		smack_sem_associate,
+	.sem_semctl = 			smack_sem_semctl,
+	.sem_semop = 			smack_sem_semop,
+
+	.netlink_send =			cap_netlink_send,
+	.netlink_recv = 		cap_netlink_recv,
+
+	.d_instantiate = 		smack_d_instantiate,
+
+	.getprocattr = 			smack_getprocattr,
+	.setprocattr = 			smack_setprocattr,
+
+	.unix_stream_connect = 		smack_unix_stream_connect,
+	.unix_may_send = 		smack_unix_may_send,
+
+	.socket_post_create = 		smack_socket_post_create,
+	.socket_sock_rcv_skb = 		smack_socket_sock_rcv_skb,
+	.socket_getpeersec_stream =	smack_socket_getpeersec_stream,
+	.socket_getpeersec_dgram =	smack_socket_getpeersec_dgram,
+	.sk_alloc_security = 		smack_sk_alloc_security,
+	.sk_free_security = 		smack_sk_free_security,
+	.sock_graft = 			smack_sock_graft,
+	.inet_conn_request = 		smack_inet_conn_request,
+	/* key management security hooks, only built with CONFIG_KEYS */
+#ifdef CONFIG_KEYS
+	.key_alloc = 			smack_key_alloc,
+	.key_free = 			smack_key_free,
+	.key_permission = 		smack_key_permission,
+#endif /* CONFIG_KEYS */
+	.secid_to_secctx = 		smack_secid_to_secctx,
+	.release_secctx = 		smack_release_secctx,
+};
+
+/**
+ * smack_init - initialize the smack system
+ *
+ * Returns 0; a failure to register with the LSM framework panics
+ */
+static __init int smack_init(void)
+{
+	printk(KERN_INFO "Smack:  Initializing.\n");
+
+	/*
+	 * The initial task starts out with the floor label.
+	 */
+	current->security = smack_known_floor.smk_known;
+
+	/*
+	 * Initialize the cipso lock of each predefined label
+	 */
+	spin_lock_init(&smack_known_floor.smk_cipsolock);
+	spin_lock_init(&smack_known_hat.smk_cipsolock);
+	spin_lock_init(&smack_known_huh.smk_cipsolock);
+	spin_lock_init(&smack_known_invalid.smk_cipsolock);
+	spin_lock_init(&smack_known_star.smk_cipsolock);
+	spin_lock_init(&smack_known_unset.smk_cipsolock);
+
+	/*
+	 * Register with LSM
+	 */
+	if (register_security(&smack_ops) != 0)
+		panic("smack: Unable to register with kernel.\n");
+
+	return 0;
+}
+
+/*
+ * Smack has to be initialized early so that every process
+ * and object can be labeled at the time it is created.
+ */
+security_initcall(smack_init);
+
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
new file mode 100644
index 0000000..15aa37f
--- /dev/null
+++ b/security/smack/smackfs.c
@@ -0,0 +1,981 @@
+/*
+ * Copyright (C) 2007 Casey Schaufler <casey@schaufler-ca.com>
+ *
+ *	This program is free software; you can redistribute it and/or modify
+ *  	it under the terms of the GNU General Public License as published by
+ *	the Free Software Foundation, version 2.
+ *
+ * Authors:
+ * 	Casey Schaufler <casey@schaufler-ca.com>
+ * 	Ahmed S. Darwish <darwish.07@gmail.com>
+ *
+ * Special thanks to the authors of selinuxfs.
+ *
+ *	Karl MacMillan <kmacmillan@tresys.com>
+ *	James Morris <jmorris@redhat.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+#include <linux/security.h>
+#include <linux/mutex.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+#include <linux/seq_file.h>
+#include <linux/ctype.h>
+#include "smack.h"
+
+/*
+ * smackfs pseudo filesystem entries (presumably their inode numbers).
+ */
+
+enum smk_inos {
+	SMK_ROOT_INO	= 2,	/* presumably the root directory - confirm */
+	SMK_LOAD	= 3,	/* load policy */
+	SMK_CIPSO	= 4,	/* load label -> CIPSO mapping */
+	SMK_DOI		= 5,	/* CIPSO DOI */
+	SMK_DIRECT	= 6,	/* CIPSO level indicating direct label */
+	SMK_AMBIENT	= 7,	/* internet ambient label */
+	SMK_NLTYPE	= 8,	/* label scheme to use by default */
+};
+
+/*
+ * List locks: one for the access rule list, one for cipso rule writers
+ */
+static DEFINE_MUTEX(smack_list_lock);
+static DEFINE_MUTEX(smack_cipso_lock);
+
+/*
+ * This is the "ambient" label for network traffic.
+ * If a packet isn't otherwise labeled, use this.
+ * It can be reset via smackfs/ambient
+ */
+char *smack_net_ambient = smack_known_floor.smk_known;
+
+/*
+ * This is the default packet marking scheme for network traffic.
+ * It can be reset via smackfs/nltype
+ */
+int smack_net_nltype = NETLBL_NLTYPE_CIPSOV4;
+
+/*
+ * This is the level in a CIPSO header that indicates a
+ * smack label is contained directly in the category set.
+ * It can be reset via smackfs/direct
+ */
+int smack_cipso_direct = SMACK_CIPSO_DIRECT_DEFAULT;
+
+static int smk_cipso_doi_value = SMACK_CIPSO_DOI_DEFAULT; /* DOI to install */
+struct smk_list_entry *smack_list;	/* head of the access rule list */
+
+#define	SEQ_READ_FINISHED	1	/* *pos once a seq_file walk is done */
+
+/*
+ * Disable concurrent writing open() operations
+ */
+static struct semaphore smack_write_sem;
+
+/*
+ * Values for parsing cipso rules
+ * SMK_DIGITLEN: Length of a digit field in a rule.
+ * SMK_CIPSOMIN: Minimum possible cipso rule length.
+ */
+#define SMK_DIGITLEN 4
+#define SMK_CIPSOMIN (SMK_MAXLEN + 2 * SMK_DIGITLEN)
+
+/*
+ * Seq_file read operations for /smack/load
+ */
+
+static void *load_seq_start(struct seq_file *s, loff_t *pos)
+{
+	/*
+	 * Once load_seq_next() has run off the list there is nothing left.
+	 */
+	return (*pos == SEQ_READ_FINISHED) ? NULL : smack_list;
+}
+
+static void *load_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct smk_list_entry *cur = v;
+	struct smk_list_entry *next = cur->smk_next;
+
+	if (!next)
+		*pos = SEQ_READ_FINISHED;
+	return next;
+}
+
+static int load_seq_show(struct seq_file *s, void *v)
+{
+	struct smk_list_entry *entry = v;
+	struct smack_rule *rule = &entry->smk_rule;
+
+	/* "subject object " followed by one letter per access granted */
+	seq_printf(s, "%s %s", (char *)rule->smk_subject,
+		   (char *)rule->smk_object);
+	seq_putc(s, ' ');
+
+	if (rule->smk_access & MAY_READ)
+		seq_putc(s, 'r');
+	if (rule->smk_access & MAY_WRITE)
+		seq_putc(s, 'w');
+	if (rule->smk_access & MAY_EXEC)
+		seq_putc(s, 'x');
+	if (rule->smk_access & MAY_APPEND)
+		seq_putc(s, 'a');
+	if (rule->smk_access == 0)
+		seq_putc(s, '-');
+
+	seq_putc(s, '\n');
+
+	return 0;
+}
+
+static void load_seq_stop(struct seq_file *s, void *v)
+{
+	/* No-op: load_seq_start() acquires nothing that needs releasing */
+}
+
+static struct seq_operations load_seq_ops = {
+	.start	= load_seq_start,
+	.stop	= load_seq_stop,
+	.next	= load_seq_next,
+	.show	= load_seq_show,
+};
+
+/**
+ * smk_open_load - open() for /smack/load
+ * @inode: inode structure representing file
+ * @file: "load" file pointer
+ *
+ * Read-only opens get the load_seq_* seq_file; others take smack_write_sem.
+ */
+static int smk_open_load(struct inode *inode, struct file *file)
+{
+	if ((file->f_flags & O_ACCMODE) == O_RDONLY)
+		return seq_open(file, &load_seq_ops);
+
+	/*
+	 * NOTE(review): O_RDWR opens get no seq_file; confirm read() copes.
+	 */
+	return down_interruptible(&smack_write_sem) ? -ERESTARTSYS : 0;
+}
+
+/**
+ * smk_release_load - release() for /smack/load
+ * @inode: inode structure representing file
+ * @file: "load" file pointer
+ *
+ * A read-only session is handed to the seq_file release
+ * implementation.
+ * Any other session is a writing one, so the write
+ * semaphore taken by smk_open_load() is given back.
+ */
+static int smk_release_load(struct inode *inode, struct file *file)
+{
+	if ((file->f_flags & O_ACCMODE) != O_RDONLY) {
+		up(&smack_write_sem);
+		return 0;
+	}
+	return seq_release(inode, file);
+}
+
+/**
+ * smk_set_access - add a rule to the rule list
+ * @srp: the new rule to add
+ *
+ * Looks through the current subject/object/access list for
+ * the subject/object pair and replaces the access that was
+ * there. If the pair isn't found add it with the specified
+ * access.
+ *
+ * Returns 0 on success, -ENOMEM if a new entry cannot be allocated
+ */
+static int smk_set_access(struct smack_rule *srp)
+{
+	struct smk_list_entry *sp;
+	struct smk_list_entry *newp;
+	int rc = 0;
+
+	mutex_lock(&smack_list_lock);
+
+	for (sp = smack_list; sp != NULL; sp = sp->smk_next)
+		if (sp->smk_rule.smk_subject == srp->smk_subject &&
+		    sp->smk_rule.smk_object == srp->smk_object) {
+			sp->smk_rule.smk_access = srp->smk_access;
+			break;
+		}
+
+	if (sp == NULL) {
+		newp = kzalloc(sizeof(*newp), GFP_KERNEL);
+		if (newp == NULL) {
+			rc = -ENOMEM;
+			goto out;
+		}
+		newp->smk_rule = *srp;
+		newp->smk_next = smack_list;
+		smack_list = newp;
+	}
+out:
+	mutex_unlock(&smack_list_lock);
+	return rc;
+}
+
+/**
+ * smk_write_load - write() for /smack/load
+ * @file: file pointer, not actually used
+ * @buf: where to get the data from
+ * @count: bytes sent
+ * @ppos: where to start - must be 0
+ *
+ * Get one smack access rule from above. The format is exactly:
+ *     char subject[SMK_LABELLEN]
+ *     char object[SMK_LABELLEN]
+ *     char access[SMK_ACCESSKINDS]
+ * Anything following is commentary and ignored, so a write
+ * must be at least MINIMUM_LOAD bytes long.
+ *
+ * Returns @count on success or an error code
+ */
+#define MINIMUM_LOAD (SMK_LABELLEN + SMK_LABELLEN + SMK_ACCESSKINDS)
+
+static ssize_t smk_write_load(struct file *file, const char __user *buf,
+			      size_t count, loff_t *ppos)
+{
+	struct smack_rule rule;
+	char *data;
+	int rc = -EINVAL;
+
+	/* Must have privilege, no partial writes, enough data present. */
+	if (!capable(CAP_MAC_ADMIN))
+		return -EPERM;
+	if (*ppos != 0 || count < MINIMUM_LOAD)
+		return -EINVAL;
+
+	data = kzalloc(count, GFP_KERNEL);
+	if (data == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(data, buf, count) != 0) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	rule.smk_subject = smk_import(data, 0);
+	if (rule.smk_subject == NULL)
+		goto out;
+
+	rule.smk_object = smk_import(data + SMK_LABELLEN, 0);
+	if (rule.smk_object == NULL)
+		goto out;
+
+	rule.smk_access = 0;
+
+	switch (data[SMK_LABELLEN + SMK_LABELLEN]) {
+	case '-':
+		break;
+	case 'r':
+	case 'R':
+		rule.smk_access |= MAY_READ;
+		break;
+	default:
+		goto out;
+	}
+
+	switch (data[SMK_LABELLEN + SMK_LABELLEN + 1]) {
+	case '-':
+		break;
+	case 'w':
+	case 'W':
+		rule.smk_access |= MAY_WRITE;
+		break;
+	default:
+		goto out;
+	}
+
+	switch (data[SMK_LABELLEN + SMK_LABELLEN + 2]) {
+	case '-':
+		break;
+	case 'x':
+	case 'X':
+		rule.smk_access |= MAY_EXEC;
+		break;
+	default:
+		goto out;
+	}
+
+	switch (data[SMK_LABELLEN + SMK_LABELLEN + 3]) {
+	case '-':
+		break;
+	case 'a':
+	case 'A':
+		rule.smk_access |= MAY_APPEND;
+		break;
+	default:
+		goto out;
+	}
+
+	rc = smk_set_access(&rule);
+	if (rc == 0)
+		rc = count;
+
+out:
+	kfree(data);
+	return rc;
+}
+
+static const struct file_operations smk_load_ops = {
+	.open		= smk_open_load,
+	.release	= smk_release_load,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.write		= smk_write_load,
+};
+
+/**
+ * smk_cipso_doi - initialize the CIPSO domain from smk_cipso_doi_value
+ */
+void smk_cipso_doi(void)
+{
+	int rc;
+	struct cipso_v4_doi *doip;
+	struct netlbl_audit audit_info = { 0 };	/* no stack garbage to netlabel */
+
+	rc = netlbl_cfg_map_del(NULL, &audit_info);
+	if (rc != 0)
+		printk(KERN_WARNING "%s:%d remove rc = %d\n",
+		       __func__, __LINE__, rc);
+
+	doip = kmalloc(sizeof(*doip), GFP_KERNEL);
+	if (doip == NULL)
+		panic("smack:  Failed to initialize cipso DOI.\n");
+	doip->map.std = NULL;
+	doip->doi = smk_cipso_doi_value;
+	doip->type = CIPSO_V4_MAP_PASS;
+	doip->tags[0] = CIPSO_V4_TAG_RBITMAP;
+	for (rc = 1; rc < CIPSO_V4_TAG_MAXCNT; rc++)
+		doip->tags[rc] = CIPSO_V4_TAG_INVALID;
+	/* NOTE(review): doip is not freed if the add fails; confirm owner. */
+	rc = netlbl_cfg_cipsov4_add_map(doip, NULL, &audit_info);
+	if (rc != 0)
+		printk(KERN_WARNING "%s:%d add rc = %d\n",
+		       __func__, __LINE__, rc);
+}
+
+/*
+ * Seq_file read operations for /smack/cipso
+ */
+
+static void *cipso_seq_start(struct seq_file *s, loff_t *pos)
+{
+	/*
+	 * Once cipso_seq_next() has run off the list there is nothing left.
+	 */
+	return (*pos == SEQ_READ_FINISHED) ? NULL : smack_known;
+}
+
+static void *cipso_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct smack_known *cur = v;
+	struct smack_known *next;
+
+	/*
+	 * Omit labels with no associated cipso value
+	 */
+	for (next = cur->smk_next; next != NULL; next = next->smk_next)
+		if (next->smk_cipso != NULL)
+			break;
+	if (next == NULL)
+		*pos = SEQ_READ_FINISHED;
+	return next;
+}
+
+/*
+ * Print one cipso label per line in the format:
+ * label level[/cat[,cat]]
+ */
+static int cipso_seq_show(struct seq_file *s, void *v)
+{
+	struct smack_known *skp = v;
+	struct smack_cipso *scp = skp->smk_cipso;
+	char *bits;
+	char sep = '/';
+	int cat = 1;
+	int i;
+	unsigned char mask;
+
+	if (scp == NULL)
+		return 0;
+
+	seq_printf(s, "%s %3d", (char *)&skp->smk_known, scp->smk_level);
+
+	bits = scp->smk_catset;
+	for (i = 0; i < SMK_LABELLEN; i++) {
+		for (mask = 0x80; mask != 0; mask >>= 1, cat++) {
+			if (!(mask & bits[i]))
+				continue;
+			seq_printf(s, "%c%d", sep, cat);
+			sep = ',';
+		}
+	}
+
+	seq_putc(s, '\n');
+
+	return 0;
+}
+
+static void cipso_seq_stop(struct seq_file *s, void *v)
+{
+	/* No-op: cipso_seq_start() acquires nothing that needs releasing */
+}
+
+static struct seq_operations cipso_seq_ops = {
+	.start	= cipso_seq_start,
+	.stop	= cipso_seq_stop,
+	.next	= cipso_seq_next,
+	.show	= cipso_seq_show,
+};
+
+/**
+ * smk_open_cipso - open() for /smack/cipso
+ * @inode: inode structure representing file
+ * @file: "cipso" file pointer
+ *
+ * Attaches the cipso_seq_* seq_file operations to every open of
+ * /smack/cipso, whatever the access mode. Returns seq_open()'s result.
+ */
+static int smk_open_cipso(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &cipso_seq_ops);
+}
+
+/**
+ * smk_write_cipso - write() for /smack/cipso
+ * @file: file pointer, not actually used
+ * @buf: where to get the data from
+ * @count: bytes sent
+ * @ppos: where to start - must be 0
+ *
+ * Accepts only one cipso rule per write call.
+ * Returns number of bytes written or error code, as appropriate
+ */
+static ssize_t smk_write_cipso(struct file *file, const char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	struct smack_known *skp;
+	struct smack_cipso *scp = NULL;
+	char mapcatset[SMK_LABELLEN];
+	int maplevel;
+	int cat;
+	int catlen;
+	ssize_t rc = -EINVAL;
+	char *data = NULL;
+	char *rule;
+	int ret;
+	int i;
+
+	/*
+	 * Must have privilege.
+	 * No partial writes.
+	 * Enough data must be present.
+	 */
+	if (!capable(CAP_MAC_ADMIN))
+		return -EPERM;
+	if (*ppos != 0)
+		return -EINVAL;
+	if (count <= SMK_CIPSOMIN)
+		return -EINVAL;
+
+	data = kzalloc(count + 1, GFP_KERNEL);
+	if (data == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(data, buf, count) != 0) {
+		rc = -EFAULT;
+		goto unlockedout;
+	}
+
+	data[count] = '\0';
+	rule = data;
+	/*
+	 * Only allow one writer at a time. Writes should be
+	 * quite rare and small in any case.
+	 */
+	mutex_lock(&smack_cipso_lock);
+
+	skp = smk_import_entry(rule, 0);
+	if (skp == NULL)
+		goto out;
+	/* sscanf("%d") accepts a sign, so negative values are refused too. */
+	rule += SMK_LABELLEN;
+	ret = sscanf(rule, "%d", &maplevel);
+	if (ret != 1 || maplevel < 0 || maplevel > SMACK_CIPSO_MAXLEVEL)
+		goto out;
+
+	rule += SMK_DIGITLEN;
+	ret = sscanf(rule, "%d", &catlen);
+	if (ret != 1 || catlen < 0 || catlen > SMACK_CIPSO_MAXCATNUM)
+		goto out;
+	/* Every category announced must be present in the data. */
+	if (count <= (SMK_CIPSOMIN + catlen * SMK_DIGITLEN))
+		goto out;
+
+	memset(mapcatset, 0, sizeof(mapcatset));
+
+	for (i = 0; i < catlen; i++) {
+		rule += SMK_DIGITLEN;
+		ret = sscanf(rule, "%d", &cat);
+		if (ret != 1 || cat < 0 || cat > SMACK_CIPSO_MAXCATVAL)
+			goto out;
+
+		smack_catset_bit(cat, mapcatset);
+	}
+	/* Allocate before taking the spinlock; an existing entry is reused. */
+	if (skp->smk_cipso == NULL) {
+		scp = kzalloc(sizeof(struct smack_cipso), GFP_KERNEL);
+		if (scp == NULL) {
+			rc = -ENOMEM;
+			goto out;
+		}
+	}
+
+	spin_lock_bh(&skp->smk_cipsolock);
+
+	if (scp == NULL)
+		scp = skp->smk_cipso;
+	else
+		skp->smk_cipso = scp;
+
+	scp->smk_level = maplevel;
+	memcpy(scp->smk_catset, mapcatset, sizeof(mapcatset));
+
+	spin_unlock_bh(&skp->smk_cipsolock);
+
+	rc = count;
+out:
+	mutex_unlock(&smack_cipso_lock);
+unlockedout:
+	kfree(data);
+	return rc;
+}
+
+static const struct file_operations smk_cipso_ops = {
+	.open		= smk_open_cipso,
+	.release	= seq_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.write		= smk_write_cipso,
+};
+
+/**
+ * smk_read_doi - read() for /smack/doi
+ * @filp: file pointer, not actually used
+ * @buf: where to put the result
+ * @count: maximum to send along
+ * @ppos: where to start
+ *
+ * Returns number of bytes read or error code; 0 once *@ppos is nonzero
+ */
+static ssize_t smk_read_doi(struct file *filp, char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	char text[80];
+	int len;
+
+	/* Only a read from offset zero yields data; any other offset is EOF */
+	if (*ppos != 0)
+		return 0;
+
+	len = sprintf(text, "%d", smk_cipso_doi_value);
+
+	return simple_read_from_buffer(buf, count, ppos, text, len);
+}
+
+/**
+ * smk_write_doi - write() for /smack/doi
+ * @file: file pointer, not actually used
+ * @buf: where to get the data from
+ * @count: bytes sent
+ * @ppos: where to start, not actually used
+ *
+ * Returns number of bytes written or error code, as appropriate
+ */
+static ssize_t smk_write_doi(struct file *file, const char __user *buf,
+			     size_t count, loff_t *ppos)
+{
+	char text[80];
+	int doi;
+
+	if (!capable(CAP_MAC_ADMIN))
+		return -EPERM;
+
+	/* Need at least one byte, and room left for the terminating NUL */
+	if (count == 0 || count >= sizeof(text))
+		return -EINVAL;
+
+	if (copy_from_user(text, buf, count) != 0)
+		return -EFAULT;
+	text[count] = '\0';
+
+	if (sscanf(text, "%d", &doi) != 1)
+		return -EINVAL;
+
+	/* Record the new value before smk_cipso_doi() is invoked */
+	smk_cipso_doi_value = doi;
+	smk_cipso_doi();
+
+	return count;
+}
+
+static const struct file_operations smk_doi_ops = {
+	.read		= smk_read_doi,		/* current DOI as decimal text */
+	.write		= smk_write_doi,	/* CAP_MAC_ADMIN only */
+};
+
+/**
+ * smk_read_direct - read() for /smack/direct
+ * @filp: file pointer, not actually used
+ * @buf: where to put the result
+ * @count: maximum to send along
+ * @ppos: where to start
+ *
+ * Returns number of bytes read or error code; 0 once *@ppos is nonzero
+ */
+static ssize_t smk_read_direct(struct file *filp, char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	char text[80];
+	int len;
+
+	/* Only a read from offset zero yields data; any other offset is EOF */
+	if (*ppos != 0)
+		return 0;
+
+	len = sprintf(text, "%d", smack_cipso_direct);
+
+	return simple_read_from_buffer(buf, count, ppos, text, len);
+}
+
+/**
+ * smk_write_direct - write() for /smack/direct
+ * @file: file pointer, not actually used
+ * @buf: where to get the data from
+ * @count: bytes sent
+ * @ppos: where to start, not actually used
+ *
+ * Returns number of bytes written or error code, as appropriate
+ */
+static ssize_t smk_write_direct(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	char text[80];
+	int value;
+
+	if (!capable(CAP_MAC_ADMIN))
+		return -EPERM;
+
+	/* Need at least one byte, and room left for the terminating NUL */
+	if (count == 0 || count >= sizeof(text))
+		return -EINVAL;
+
+	if (copy_from_user(text, buf, count) != 0)
+		return -EFAULT;
+	text[count] = '\0';
+
+	if (sscanf(text, "%d", &value) != 1)
+		return -EINVAL;
+
+	smack_cipso_direct = value;
+
+	return count;
+}
+
+static const struct file_operations smk_direct_ops = {
+	.read		= smk_read_direct,	/* smack_cipso_direct as decimal text */
+	.write		= smk_write_direct,	/* CAP_MAC_ADMIN only */
+};
+
+/**
+ * smk_read_ambient - read() for /smack/ambient
+ * @filp: file pointer, not actually used
+ * @buf: where to put the result
+ * @cn: maximum to send along
+ * @ppos: where to start
+ *
+ * Returns bytes read, 0 at a nonzero offset, -EINVAL if @cn is too small
+ */
+static ssize_t smk_read_ambient(struct file *filp, char __user *buf,
+				size_t cn, loff_t *ppos)
+{
+	ssize_t rc;
+	char out[SMK_LABELLEN];
+	int asize;
+
+	if (*ppos != 0)
+		return 0;
+	/*
+	 * Work on a private copy so that a concurrent change of
+	 * smack_net_ambient cannot bite midstream. It is always
+	 * set from the label list, including initially, and those
+	 * entries never get freed, so the worst case is a pointer
+	 * change just after the strncpy, making the value passed
+	 * up stale. Locking would be no better: by the time the
+	 * caller looked at the value the lock would be dropped
+	 * and the ambient label could have changed anyway.
+	 *
+	 * strncpy() leaves a full-length copy unterminated, so
+	 * force the final byte to NUL before calling strlen().
+	 */
+	strncpy(out, smack_net_ambient, SMK_LABELLEN);
+	out[SMK_LABELLEN - 1] = '\0';
+	asize = strlen(out) + 1;
+	if (cn < asize)
+		return -EINVAL;
+
+	rc = simple_read_from_buffer(buf, cn, ppos, out, asize);
+
+	return rc;
+}
+
+/**
+ * smk_write_ambient - write() for /smack/ambient
+ * @file: file pointer, not actually used
+ * @buf: where to get the data from
+ * @count: bytes sent
+ * @ppos: where to start, not actually used
+ *
+ * Returns number of bytes written or error code; empty writes get -EINVAL
+ */
+static ssize_t smk_write_ambient(struct file *file, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	char in[SMK_LABELLEN];
+	char *smack;
+
+	if (!capable(CAP_MAC_ADMIN))
+		return -EPERM;
+	/* An empty write names no label; also keep room for the NUL below */
+	if (count == 0 || count >= SMK_LABELLEN)
+		return -EINVAL;
+
+	if (copy_from_user(in, buf, count) != 0)
+		return -EFAULT;
+	/* Terminate it so smk_import() cannot run into stack garbage */
+	in[count] = '\0';
+	smack = smk_import(in, count);
+	if (smack == NULL)
+		return -EINVAL;
+
+	smack_net_ambient = smack;
+	return count;
+}
+
+static const struct file_operations smk_ambient_ops = {
+	.read		= smk_read_ambient,
+	.write		= smk_write_ambient,
+};
+
+struct option_names {
+	int	o_number;	/* NETLBL_NLTYPE_* value */
+	char	*o_name;	/* name reported on read, accepted on write */
+	char	*o_alias;	/* alternative spelling accepted on write */
+};
+
+static struct option_names netlbl_choices[] = {
+	{ .o_number = NETLBL_NLTYPE_RIPSO,
+	  .o_name = NETLBL_NLTYPE_RIPSO_NAME, .o_alias = "ripso" },
+	{ .o_number = NETLBL_NLTYPE_CIPSOV4,
+	  .o_name = NETLBL_NLTYPE_CIPSOV4_NAME, .o_alias = "cipsov4" },
+	{ .o_number = NETLBL_NLTYPE_CIPSOV4,
+	  .o_name = NETLBL_NLTYPE_CIPSOV4_NAME, .o_alias = "cipso" },
+	{ .o_number = NETLBL_NLTYPE_CIPSOV6,
+	  .o_name = NETLBL_NLTYPE_CIPSOV6_NAME, .o_alias = "cipsov6" },
+	{ .o_number = NETLBL_NLTYPE_UNLABELED,
+	  .o_name = NETLBL_NLTYPE_UNLABELED_NAME, .o_alias = "unlabeled" },
+};
+
+/**
+ * smk_read_nltype - read() for /smack/nltype
+ * @filp: file pointer, not actually used
+ * @buf: where to put the result
+ * @count: maximum to send along
+ * @ppos: where to start
+ *
+ * Returns number of bytes read or error code, as appropriate
+ */
+static ssize_t smk_read_nltype(struct file *filp, char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	const char *name = "unknown";
+	char bound[40];
+	int i;
+
+	if (count < SMK_LABELLEN)
+		return -EINVAL;
+
+	if (*ppos != 0)
+		return 0;
+
+	/* Use the name of the first table entry matching the current type */
+	for (i = 0; i < ARRAY_SIZE(netlbl_choices); i++)
+		if (netlbl_choices[i].o_number == smack_net_nltype) {
+			name = netlbl_choices[i].o_name;
+			break;
+		}
+
+	sprintf(bound, "%s", name);
+
+	return simple_read_from_buffer(buf, count, ppos,
+				       bound, strlen(bound));
+}
+
+/**
+ * smk_write_nltype - write() for /smack/nltype
+ * @file: file pointer, not actually used
+ * @buf: where to get the data from
+ * @count: bytes sent
+ * @ppos: where to start, not actually used
+ *
+ * Returns number of bytes written or error code, as appropriate
+ */
+static ssize_t smk_write_nltype(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	char bound[40];
+	char *cp;
+	int i;
+
+	if (!capable(CAP_MAC_ADMIN))
+		return -EPERM;
+
+	if (count >= sizeof(bound))
+		return -EINVAL;
+
+	if (copy_from_user(bound, buf, count) != 0)
+		return -EFAULT;
+	bound[count] = '\0';
+
+	/* Only the first word matters: cut at the first space or newline */
+	for (cp = bound; *cp != '\0'; cp++)
+		if (*cp == ' ' || *cp == '\n') {
+			*cp = '\0';
+			break;
+		}
+
+	for (i = 0; i < ARRAY_SIZE(netlbl_choices); i++) {
+		if (strcmp(bound, netlbl_choices[i].o_name) != 0 &&
+		    strcmp(bound, netlbl_choices[i].o_alias) != 0)
+			continue;
+		smack_net_nltype = netlbl_choices[i].o_number;
+		return count;
+	}
+
+	/* Not a valid choice. */
+	return -EINVAL;
+}
+
+static const struct file_operations smk_nltype_ops = {
+	.read		= smk_read_nltype,	/* current type name or "unknown" */
+	.write		= smk_write_nltype,	/* name or alias, CAP_MAC_ADMIN only */
+};
+
+/**
+ * smk_fill_super - fill the /smackfs superblock
+ * @sb: the empty superblock
+ * @data: unused
+ * @silent: unused
+ *
+ * Create the well known /smack entries, then label the root inode.
+ *
+ * Returns 0 on success, an error code on failure
+ */
+static int smk_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct inode *root;
+	int err;
+
+	static struct tree_descr smack_files[] = {
+		[SMK_LOAD]	=
+			{"load", &smk_load_ops, S_IRUGO|S_IWUSR},
+		[SMK_CIPSO]	=
+			{"cipso", &smk_cipso_ops, S_IRUGO|S_IWUSR},
+		[SMK_DOI]	=
+			{"doi", &smk_doi_ops, S_IRUGO|S_IWUSR},
+		[SMK_DIRECT]	=
+			{"direct", &smk_direct_ops, S_IRUGO|S_IWUSR},
+		[SMK_AMBIENT]	=
+			{"ambient", &smk_ambient_ops, S_IRUGO|S_IWUSR},
+		[SMK_NLTYPE]	=
+			{"nltype", &smk_nltype_ops, S_IRUGO|S_IWUSR},
+		/* last one */ {""}
+	};
+
+	err = simple_fill_super(sb, SMACK_MAGIC, smack_files);
+	if (err) {
+		printk(KERN_ERR
+		       "%s failed %d while creating inodes\n", __func__, err);
+		return err;
+	}
+
+	/* Label the root inode from smack_known_floor */
+	root = sb->s_root->d_inode;
+	root->i_security = new_inode_smack(smack_known_floor.smk_known);
+	return 0;
+}
+
+/**
+ * smk_get_sb - get the smackfs superblock
+ * @fs_type: passed along without comment
+ * @flags: passed along without comment
+ * @dev_name: not used
+ * @data: passed along without comment
+ * @mnt: passed along without comment
+ *
+ * Hands off to get_sb_single() with smk_fill_super() as the fill callback.
+ *
+ * Returns what the lower level code does.
+ */
+static int smk_get_sb(struct file_system_type *fs_type,
+		      int flags, const char *dev_name, void *data,
+		      struct vfsmount *mnt)
+{
+	return get_sb_single(fs_type, flags, data, smk_fill_super, mnt);
+}
+
+static struct file_system_type smk_fs_type = {
+	.name		= "smackfs",
+	.get_sb		= smk_get_sb,		/* wraps get_sb_single() */
+	.kill_sb	= kill_litter_super,
+};
+
+static struct vfsmount *smackfs_mount;	/* kern_mount() result; NULL if that failed */
+
+/**
+ * init_smk_fs - register and mount smackfs
+ *
+ * smack_write_sem and smk_cipso_doi() are handled even if that fails.
+ *
+ * Returns 0, or the error from registering or mounting the filesystem.
+ */
+static int __init init_smk_fs(void)
+{
+	int err = register_filesystem(&smk_fs_type);
+
+	if (err == 0) {
+		smackfs_mount = kern_mount(&smk_fs_type);
+		if (IS_ERR(smackfs_mount)) {
+			printk(KERN_ERR "smackfs:  could not mount!\n");
+			err = PTR_ERR(smackfs_mount);
+			smackfs_mount = NULL;
+		}
+	}
+
+	/* Done unconditionally, whatever happened above */
+	sema_init(&smack_write_sem, 1);
+	smk_cipso_doi();
+
+	return err;
+}
+
+__initcall(init_smk_fs);
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 6244911..61f5d42 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -23,7 +23,7 @@
 #include <linux/file.h>
 #include <linux/slab.h>
 #include <linux/time.h>
-#include <linux/latency.h>
+#include <linux/pm_qos_params.h>
 #include <linux/uio.h>
 #include <sound/core.h>
 #include <sound/control.h>
@@ -443,9 +443,11 @@
 	snd_pcm_timer_resolution_change(substream);
 	runtime->status->state = SNDRV_PCM_STATE_SETUP;
 
-	remove_acceptable_latency(substream->latency_id);
+	pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY,
+				substream->latency_id);
 	if ((usecs = period_to_usecs(runtime)) >= 0)
-		set_acceptable_latency(substream->latency_id, usecs);
+		pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY,
+					substream->latency_id, usecs);
 	return 0;
  _error:
 	/* hardware might be unuseable from this time,
@@ -505,7 +507,8 @@
 	if (substream->ops->hw_free)
 		result = substream->ops->hw_free(substream);
 	runtime->status->state = SNDRV_PCM_STATE_OPEN;
-	remove_acceptable_latency(substream->latency_id);
+	pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY,
+		substream->latency_id);
 	return result;
 }
 
