Merge branch 'for-3.2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

* 'for-3.2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: Replace Paul Menage with Tejun Heo as cgroups maintainer
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 5b31a8e..a790cc6 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -431,10 +431,6 @@
 #define kern_addr_valid(addr) \
 	(test_bit(__pa((unsigned long)(addr))>>20, sparc_valid_addr_bitmap))
 
-extern int io_remap_pfn_range(struct vm_area_struct *vma,
-			      unsigned long from, unsigned long pfn,
-			      unsigned long size, pgprot_t prot);
-
 /*
  * For sparc32&64, the pfn in io_remap_pfn_range() carries <iospace> in
  * its high 4 bits.  These macros/functions put it there or get it from there.
@@ -443,6 +439,22 @@
 #define GET_IOSPACE(pfn)		(pfn >> (BITS_PER_LONG - 4))
 #define GET_PFN(pfn)			(pfn & 0x0fffffffUL)
 
+extern int remap_pfn_range(struct vm_area_struct *, unsigned long, unsigned long,
+			   unsigned long, pgprot_t);
+
+static inline int io_remap_pfn_range(struct vm_area_struct *vma,
+				     unsigned long from, unsigned long pfn,
+				     unsigned long size, pgprot_t prot)
+{
+	unsigned long long offset, space, phys_base;
+
+	offset = ((unsigned long long) GET_PFN(pfn)) << PAGE_SHIFT;
+	space = GET_IOSPACE(pfn);
+	phys_base = offset | (space << 32ULL);
+
+	return remap_pfn_range(vma, from, phys_base >> PAGE_SHIFT, size, prot);
+}
+
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 #define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
 ({									  \
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index adf8932..38ebb2c 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -757,10 +757,6 @@
 
 extern int page_in_phys_avail(unsigned long paddr);
 
-extern int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
-			       unsigned long pfn,
-			       unsigned long size, pgprot_t prot);
-
 /*
  * For sparc32&64, the pfn in io_remap_pfn_range() carries <iospace> in
  * its high 4 bits.  These macros/functions put it there or get it from there.
@@ -769,6 +765,22 @@
 #define GET_IOSPACE(pfn)		(pfn >> (BITS_PER_LONG - 4))
 #define GET_PFN(pfn)			(pfn & 0x0fffffffffffffffUL)
 
+extern int remap_pfn_range(struct vm_area_struct *, unsigned long, unsigned long,
+			   unsigned long, pgprot_t);
+
+static inline int io_remap_pfn_range(struct vm_area_struct *vma,
+				     unsigned long from, unsigned long pfn,
+				     unsigned long size, pgprot_t prot)
+{
+	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
+	int space = GET_IOSPACE(pfn);
+	unsigned long phys_base;
+
+	phys_base = offset | (((unsigned long) space) << 32UL);
+
+	return remap_pfn_range(vma, from, phys_base >> PAGE_SHIFT, size, prot);
+}
+
 #include <asm-generic/pgtable.h>
 
 /* We provide our own get_unmapped_area to cope with VA holes and
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
index e27f8ea..0c218e4 100644
--- a/arch/sparc/kernel/entry.h
+++ b/arch/sparc/kernel/entry.h
@@ -42,6 +42,9 @@
 extern void fpload(unsigned long *fpregs, unsigned long *fsr);
 
 #else /* CONFIG_SPARC32 */
+
+#include <asm/trap_block.h>
+
 struct popc_3insn_patch_entry {
 	unsigned int	addr;
 	unsigned int	insns[3];
@@ -57,6 +60,10 @@
 	__popc_6insn_patch_end;
 
 extern void __init per_cpu_patch(void);
+extern void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *,
+				    struct sun4v_1insn_patch_entry *);
+extern void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *,
+				    struct sun4v_2insn_patch_entry *);
 extern void __init sun4v_patch(void);
 extern void __init boot_cpu_id_too_large(int cpu);
 extern unsigned int dcache_parity_tl1_occurred;
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index da0c6c7..e551987 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -17,6 +17,8 @@
 #include <asm/processor.h>
 #include <asm/spitfire.h>
 
+#include "entry.h"
+
 #ifdef CONFIG_SPARC64
 
 #include <linux/jump_label.h>
@@ -203,6 +205,29 @@
 }
 
 #ifdef CONFIG_SPARC64
+static void do_patch_sections(const Elf_Ehdr *hdr,
+			      const Elf_Shdr *sechdrs)
+{
+	const Elf_Shdr *s, *sun4v_1insn = NULL, *sun4v_2insn = NULL;
+	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+		if (!strcmp(".sun4v_1insn_patch", secstrings + s->sh_name))
+			sun4v_1insn = s;
+		if (!strcmp(".sun4v_2insn_patch", secstrings + s->sh_name))
+			sun4v_2insn = s;
+	}
+
+	if (sun4v_1insn && tlb_type == hypervisor) {
+		void *p = (void *) sun4v_1insn->sh_addr;
+		sun4v_patch_1insn_range(p, p + sun4v_1insn->sh_size);
+	}
+	if (sun4v_2insn && tlb_type == hypervisor) {
+		void *p = (void *) sun4v_2insn->sh_addr;
+		sun4v_patch_2insn_range(p, p + sun4v_2insn->sh_size);
+	}
+}
+
 int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
@@ -210,6 +235,8 @@
 	/* make jump label nops */
 	jump_label_apply_nops(me);
 
+	do_patch_sections(hdr, sechdrs);
+
 	/* Cheetah's I-cache is fully coherent.  */
 	if (tlb_type == spitfire) {
 		unsigned long va;
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index c965595a..a854a1c 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -234,40 +234,50 @@
 	}
 }
 
+void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *start,
+			     struct sun4v_1insn_patch_entry *end)
+{
+	while (start < end) {
+		unsigned long addr = start->addr;
+
+		*(unsigned int *) (addr +  0) = start->insn;
+		wmb();
+		__asm__ __volatile__("flush	%0" : : "r" (addr +  0));
+
+		start++;
+	}
+}
+
+void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start,
+			     struct sun4v_2insn_patch_entry *end)
+{
+	while (start < end) {
+		unsigned long addr = start->addr;
+
+		*(unsigned int *) (addr +  0) = start->insns[0];
+		wmb();
+		__asm__ __volatile__("flush	%0" : : "r" (addr +  0));
+
+		*(unsigned int *) (addr +  4) = start->insns[1];
+		wmb();
+		__asm__ __volatile__("flush	%0" : : "r" (addr +  4));
+
+		start++;
+	}
+}
+
 void __init sun4v_patch(void)
 {
 	extern void sun4v_hvapi_init(void);
-	struct sun4v_1insn_patch_entry *p1;
-	struct sun4v_2insn_patch_entry *p2;
 
 	if (tlb_type != hypervisor)
 		return;
 
-	p1 = &__sun4v_1insn_patch;
-	while (p1 < &__sun4v_1insn_patch_end) {
-		unsigned long addr = p1->addr;
+	sun4v_patch_1insn_range(&__sun4v_1insn_patch,
+				&__sun4v_1insn_patch_end);
 
-		*(unsigned int *) (addr +  0) = p1->insn;
-		wmb();
-		__asm__ __volatile__("flush	%0" : : "r" (addr +  0));
-
-		p1++;
-	}
-
-	p2 = &__sun4v_2insn_patch;
-	while (p2 < &__sun4v_2insn_patch_end) {
-		unsigned long addr = p2->addr;
-
-		*(unsigned int *) (addr +  0) = p2->insns[0];
-		wmb();
-		__asm__ __volatile__("flush	%0" : : "r" (addr +  0));
-
-		*(unsigned int *) (addr +  4) = p2->insns[1];
-		wmb();
-		__asm__ __volatile__("flush	%0" : : "r" (addr +  4));
-
-		p2++;
-	}
+	sun4v_patch_2insn_range(&__sun4v_2insn_patch,
+				&__sun4v_2insn_patch_end);
 
 	sun4v_hvapi_init();
 }
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index 2caa556..023b886 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -822,21 +822,23 @@
  * want to handle. Thus you cannot kill init even with a SIGKILL even by
  * mistake.
  */
-void do_signal32(sigset_t *oldset, struct pt_regs * regs,
-		 int restart_syscall, unsigned long orig_i0)
+void do_signal32(sigset_t *oldset, struct pt_regs * regs)
 {
 	struct k_sigaction ka;
+	unsigned long orig_i0;
+	int restart_syscall;
 	siginfo_t info;
 	int signr;
 	
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 
-	/* If the debugger messes with the program counter, it clears
-	 * the "in syscall" bit, directing us to not perform a syscall
-	 * restart.
-	 */
-	if (restart_syscall && !pt_regs_is_syscall(regs))
-		restart_syscall = 0;
+	restart_syscall = 0;
+	orig_i0 = 0;
+	if (pt_regs_is_syscall(regs) &&
+	    (regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY))) {
+		restart_syscall = 1;
+		orig_i0 = regs->u_regs[UREG_G6];
+	}
 
 	if (signr > 0) {
 		if (restart_syscall)
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 8ce247a..d54c6e5 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -519,10 +519,26 @@
 	siginfo_t info;
 	int signr;
 
+	/* It's a lot of work and synchronization to add a new ptrace
+	 * register for GDB to save and restore in order to get
+	 * orig_i0 correct for syscall restarts when debugging.
+	 *
+	 * Although it should be the case that most of the global
+	 * registers are volatile across a system call, glibc already
+	 * depends upon that fact that we preserve them.  So we can't
+	 * just use any global register to save away the orig_i0 value.
+	 *
+	 * In particular %g2, %g3, %g4, and %g5 are all assumed to be
+	 * preserved across a system call trap by various pieces of
+	 * code in glibc.
+	 *
+	 * %g7 is used as the "thread register".   %g6 is not used in
+	 * any fixed manner.  %g6 is used as a scratch register and
+	 * a compiler temporary, but it's value is never used across
+	 * a system call.  Therefore %g6 is usable for orig_i0 storage.
+	 */
 	if (pt_regs_is_syscall(regs) && (regs->psr & PSR_C))
-		restart_syscall = 1;
-	else
-		restart_syscall = 0;
+		regs->u_regs[UREG_G6] = orig_i0;
 
 	if (test_thread_flag(TIF_RESTORE_SIGMASK))
 		oldset = &current->saved_sigmask;
@@ -535,8 +551,12 @@
 	 * the software "in syscall" bit, directing us to not perform
 	 * a syscall restart.
 	 */
-	if (restart_syscall && !pt_regs_is_syscall(regs))
-		restart_syscall = 0;
+	restart_syscall = 0;
+	if (pt_regs_is_syscall(regs) && (regs->psr & PSR_C)) {
+		restart_syscall = 1;
+		orig_i0 = regs->u_regs[UREG_G6];
+	}
+
 
 	if (signr > 0) {
 		if (restart_syscall)
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index a2b8159..f0836cd 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -529,11 +529,27 @@
 	siginfo_t info;
 	int signr;
 	
+	/* It's a lot of work and synchronization to add a new ptrace
+	 * register for GDB to save and restore in order to get
+	 * orig_i0 correct for syscall restarts when debugging.
+	 *
+	 * Although it should be the case that most of the global
+	 * registers are volatile across a system call, glibc already
+	 * depends upon that fact that we preserve them.  So we can't
+	 * just use any global register to save away the orig_i0 value.
+	 *
+	 * In particular %g2, %g3, %g4, and %g5 are all assumed to be
+	 * preserved across a system call trap by various pieces of
+	 * code in glibc.
+	 *
+	 * %g7 is used as the "thread register".   %g6 is not used in
+	 * any fixed manner.  %g6 is used as a scratch register and
+	 * a compiler temporary, but it's value is never used across
+	 * a system call.  Therefore %g6 is usable for orig_i0 storage.
+	 */
 	if (pt_regs_is_syscall(regs) &&
-	    (regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY))) {
-		restart_syscall = 1;
-	} else
-		restart_syscall = 0;
+	    (regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY)))
+		regs->u_regs[UREG_G6] = orig_i0;
 
 	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
 		oldset = &current->saved_sigmask;
@@ -542,22 +558,20 @@
 
 #ifdef CONFIG_COMPAT
 	if (test_thread_flag(TIF_32BIT)) {
-		extern void do_signal32(sigset_t *, struct pt_regs *,
-					int restart_syscall,
-					unsigned long orig_i0);
-		do_signal32(oldset, regs, restart_syscall, orig_i0);
+		extern void do_signal32(sigset_t *, struct pt_regs *);
+		do_signal32(oldset, regs);
 		return;
 	}
 #endif	
 
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 
-	/* If the debugger messes with the program counter, it clears
-	 * the software "in syscall" bit, directing us to not perform
-	 * a syscall restart.
-	 */
-	if (restart_syscall && !pt_regs_is_syscall(regs))
-		restart_syscall = 0;
+	restart_syscall = 0;
+	if (pt_regs_is_syscall(regs) &&
+	    (regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY))) {
+		restart_syscall = 1;
+		orig_i0 = regs->u_regs[UREG_G6];
+	}
 
 	if (signr > 0) {
 		if (restart_syscall)
diff --git a/arch/sparc/kernel/sigutil_64.c b/arch/sparc/kernel/sigutil_64.c
index e7dc508..b19570d 100644
--- a/arch/sparc/kernel/sigutil_64.c
+++ b/arch/sparc/kernel/sigutil_64.c
@@ -2,6 +2,7 @@
 #include <linux/types.h>
 #include <linux/thread_info.h>
 #include <linux/uaccess.h>
+#include <linux/errno.h>
 
 #include <asm/sigcontext.h>
 #include <asm/fpumacro.h>
diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile
index e3cda21..301421c 100644
--- a/arch/sparc/mm/Makefile
+++ b/arch/sparc/mm/Makefile
@@ -8,7 +8,6 @@
 obj-y                   += fault_$(BITS).o
 obj-y                   += init_$(BITS).o
 obj-$(CONFIG_SPARC32)   += loadmmu.o
-obj-y                   += generic_$(BITS).o
 obj-$(CONFIG_SPARC32)   += extable.o btfixup.o srmmu.o iommu.o io-unit.o
 obj-$(CONFIG_SPARC32)   += hypersparc.o viking.o tsunami.o swift.o
 obj-$(CONFIG_SPARC_LEON)+= leon_mm.o
diff --git a/arch/sparc/mm/generic_32.c b/arch/sparc/mm/generic_32.c
deleted file mode 100644
index 6ca39a6..0000000
--- a/arch/sparc/mm/generic_32.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * generic.c: Generic Sparc mm routines that are not dependent upon
- *            MMU type but are Sparc specific.
- *
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/pagemap.h>
-#include <linux/export.h>
-
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-
-/* Remap IO memory, the same way as remap_pfn_range(), but use
- * the obio memory space.
- *
- * They use a pgprot that sets PAGE_IO and does not check the
- * mem_map table as this is independent of normal memory.
- */
-static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte, unsigned long address, unsigned long size,
-	unsigned long offset, pgprot_t prot, int space)
-{
-	unsigned long end;
-
-	address &= ~PMD_MASK;
-	end = address + size;
-	if (end > PMD_SIZE)
-		end = PMD_SIZE;
-	do {
-		set_pte_at(mm, address, pte, mk_pte_io(offset, prot, space));
-		address += PAGE_SIZE;
-		offset += PAGE_SIZE;
-		pte++;
-	} while (address < end);
-}
-
-static inline int io_remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size,
-	unsigned long offset, pgprot_t prot, int space)
-{
-	unsigned long end;
-
-	address &= ~PGDIR_MASK;
-	end = address + size;
-	if (end > PGDIR_SIZE)
-		end = PGDIR_SIZE;
-	offset -= address;
-	do {
-		pte_t *pte = pte_alloc_map(mm, NULL, pmd, address);
-		if (!pte)
-			return -ENOMEM;
-		io_remap_pte_range(mm, pte, address, end - address, address + offset, prot, space);
-		address = (address + PMD_SIZE) & PMD_MASK;
-		pmd++;
-	} while (address < end);
-	return 0;
-}
-
-int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
-		       unsigned long pfn, unsigned long size, pgprot_t prot)
-{
-	int error = 0;
-	pgd_t * dir;
-	unsigned long beg = from;
-	unsigned long end = from + size;
-	struct mm_struct *mm = vma->vm_mm;
-	int space = GET_IOSPACE(pfn);
-	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
-
-	/* See comment in mm/memory.c remap_pfn_range */
-	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
-	vma->vm_pgoff = (offset >> PAGE_SHIFT) |
-		((unsigned long)space << 28UL);
-
-	offset -= from;
-	dir = pgd_offset(mm, from);
-	flush_cache_range(vma, beg, end);
-
-	while (from < end) {
-		pmd_t *pmd = pmd_alloc(mm, dir, from);
-		error = -ENOMEM;
-		if (!pmd)
-			break;
-		error = io_remap_pmd_range(mm, pmd, from, end - from, offset + from, prot, space);
-		if (error)
-			break;
-		from = (from + PGDIR_SIZE) & PGDIR_MASK;
-		dir++;
-	}
-
-	flush_tlb_range(vma, beg, end);
-	return error;
-}
-EXPORT_SYMBOL(io_remap_pfn_range);
diff --git a/arch/sparc/mm/generic_64.c b/arch/sparc/mm/generic_64.c
deleted file mode 100644
index 9b357dd..0000000
--- a/arch/sparc/mm/generic_64.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * generic.c: Generic Sparc mm routines that are not dependent upon
- *            MMU type but are Sparc specific.
- *
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/export.h>
-#include <linux/pagemap.h>
-
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/tlbflush.h>
-
-/* Remap IO memory, the same way as remap_pfn_range(), but use
- * the obio memory space.
- *
- * They use a pgprot that sets PAGE_IO and does not check the
- * mem_map table as this is independent of normal memory.
- */
-static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte,
-				      unsigned long address,
-				      unsigned long size,
-				      unsigned long offset, pgprot_t prot,
-				      int space)
-{
-	unsigned long end;
-
-	/* clear hack bit that was used as a write_combine side-effect flag */
-	offset &= ~0x1UL;
-	address &= ~PMD_MASK;
-	end = address + size;
-	if (end > PMD_SIZE)
-		end = PMD_SIZE;
-	do {
-		pte_t entry;
-		unsigned long curend = address + PAGE_SIZE;
-		
-		entry = mk_pte_io(offset, prot, space, PAGE_SIZE);
-		if (!(address & 0xffff)) {
-			if (PAGE_SIZE < (4 * 1024 * 1024) &&
-			    !(address & 0x3fffff) &&
-			    !(offset & 0x3ffffe) &&
-			    end >= address + 0x400000) {
-				entry = mk_pte_io(offset, prot, space,
-						  4 * 1024 * 1024);
-				curend = address + 0x400000;
-				offset += 0x400000;
-			} else if (PAGE_SIZE < (512 * 1024) &&
-				   !(address & 0x7ffff) &&
-				   !(offset & 0x7fffe) &&
-				   end >= address + 0x80000) {
-				entry = mk_pte_io(offset, prot, space,
-						  512 * 1024 * 1024);
-				curend = address + 0x80000;
-				offset += 0x80000;
-			} else if (PAGE_SIZE < (64 * 1024) &&
-				   !(offset & 0xfffe) &&
-				   end >= address + 0x10000) {
-				entry = mk_pte_io(offset, prot, space,
-						  64 * 1024);
-				curend = address + 0x10000;
-				offset += 0x10000;
-			} else
-				offset += PAGE_SIZE;
-		} else
-			offset += PAGE_SIZE;
-
-		if (pte_write(entry))
-			entry = pte_mkdirty(entry);
-		do {
-			BUG_ON(!pte_none(*pte));
-			set_pte_at(mm, address, pte, entry);
-			address += PAGE_SIZE;
-			pte_val(entry) += PAGE_SIZE;
-			pte++;
-		} while (address < curend);
-	} while (address < end);
-}
-
-static inline int io_remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size,
-	unsigned long offset, pgprot_t prot, int space)
-{
-	unsigned long end;
-
-	address &= ~PGDIR_MASK;
-	end = address + size;
-	if (end > PGDIR_SIZE)
-		end = PGDIR_SIZE;
-	offset -= address;
-	do {
-		pte_t *pte = pte_alloc_map(mm, NULL, pmd, address);
-		if (!pte)
-			return -ENOMEM;
-		io_remap_pte_range(mm, pte, address, end - address, address + offset, prot, space);
-		pte_unmap(pte);
-		address = (address + PMD_SIZE) & PMD_MASK;
-		pmd++;
-	} while (address < end);
-	return 0;
-}
-
-static inline int io_remap_pud_range(struct mm_struct *mm, pud_t * pud, unsigned long address, unsigned long size,
-	unsigned long offset, pgprot_t prot, int space)
-{
-	unsigned long end;
-
-	address &= ~PUD_MASK;
-	end = address + size;
-	if (end > PUD_SIZE)
-		end = PUD_SIZE;
-	offset -= address;
-	do {
-		pmd_t *pmd = pmd_alloc(mm, pud, address);
-		if (!pud)
-			return -ENOMEM;
-		io_remap_pmd_range(mm, pmd, address, end - address, address + offset, prot, space);
-		address = (address + PUD_SIZE) & PUD_MASK;
-		pud++;
-	} while (address < end);
-	return 0;
-}
-
-int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
-		unsigned long pfn, unsigned long size, pgprot_t prot)
-{
-	int error = 0;
-	pgd_t * dir;
-	unsigned long beg = from;
-	unsigned long end = from + size;
-	struct mm_struct *mm = vma->vm_mm;
-	int space = GET_IOSPACE(pfn);
-	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
-	unsigned long phys_base;
-
-	phys_base = offset | (((unsigned long) space) << 32UL);
-
-	/* See comment in mm/memory.c remap_pfn_range */
-	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
-	vma->vm_pgoff = phys_base >> PAGE_SHIFT;
-
-	offset -= from;
-	dir = pgd_offset(mm, from);
-	flush_cache_range(vma, beg, end);
-
-	while (from < end) {
-		pud_t *pud = pud_alloc(mm, dir, from);
-		error = -ENOMEM;
-		if (!pud)
-			break;
-		error = io_remap_pud_range(mm, pud, from, end - from, offset + from, prot, space);
-		if (error)
-			break;
-		from = (from + PGDIR_SIZE) & PGDIR_MASK;
-		dir++;
-	}
-
-	flush_tlb_range(vma, beg, end);
-	return error;
-}
-EXPORT_SYMBOL(io_remap_pfn_range);
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 9b7273c..1a6c09a 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -49,6 +49,7 @@
 extern int local_apic_timer_c2_ok;
 
 extern int disable_apic;
+extern unsigned int lapic_timer_frequency;
 
 #ifdef CONFIG_SMP
 extern void __inquire_remote_apic(int apicid);
diff --git a/arch/x86/include/asm/mach_traps.h b/arch/x86/include/asm/mach_traps.h
index 72a8b52..a01e7ec7 100644
--- a/arch/x86/include/asm/mach_traps.h
+++ b/arch/x86/include/asm/mach_traps.h
@@ -17,7 +17,7 @@
 #define NMI_REASON_CLEAR_IOCHK	0x08
 #define NMI_REASON_CLEAR_MASK	0x0f
 
-static inline unsigned char get_nmi_reason(void)
+static inline unsigned char default_get_nmi_reason(void)
 {
 	return inb(NMI_REASON_PORT);
 }
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c9321f3..0e8ae57 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -201,7 +201,10 @@
 void mce_notify_process(void);
 
 DECLARE_PER_CPU(struct mce, injectm);
-extern struct file_operations mce_chrdev_ops;
+
+extern void register_mce_write_callback(ssize_t (*)(struct file *filp,
+				    const char __user *ubuf,
+				    size_t usize, loff_t *off));
 
 /*
  * Exception handler
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
index 719f00b..e628312 100644
--- a/arch/x86/include/asm/mrst.h
+++ b/arch/x86/include/asm/mrst.h
@@ -44,6 +44,13 @@
 
 extern enum mrst_timer_options mrst_timer_options;
 
+/*
+ * Penwell uses spread spectrum clock, so the freq number is not exactly
+ * the same as reported by MSR based on SDM.
+ */
+#define PENWELL_FSB_FREQ_83SKU         83200
+#define PENWELL_FSB_FREQ_100SKU        99840
+
 #define SFI_MTMR_MAX_NUM 8
 #define SFI_MRTC_MAX	8
 
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index d3d8590..1971e65 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -152,6 +152,7 @@
 /**
  * struct x86_platform_ops - platform specific runtime functions
  * @calibrate_tsc:		calibrate TSC
+ * @wallclock_init:		init the wallclock device
  * @get_wallclock:		get time from HW clock like RTC etc.
  * @set_wallclock:		set time back to HW clock
  * @is_untracked_pat_range	exclude from PAT logic
@@ -160,11 +161,13 @@
  */
 struct x86_platform_ops {
 	unsigned long (*calibrate_tsc)(void);
+	void (*wallclock_init)(void);
 	unsigned long (*get_wallclock)(void);
 	int (*set_wallclock)(unsigned long nowtime);
 	void (*iommu_shutdown)(void);
 	bool (*is_untracked_pat_range)(u64 start, u64 end);
 	void (*nmi_init)(void);
+	unsigned char (*get_nmi_reason)(void);
 	int (*i8042_detect)(void);
 };
 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c638228..1f84794 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -738,5 +738,5 @@
 
 	atomic_set(&stop_machine_first, 1);
 	wrote_text = 0;
-	__stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+	__stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
 }
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a2fd72e..f98d84c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -186,7 +186,7 @@
 	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
 };
 
-static unsigned int calibration_result;
+unsigned int lapic_timer_frequency = 0;
 
 static void apic_pm_activate(void);
 
@@ -454,7 +454,7 @@
 	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
 	case CLOCK_EVT_MODE_ONESHOT:
-		__setup_APIC_LVTT(calibration_result,
+		__setup_APIC_LVTT(lapic_timer_frequency,
 				  mode != CLOCK_EVT_MODE_PERIODIC, 1);
 		break;
 	case CLOCK_EVT_MODE_UNUSED:
@@ -638,6 +638,25 @@
 	long delta, deltatsc;
 	int pm_referenced = 0;
 
+	/**
+	 * check if lapic timer has already been calibrated by platform
+	 * specific routine, such as tsc calibration code. if so, we just fill
+	 * in the clockevent structure and return.
+	 */
+
+	if (lapic_timer_frequency) {
+		apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
+				lapic_timer_frequency);
+		lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
+					TICK_NSEC, lapic_clockevent.shift);
+		lapic_clockevent.max_delta_ns =
+			clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+		lapic_clockevent.min_delta_ns =
+			clockevent_delta2ns(0xF, &lapic_clockevent);
+		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+		return 0;
+	}
+
 	local_irq_disable();
 
 	/* Replace the global interrupt handler */
@@ -679,12 +698,12 @@
 	lapic_clockevent.min_delta_ns =
 		clockevent_delta2ns(0xF, &lapic_clockevent);
 
-	calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+	lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
 
 	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
 	apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
 	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
-		    calibration_result);
+		    lapic_timer_frequency);
 
 	if (cpu_has_tsc) {
 		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
@@ -695,13 +714,13 @@
 
 	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
 		    "%u.%04u MHz.\n",
-		    calibration_result / (1000000 / HZ),
-		    calibration_result % (1000000 / HZ));
+		    lapic_timer_frequency / (1000000 / HZ),
+		    lapic_timer_frequency % (1000000 / HZ));
 
 	/*
 	 * Do a sanity check on the APIC calibration result
 	 */
-	if (calibration_result < (1000000 / HZ)) {
+	if (lapic_timer_frequency < (1000000 / HZ)) {
 		local_irq_enable();
 		pr_warning("APIC frequency too slow, disabling apic timer\n");
 		return -1;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 3c31fa9..6d939d7 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -193,10 +193,8 @@
 	struct irq_cfg *cfg;
 	int count, node, i;
 
-	if (!legacy_pic->nr_legacy_irqs) {
-		nr_irqs_gsi = 0;
+	if (!legacy_pic->nr_legacy_irqs)
 		io_apic_irqs = ~0UL;
-	}
 
 	for (i = 0; i < nr_ioapics; i++) {
 		ioapics[i].saved_registers =
@@ -1696,6 +1694,7 @@
 	int ioapic_idx;
 	struct irq_cfg *cfg;
 	unsigned int irq;
+	struct irq_chip *chip;
 
 	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
 	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
@@ -1716,6 +1715,10 @@
 	for_each_active_irq(irq) {
 		struct irq_pin_list *entry;
 
+		chip = irq_get_chip(irq);
+		if (chip != &ioapic_chip)
+			continue;
+
 		cfg = irq_get_chip_data(irq);
 		if (!cfg)
 			continue;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 6199232..319882e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -208,7 +208,7 @@
 	if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
 		return -ENOMEM;
 	printk(KERN_INFO "Machine check injector initialized\n");
-	mce_chrdev_ops.write = mce_write;
+	register_mce_write_callback(mce_write);
 	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
 				"mce_notify");
 	return 0;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 362056a..2af127d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1634,16 +1634,35 @@
 	}
 }
 
-/* Modified in mce-inject.c, so not static or const */
-struct file_operations mce_chrdev_ops = {
+static ssize_t (*mce_write)(struct file *filp, const char __user *ubuf,
+			    size_t usize, loff_t *off);
+
+void register_mce_write_callback(ssize_t (*fn)(struct file *filp,
+			     const char __user *ubuf,
+			     size_t usize, loff_t *off))
+{
+	mce_write = fn;
+}
+EXPORT_SYMBOL_GPL(register_mce_write_callback);
+
+ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
+			 size_t usize, loff_t *off)
+{
+	if (mce_write)
+		return mce_write(filp, ubuf, usize, off);
+	else
+		return -EINVAL;
+}
+
+static const struct file_operations mce_chrdev_ops = {
 	.open			= mce_chrdev_open,
 	.release		= mce_chrdev_release,
 	.read			= mce_chrdev_read,
+	.write			= mce_chrdev_write,
 	.poll			= mce_chrdev_poll,
 	.unlocked_ioctl		= mce_chrdev_ioctl,
 	.llseek			= no_llseek,
 };
-EXPORT_SYMBOL_GPL(mce_chrdev_ops);
 
 static struct miscdevice mce_chrdev_device = {
 	MISC_MCELOG_MINOR,
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index b9c8628..e88f37b 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -29,6 +29,7 @@
 #include <asm/traps.h>
 #include <asm/mach_traps.h>
 #include <asm/nmi.h>
+#include <asm/x86_init.h>
 
 #define NMI_MAX_NAMELEN	16
 struct nmiaction {
@@ -348,7 +349,7 @@
 
 	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
 	raw_spin_lock(&nmi_reason_lock);
-	reason = get_nmi_reason();
+	reason = x86_platform.get_nmi_reason();
 
 	if (reason & NMI_REASON_MASK) {
 		if (reason & NMI_REASON_SERR)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index afaf384..cf0ef98 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1045,6 +1045,8 @@
 
 	x86_init.timers.wallclock_init();
 
+	x86_platform.wallclock_init();
+
 	mcheck_init();
 
 	arch_init_ideal_nops();
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 6f164bd..c1d6cd5 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -21,12 +21,14 @@
 #include <asm/pat.h>
 #include <asm/tsc.h>
 #include <asm/iommu.h>
+#include <asm/mach_traps.h>
 
 void __cpuinit x86_init_noop(void) { }
 void __init x86_init_uint_noop(unsigned int unused) { }
 void __init x86_init_pgd_noop(pgd_t *unused) { }
 int __init iommu_init_noop(void) { return 0; }
 void iommu_shutdown_noop(void) { }
+void wallclock_init_noop(void) { }
 
 /*
  * The platform setup functions are preset with the default functions
@@ -97,11 +99,13 @@
 
 struct x86_platform_ops x86_platform = {
 	.calibrate_tsc			= native_calibrate_tsc,
+	.wallclock_init			= wallclock_init_noop,
 	.get_wallclock			= mach_get_cmos_time,
 	.set_wallclock			= mach_set_rtc_mmss,
 	.iommu_shutdown			= iommu_shutdown_noop,
 	.is_untracked_pat_range		= is_ISA_range,
 	.nmi_init			= default_nmi_init,
+	.get_nmi_reason			= default_get_nmi_reason,
 	.i8042_detect			= default_i8042_detect
 };
 
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 541020d..b1489a0 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -187,11 +187,34 @@
 static unsigned long __init mrst_calibrate_tsc(void)
 {
 	unsigned long flags, fast_calibrate;
+	if (__mrst_cpu_chip == MRST_CPU_CHIP_PENWELL) {
+		u32 lo, hi, ratio, fsb;
 
-	local_irq_save(flags);
-	fast_calibrate = apbt_quick_calibrate();
-	local_irq_restore(flags);
-
+		rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+		pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi);
+		ratio = (hi >> 8) & 0x1f;
+		pr_debug("ratio is %d\n", ratio);
+		if (!ratio) {
+			pr_err("read a zero ratio, should be incorrect!\n");
+			pr_err("force tsc ratio to 16 ...\n");
+			ratio = 16;
+		}
+		rdmsr(MSR_FSB_FREQ, lo, hi);
+		if ((lo & 0x7) == 0x7)
+			fsb = PENWELL_FSB_FREQ_83SKU;
+		else
+			fsb = PENWELL_FSB_FREQ_100SKU;
+		fast_calibrate = ratio * fsb;
+		pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
+		lapic_timer_frequency = fsb * 1000 / HZ;
+		/* mark tsc clocksource as reliable */
+		set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
+	} else {
+		local_irq_save(flags);
+		fast_calibrate = apbt_quick_calibrate();
+		local_irq_restore(flags);
+	}
+	
 	if (fast_calibrate)
 		return fast_calibrate;
 
@@ -254,6 +277,17 @@
 }
 
 /*
+ * Moorestown does not have external NMI source nor port 0x61 to report
+ * NMI status. The possible NMI sources are from pmu as a result of NMI
+ * watchdog or lock debug. Reading io port 0x61 results in 0xff which
+ * misled NMI handler.
+ */
+static unsigned char mrst_get_nmi_reason(void)
+{
+	return 0;
+}
+
+/*
  * Moorestown specific x86_init function overrides and early setup
  * calls.
  */
@@ -274,6 +308,8 @@
 	x86_platform.calibrate_tsc = mrst_calibrate_tsc;
 	x86_platform.i8042_detect = mrst_i8042_detect;
 	x86_init.timers.wallclock_init = mrst_rtc_init;
+	x86_platform.get_nmi_reason = mrst_get_nmi_reason;
+
 	x86_init.pci.init = pci_mrst_init;
 	x86_init.pci.fixup_irqs = x86_init_noop;
 
diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c
index 67cbcfa..847553f 100644
--- a/drivers/ide/cy82c693.c
+++ b/drivers/ide/cy82c693.c
@@ -1,7 +1,7 @@
 /*
  *  Copyright (C) 1998-2000 Andreas S. Krebs (akrebs@altavista.net), Maintainer
  *  Copyright (C) 1998-2002 Andre Hedrick <andre@linux-ide.org>, Integrator
- *  Copyright (C) 2007-2010 Bartlomiej Zolnierkiewicz
+ *  Copyright (C) 2007-2011 Bartlomiej Zolnierkiewicz
  *
  * CYPRESS CY82C693 chipset IDE controller
  *
@@ -90,7 +90,7 @@
 	u8 time_16, time_8;
 
 	/* select primary or secondary channel */
-	if (hwif->index > 0) {  /* drive is on the secondary channel */
+	if (drive->dn > 1) {  /* drive is on the secondary channel */
 		dev = pci_get_slot(dev->bus, dev->devfn+1);
 		if (!dev) {
 			printk(KERN_ERR "%s: tune_drive: "
@@ -141,7 +141,7 @@
 		pci_write_config_byte(dev, CY82_IDE_SLAVE_IOW, time_16);
 		pci_write_config_byte(dev, CY82_IDE_SLAVE_8BIT, time_8);
 	}
-	if (hwif->index > 0)
+	if (drive->dn > 1)
 		pci_dev_put(dev);
 }
 
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 4a697a2..8716066 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -521,8 +521,8 @@
 	if (ec->dma != NO_DMA && !request_dma(ec->dma, DRV_NAME)) {
 		d.init_dma = icside_dma_init;
 		d.port_ops = &icside_v6_port_ops;
+	} else
 		d.dma_ops = NULL;
-	}
 
 	ret = ide_host_register(host, &d, hws);
 	if (ret)
diff --git a/drivers/ide/piix.c b/drivers/ide/piix.c
index b59d04c..1892e81 100644
--- a/drivers/ide/piix.c
+++ b/drivers/ide/piix.c
@@ -331,7 +331,7 @@
 		.udma_mask	= udma,			\
 	}
 
-#define DECLARE_ICH_DEV(udma) \
+#define DECLARE_ICH_DEV(mwdma, udma) \
 	{ \
 		.name		= DRV_NAME, \
 		.init_chipset	= init_chipset_ich, \
@@ -340,7 +340,7 @@
 		.port_ops	= &ich_port_ops, \
 		.pio_mask	= ATA_PIO4, \
 		.swdma_mask	= ATA_SWDMA2_ONLY, \
-		.mwdma_mask	= ATA_MWDMA12_ONLY, \
+		.mwdma_mask	= mwdma, \
 		.udma_mask	= udma, \
 	}
 
@@ -362,13 +362,15 @@
 	/* 2: PIIX4 */
 	DECLARE_PIIX_DEV(ATA_UDMA2),
 	/* 3: ICH0 */
-	DECLARE_ICH_DEV(ATA_UDMA2),
+	DECLARE_ICH_DEV(ATA_MWDMA12_ONLY, ATA_UDMA2),
 	/* 4: ICH */
-	DECLARE_ICH_DEV(ATA_UDMA4),
+	DECLARE_ICH_DEV(ATA_MWDMA12_ONLY, ATA_UDMA4),
 	/* 5: PIIX4 */
 	DECLARE_PIIX_DEV(ATA_UDMA4),
-	/* 6: ICH[2-7]/ICH[2-3]M/C-ICH/ICH5-SATA/ESB2/ICH8M */
-	DECLARE_ICH_DEV(ATA_UDMA5),
+	/* 6: ICH[2-6]/ICH[2-3]M/C-ICH/ICH5-SATA/ESB2/ICH8M */
+	DECLARE_ICH_DEV(ATA_MWDMA12_ONLY, ATA_UDMA5),
+	/* 7: ICH7/7-R, no MWDMA1 */
+	DECLARE_ICH_DEV(ATA_MWDMA2_ONLY, ATA_UDMA5),
 };
 
 /**
@@ -438,9 +440,9 @@
 #endif
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ESB_2),      6 },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICH6_19),    6 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICH7_21),    6 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICH7_21),    7 },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801DB_1),  6 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ESB2_18),    6 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ESB2_18),    7 },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICH8_6),     6 },
 	{ 0, },
 };
diff --git a/drivers/ide/triflex.c b/drivers/ide/triflex.c
index e53a1b7..281c914 100644
--- a/drivers/ide/triflex.c
+++ b/drivers/ide/triflex.c
@@ -113,12 +113,26 @@
 };
 MODULE_DEVICE_TABLE(pci, triflex_pci_tbl);
 
+#ifdef CONFIG_PM
+static int triflex_ide_pci_suspend(struct pci_dev *dev, pm_message_t state)
+{
+	/*
+	 * We must not disable or powerdown the device.
+	 * APM bios refuses to suspend if IDE is not accessible.
+	 */
+	pci_save_state(dev);
+	return 0;
+}
+#else
+#define triflex_ide_pci_suspend NULL
+#endif
+
 static struct pci_driver triflex_pci_driver = {
 	.name		= "TRIFLEX_IDE",
 	.id_table	= triflex_pci_tbl,
 	.probe		= triflex_init_one,
 	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
+	.suspend	= triflex_ide_pci_suspend,
 	.resume		= ide_pci_resume,
 };