[SPARC64]: Use ASI_SCRATCHPAD address 0x0 properly.

This is where the virtual address of the fault status
area belongs.

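To set it up we don't make a hypervisor call; instead
we call OBP's SUNW,set-trap-table with the real address
of the fault status area as the second argument.  Right
before that call we write the virtual address into
ASI_SCRATCHPAD vaddr 0x0.  The sun4v trap handlers then
fetch the fault status area pointer straight from the
scratchpad instead of indexing trap_block[] by cpu id.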

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index d048f0d..f581f0e 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -521,11 +521,36 @@
 	wrpr	%g0, 15, %pil
 
 	/* Make the firmware call to jump over to the Linux trap table.  */
-	call	prom_set_trap_table
+	sethi	%hi(is_sun4v), %o0
+	lduw	[%o0 + %lo(is_sun4v)], %o0
+	brz,pt	%o0, 1f
+	 nop
+
+	TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
+	add	%g2, TRAP_PER_CPU_FAULT_INFO, %g2
+	stxa	%g2, [%g0] ASI_SCRATCHPAD
+
+	/* Compute physical address:
+	 *
+	 * paddr = kern_base + (mmfsa_vaddr - KERNBASE)
+	 */
+	sethi	%hi(KERNBASE), %g3
+	sub	%g2, %g3, %g2
+	sethi	%hi(kern_base), %g3
+	ldx	[%g3 + %lo(kern_base)], %g3
+	add	%g2, %g3, %o1
+
+	call	prom_set_trap_table_sun4v
+	 sethi	%hi(sparc64_ttable_tl0), %o0
+
+	ba,pt	%xcc, 2f
+	 nop
+
+1:	call	prom_set_trap_table
 	 sethi	%hi(sparc64_ttable_tl0), %o0
 
 	/* Start using proper page size encodings in ctx register.  */
-	sethi	%hi(sparc64_kern_pri_context), %g3
+2:	sethi	%hi(sparc64_kern_pri_context), %g3
 	ldx	[%g3 + %lo(sparc64_kern_pri_context)], %g2
 
 	mov		PRIMARY_CONTEXT, %g1
diff --git a/arch/sparc64/kernel/sun4v_ivec.S b/arch/sparc64/kernel/sun4v_ivec.S
index d9d4420..c0367ef 100644
--- a/arch/sparc64/kernel/sun4v_ivec.S
+++ b/arch/sparc64/kernel/sun4v_ivec.S
@@ -22,11 +22,8 @@
 	 nop
 
 	/* Get &trap_block[smp_processor_id()] into %g3.  */
-	__GET_CPUID(%g1)
-	sethi	%hi(trap_block), %g3
-	sllx	%g1, TRAP_BLOCK_SZ_SHIFT, %g7
-	or	%g3, %lo(trap_block), %g3
-	add	%g3, %g7, %g3
+	ldxa	[%g0] ASI_SCRATCHPAD, %g3
+	sub	%g3, TRAP_PER_CPU_FAULT_INFO, %g3
 
 	/* Get CPU mondo queue base phys address into %g7.  */
 	ldx	[%g3 + TRAP_PER_CPU_CPU_MONDO_PA], %g7
@@ -74,11 +71,8 @@
 	 nop
 
 	/* Get &trap_block[smp_processor_id()] into %g3.  */
-	__GET_CPUID(%g1)
-	sethi	%hi(trap_block), %g3
-	sllx	%g1, TRAP_BLOCK_SZ_SHIFT, %g7
-	or	%g3, %lo(trap_block), %g3
-	add	%g3, %g7, %g3
+	ldxa	[%g0] ASI_SCRATCHPAD, %g3
+	sub	%g3, TRAP_PER_CPU_FAULT_INFO, %g3
 
 	/* Get DEV mondo queue base phys address into %g5.  */
 	ldx	[%g3 + TRAP_PER_CPU_DEV_MONDO_PA], %g5
@@ -143,11 +137,8 @@
 	 nop
 
 	/* Get &trap_block[smp_processor_id()] into %g3.  */
-	__GET_CPUID(%g1)
-	sethi	%hi(trap_block), %g3
-	sllx	%g1, TRAP_BLOCK_SZ_SHIFT, %g7
-	or	%g3, %lo(trap_block), %g3
-	add	%g3, %g7, %g3
+	ldxa	[%g0] ASI_SCRATCHPAD, %g3
+	sub	%g3, TRAP_PER_CPU_FAULT_INFO, %g3
 
 	/* Get RES mondo queue base phys address into %g5.  */
 	ldx	[%g3 + TRAP_PER_CPU_RESUM_MONDO_PA], %g5
@@ -251,11 +242,8 @@
 	 nop
 
 	/* Get &trap_block[smp_processor_id()] into %g3.  */
-	__GET_CPUID(%g1)
-	sethi	%hi(trap_block), %g3
-	sllx	%g1, TRAP_BLOCK_SZ_SHIFT, %g7
-	or	%g3, %lo(trap_block), %g3
-	add	%g3, %g7, %g3
+	ldxa	[%g0] ASI_SCRATCHPAD, %g3
+	sub	%g3, TRAP_PER_CPU_FAULT_INFO, %g3
 
 	/* Get RES mondo queue base phys address into %g5.  */
 	ldx	[%g3 + TRAP_PER_CPU_NONRESUM_MONDO_PA], %g5
diff --git a/arch/sparc64/kernel/sun4v_tlb_miss.S b/arch/sparc64/kernel/sun4v_tlb_miss.S
index c408b05..f622262 100644
--- a/arch/sparc64/kernel/sun4v_tlb_miss.S
+++ b/arch/sparc64/kernel/sun4v_tlb_miss.S
@@ -7,26 +7,20 @@
 	.align	32
 
 sun4v_itlb_miss:
-	/* Load CPU ID into %g3.  */
-	mov	SCRATCHPAD_CPUID, %g1
-	ldxa	[%g1] ASI_SCRATCHPAD, %g3
+	/* Load MMU Miss base into %g2.  */
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
 	
 	/* Load UTSB reg into %g1.  */
-	ldxa	[%g1 + %g1] ASI_SCRATCHPAD, %g1
-
-	/* Load &trap_block[smp_processor_id()] into %g2.  */
-	sethi	%hi(trap_block), %g2
-	or	%g2, %lo(trap_block), %g2
-	sllx	%g3, TRAP_BLOCK_SZ_SHIFT, %g3
-	add	%g2, %g3, %g2
+	mov	SCRATCHPAD_UTSBREG1, %g1
+	ldxa	[%g1] ASI_SCRATCHPAD, %g1
 
 	/* Create a TAG TARGET, "(vaddr>>22) | (ctx << 48)", in %g6.
 	 * Branch if kernel TLB miss.  The kernel TSB and user TSB miss
 	 * code wants the missing virtual address in %g4, so that value
 	 * cannot be modified through the entirety of this handler.
 	 */
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_ADDR_OFFSET], %g4
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_CTX_OFFSET], %g5
+	ldx	[%g2 + HV_FAULT_I_ADDR_OFFSET], %g4
+	ldx	[%g2 + HV_FAULT_I_CTX_OFFSET], %g5
 	srlx	%g4, 22, %g3
 	sllx	%g5, 48, %g6
 	or	%g6, %g3, %g6
@@ -90,26 +84,20 @@
 	retry
 
 sun4v_dtlb_miss:
-	/* Load CPU ID into %g3.  */
-	mov	SCRATCHPAD_CPUID, %g1
-	ldxa	[%g1] ASI_SCRATCHPAD, %g3
+	/* Load MMU Miss base into %g2.  */
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
 	
 	/* Load UTSB reg into %g1.  */
-	ldxa	[%g1 + %g1] ASI_SCRATCHPAD, %g1
+	mov	SCRATCHPAD_UTSBREG1, %g1
+	ldxa	[%g1] ASI_SCRATCHPAD, %g1
 
-	/* Load &trap_block[smp_processor_id()] into %g2.  */
-	sethi	%hi(trap_block), %g2
-	or	%g2, %lo(trap_block), %g2
-	sllx	%g3, TRAP_BLOCK_SZ_SHIFT, %g3
-	add	%g2, %g3, %g2
-
 	/* Create a TAG TARGET, "(vaddr>>22) | (ctx << 48)", in %g6.
 	 * Branch if kernel TLB miss.  The kernel TSB and user TSB miss
 	 * code wants the missing virtual address in %g4, so that value
 	 * cannot be modified through the entirety of this handler.
 	 */
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g4
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_CTX_OFFSET], %g5
+	ldx	[%g2 + HV_FAULT_D_ADDR_OFFSET], %g4
+	ldx	[%g2 + HV_FAULT_D_CTX_OFFSET], %g5
 	srlx	%g4, 22, %g3
 	sllx	%g5, 48, %g6
 	or	%g6, %g3, %g6
@@ -169,17 +157,10 @@
 	retry
 
 sun4v_dtlb_prot:
-	/* Load CPU ID into %g3.  */
-	mov	SCRATCHPAD_CPUID, %g1
-	ldxa	[%g1] ASI_SCRATCHPAD, %g3
+	/* Load MMU Miss base into %g2.  */
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
 	
-	/* Load &trap_block[smp_processor_id()] into %g2.  */
-	sethi	%hi(trap_block), %g2
-	or	%g2, %lo(trap_block), %g2
-	sllx	%g3, TRAP_BLOCK_SZ_SHIFT, %g3
-	add	%g2, %g3, %g2
-
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g5
+	ldx	[%g2 + HV_FAULT_D_ADDR_OFFSET], %g5
 	rdpr	%tl, %g1
 	cmp	%g1, 1
 	bgu,pn		%xcc, winfix_trampoline
@@ -187,35 +168,21 @@
 	ba,pt		%xcc, sparc64_realfault_common
 	 mov		FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4
 
-	/* Called from trap table with &trap_block[smp_processor_id()] in
-	 * %g5 and SCRATCHPAD_UTSBREG1 contents in %g1.
+	/* Called from trap table with TAG TARGET placed into %g6.
+	 * SCRATCHPAD_UTSBREG1 contents get loaded into %g1 here.
 	 */
 sun4v_itsb_miss:
-	ldx	[%g5 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_ADDR_OFFSET], %g4
-	ldx	[%g5 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_CTX_OFFSET], %g5
-
-	srlx	%g4, 22, %g7
-	sllx	%g5, 48, %g6
-	or	%g6, %g7, %g6
-	brz,pn	%g5, kvmap_itlb_4v
-	 nop
-
+	mov	SCRATCHPAD_UTSBREG1, %g1
+	ldxa	[%g1] ASI_SCRATCHPAD, %g1
 	ba,pt	%xcc, sun4v_tsb_miss_common
 	 mov	FAULT_CODE_ITLB, %g3
 
-	/* Called from trap table with &trap_block[smp_processor_id()] in
-	 * %g5 and SCRATCHPAD_UTSBREG1 contents in %g1.
+	/* Called from trap table with TAG TARGET placed into %g6.
+	 * SCRATCHPAD_UTSBREG1 contents get loaded into %g1 here.
 	 */
 sun4v_dtsb_miss:
-	ldx	[%g5 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g4
-	ldx	[%g5 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_CTX_OFFSET], %g5
-
-	srlx	%g4, 22, %g7
-	sllx	%g5, 48, %g6
-	or	%g6, %g7, %g6
-	brz,pn	%g5, kvmap_dtlb_4v
-	 nop
-
+	mov	SCRATCHPAD_UTSBREG1, %g1
+	ldxa	[%g1] ASI_SCRATCHPAD, %g1
 	mov	FAULT_CODE_DTLB, %g3
 
 	/* Create TSB pointer into %g1.  This is something like:
@@ -239,15 +206,10 @@
 
 	/* Instruction Access Exception, tl0. */
 sun4v_iacc:
-	mov	SCRATCHPAD_CPUID, %g1
-	ldxa	[%g1] ASI_SCRATCHPAD, %g3
-	sethi	%hi(trap_block), %g2
-	or	%g2, %lo(trap_block), %g2
-	sllx	%g3, TRAP_BLOCK_SZ_SHIFT, %g3
-	add	%g2, %g3, %g2
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_TYPE_OFFSET], %g3
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_ADDR_OFFSET], %g4
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_CTX_OFFSET], %g5
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+	ldx	[%g2 + HV_FAULT_I_TYPE_OFFSET], %g3
+	ldx	[%g2 + HV_FAULT_I_ADDR_OFFSET], %g4
+	ldx	[%g2 + HV_FAULT_I_CTX_OFFSET], %g5
 	sllx	%g3, 16, %g3
 	or	%g5, %g3, %g5
 	ba,pt	%xcc, etrap
@@ -260,15 +222,10 @@
 
 	/* Instruction Access Exception, tl1. */
 sun4v_iacc_tl1:
-	mov	SCRATCHPAD_CPUID, %g1
-	ldxa	[%g1] ASI_SCRATCHPAD, %g3
-	sethi	%hi(trap_block), %g2
-	or	%g2, %lo(trap_block), %g2
-	sllx	%g3, TRAP_BLOCK_SZ_SHIFT, %g3
-	add	%g2, %g3, %g2
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_TYPE_OFFSET], %g3
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_ADDR_OFFSET], %g4
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_CTX_OFFSET], %g5
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+	ldx	[%g2 + HV_FAULT_I_TYPE_OFFSET], %g3
+	ldx	[%g2 + HV_FAULT_I_ADDR_OFFSET], %g4
+	ldx	[%g2 + HV_FAULT_I_CTX_OFFSET], %g5
 	sllx	%g3, 16, %g3
 	or	%g5, %g3, %g5
 	ba,pt	%xcc, etraptl1
@@ -281,15 +238,10 @@
 
 	/* Data Access Exception, tl0. */
 sun4v_dacc:
-	mov	SCRATCHPAD_CPUID, %g1
-	ldxa	[%g1] ASI_SCRATCHPAD, %g3
-	sethi	%hi(trap_block), %g2
-	or	%g2, %lo(trap_block), %g2
-	sllx	%g3, TRAP_BLOCK_SZ_SHIFT, %g3
-	add	%g2, %g3, %g2
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_TYPE_OFFSET], %g3
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g4
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_CTX_OFFSET], %g5
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+	ldx	[%g2 + HV_FAULT_D_TYPE_OFFSET], %g3
+	ldx	[%g2 + HV_FAULT_D_ADDR_OFFSET], %g4
+	ldx	[%g2 + HV_FAULT_D_CTX_OFFSET], %g5
 	sllx	%g3, 16, %g3
 	or	%g5, %g3, %g5
 	ba,pt	%xcc, etrap
@@ -302,15 +254,10 @@
 
 	/* Data Access Exception, tl1. */
 sun4v_dacc_tl1:
-	mov	SCRATCHPAD_CPUID, %g1
-	ldxa	[%g1] ASI_SCRATCHPAD, %g3
-	sethi	%hi(trap_block), %g2
-	or	%g2, %lo(trap_block), %g2
-	sllx	%g3, TRAP_BLOCK_SZ_SHIFT, %g3
-	add	%g2, %g3, %g2
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_TYPE_OFFSET], %g3
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g4
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_CTX_OFFSET], %g5
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+	ldx	[%g2 + HV_FAULT_D_TYPE_OFFSET], %g3
+	ldx	[%g2 + HV_FAULT_D_ADDR_OFFSET], %g4
+	ldx	[%g2 + HV_FAULT_D_CTX_OFFSET], %g5
 	sllx	%g3, 16, %g3
 	or	%g5, %g3, %g5
 	ba,pt	%xcc, etraptl1
@@ -323,15 +270,10 @@
 
 	/* Memory Address Unaligned.  */
 sun4v_mna:
-	mov	SCRATCHPAD_CPUID, %g1
-	ldxa	[%g1] ASI_SCRATCHPAD, %g3
-	sethi	%hi(trap_block), %g2
-	or	%g2, %lo(trap_block), %g2
-	sllx	%g3, TRAP_BLOCK_SZ_SHIFT, %g3
-	add	%g2, %g3, %g2
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
 	mov	HV_FAULT_TYPE_UNALIGNED, %g3
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g4
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_CTX_OFFSET], %g5
+	ldx	[%g2 + HV_FAULT_D_ADDR_OFFSET], %g4
+	ldx	[%g2 + HV_FAULT_D_CTX_OFFSET], %g5
 	sllx	%g3, 16, %g3
 	or	%g5, %g3, %g5
 
@@ -359,15 +301,10 @@
 
 	/* Unaligned ldd float, tl0. */
 sun4v_lddfmna:
-	mov	SCRATCHPAD_CPUID, %g1
-	ldxa	[%g1] ASI_SCRATCHPAD, %g3
-	sethi	%hi(trap_block), %g2
-	or	%g2, %lo(trap_block), %g2
-	sllx	%g3, TRAP_BLOCK_SZ_SHIFT, %g3
-	add	%g2, %g3, %g2
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_TYPE_OFFSET], %g3
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g4
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_CTX_OFFSET], %g5
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+	ldx	[%g2 + HV_FAULT_D_TYPE_OFFSET], %g3
+	ldx	[%g2 + HV_FAULT_D_ADDR_OFFSET], %g4
+	ldx	[%g2 + HV_FAULT_D_CTX_OFFSET], %g5
 	sllx	%g3, 16, %g3
 	or	%g5, %g3, %g5
 	ba,pt	%xcc, etrap
@@ -380,15 +317,10 @@
 
 	/* Unaligned std float, tl0. */
 sun4v_stdfmna:
-	mov	SCRATCHPAD_CPUID, %g1
-	ldxa	[%g1] ASI_SCRATCHPAD, %g3
-	sethi	%hi(trap_block), %g2
-	or	%g2, %lo(trap_block), %g2
-	sllx	%g3, TRAP_BLOCK_SZ_SHIFT, %g3
-	add	%g2, %g3, %g2
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_TYPE_OFFSET], %g3
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g4
-	ldx	[%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_CTX_OFFSET], %g5
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+	ldx	[%g2 + HV_FAULT_D_TYPE_OFFSET], %g3
+	ldx	[%g2 + HV_FAULT_D_ADDR_OFFSET], %g4
+	ldx	[%g2 + HV_FAULT_D_CTX_OFFSET], %g5
 	sllx	%g3, 16, %g3
 	or	%g5, %g3, %g5
 	ba,pt	%xcc, etrap
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
index c476f5b..8838220 100644
--- a/arch/sparc64/kernel/trampoline.S
+++ b/arch/sparc64/kernel/trampoline.S
@@ -389,10 +389,35 @@
 	or		%o1, PSTATE_IE, %o1
 	wrpr		%o1, 0, %pstate
 
-	call		prom_set_trap_table
+	sethi		%hi(is_sun4v), %o0
+	lduw		[%o0 + %lo(is_sun4v)], %o0
+	brz,pt		%o0, 1f
+	 nop
+
+	TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
+	add		%g2, TRAP_PER_CPU_FAULT_INFO, %g2
+	stxa		%g2, [%g0] ASI_SCRATCHPAD
+
+	/* Compute physical address:
+	 *
+	 * paddr = kern_base + (mmfsa_vaddr - KERNBASE)
+	 */
+	sethi		%hi(KERNBASE), %g3
+	sub		%g2, %g3, %g2
+	sethi		%hi(kern_base), %g3
+	ldx		[%g3 + %lo(kern_base)], %g3
+	add		%g2, %g3, %o1
+
+	call		prom_set_trap_table_sun4v
 	 sethi		%hi(sparc64_ttable_tl0), %o0
 
-	call		smp_callin
+	ba,pt		%xcc, 2f
+	 nop
+
+1:	call		prom_set_trap_table
+	 sethi		%hi(sparc64_ttable_tl0), %o0
+
+2:	call		smp_callin
 	 nop
 	call		cpu_idle
 	 mov		0, %o0
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 7faba33..88eb6f6 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -1109,24 +1109,6 @@
 	}
 }
 
-/* Register this cpu's fault status area with the hypervisor.  */
-void __cpuinit sun4v_register_fault_status(void)
-{
-	register unsigned long func asm("%o5");
-	register unsigned long arg0 asm("%o0");
-	int cpu = hard_smp_processor_id();
-	struct trap_per_cpu *tb = &trap_block[cpu];
-	unsigned long pa;
-
-	pa = kern_base + ((unsigned long) tb - KERNBASE);
-	func = HV_FAST_MMU_FAULT_AREA_CONF;
-	arg0 = pa;
-	__asm__ __volatile__("ta	%4"
-			     : "=&r" (func), "=&r" (arg0)
-			     : "0" (func), "1" (arg0),
-			       "i" (HV_FAST_TRAP));
-}
-
 /* paging_init() sets up the page tables */
 
 extern void cheetah_ecache_flush_init(void);
@@ -1147,10 +1129,8 @@
 	    tlb_type == hypervisor)
 		tsb_phys_patch();
 
-	if (tlb_type == hypervisor) {
+	if (tlb_type == hypervisor)
 		sun4v_patch_tlb_handlers();
-		sun4v_register_fault_status();
-	}
 
 	/* Find available physical memory... */
 	read_obp_memory("available", &pavail[0], &pavail_ents);
diff --git a/arch/sparc64/prom/misc.c b/arch/sparc64/prom/misc.c
index 87f5cfc..713cbac 100644
--- a/arch/sparc64/prom/misc.c
+++ b/arch/sparc64/prom/misc.c
@@ -136,6 +136,11 @@
 	p1275_cmd("SUNW,set-trap-table", P1275_INOUT(1, 0), tba);
 }
 
+void prom_set_trap_table_sun4v(unsigned long tba, unsigned long mmfsa)
+{
+	p1275_cmd("SUNW,set-trap-table", P1275_INOUT(2, 0), tba, mmfsa);
+}
+
 int prom_get_mmu_ihandle(void)
 {
 	int node, ret;
diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h
index 338b0ca..5a970f5 100644
--- a/include/asm-sparc64/cpudata.h
+++ b/include/asm-sparc64/cpudata.h
@@ -156,13 +156,16 @@
 	nop;						\
 	.previous;
 
-/* Clobbers TMP, current address space PGD phys address into DEST.  */
-#define TRAP_LOAD_PGD_PHYS(DEST, TMP)		\
+#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
 	__GET_CPUID(TMP)			\
 	sethi	%hi(trap_block), DEST;		\
 	sllx	TMP, TRAP_BLOCK_SZ_SHIFT, TMP;	\
 	or	DEST, %lo(trap_block), DEST;	\
 	add	DEST, TMP, DEST;		\
+
+/* Clobbers TMP, current address space PGD phys address into DEST.  */
+#define TRAP_LOAD_PGD_PHYS(DEST, TMP)		\
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
 	ldx	[DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
 
 /* Clobbers TMP, loads local processor's IRQ work area into DEST.  */
@@ -175,11 +178,8 @@
 
 /* Clobbers TMP, loads DEST with current thread info pointer.  */
 #define TRAP_LOAD_THREAD_REG(DEST, TMP)		\
-	__GET_CPUID(TMP)			\
-	sethi	%hi(trap_block), DEST;		\
-	sllx	TMP, TRAP_BLOCK_SZ_SHIFT, TMP;	\
-	or	DEST, %lo(trap_block), DEST;	\
-	ldx	[DEST + TMP], DEST;
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	ldx	[DEST + TRAP_PER_CPU_THREAD], DEST;
 
 /* Given the current thread info pointer in THR, load the per-cpu
  * area base of the current processor into DEST.  REG1, REG2, and REG3 are
@@ -201,13 +201,13 @@
 
 #else
 
-#define __GET_CPUID(REG)				\
-	mov	0, REG;
+#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	sethi	%hi(trap_block), DEST;		\
+	or	DEST, %lo(trap_block), DEST;	\
 
 /* Uniprocessor versions, we know the cpuid is zero.  */
 #define TRAP_LOAD_PGD_PHYS(DEST, TMP)		\
-	sethi	%hi(trap_block), DEST;		\
-	or	DEST, %lo(trap_block), DEST;	\
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
 	ldx	[DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
 
 #define TRAP_LOAD_IRQ_WORK(DEST, TMP)		\
@@ -215,8 +215,8 @@
 	or	DEST, %lo(__irq_work), DEST;
 
 #define TRAP_LOAD_THREAD_REG(DEST, TMP)		\
-	sethi	%hi(trap_block), DEST;		\
-	ldx	[DEST + %lo(trap_block)], DEST;
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	ldx	[DEST + TRAP_PER_CPU_THREAD], DEST;
 
 /* No per-cpu areas on uniprocessor, so no need to load DEST.  */
 #define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)
diff --git a/include/asm-sparc64/oplib.h b/include/asm-sparc64/oplib.h
index 2ea545b..ce5066e 100644
--- a/include/asm-sparc64/oplib.h
+++ b/include/asm-sparc64/oplib.h
@@ -338,6 +338,7 @@
 
 /* Client interface level routines. */
 extern void prom_set_trap_table(unsigned long tba);
+extern void prom_set_trap_table_sun4v(unsigned long tba, unsigned long mmfsa);
 
 extern long p1275_cmd(const char *, long, ...);
 				   
diff --git a/include/asm-sparc64/ttable.h b/include/asm-sparc64/ttable.h
index 972f913..6bb86a7 100644
--- a/include/asm-sparc64/ttable.h
+++ b/include/asm-sparc64/ttable.h
@@ -180,25 +180,25 @@
 #define KPROBES_TRAP(lvl) TRAP_ARG(bad_trap, lvl)
 #endif
 
-#define SUN4V_ITSB_MISS				\
-	mov	SCRATCHPAD_CPUID, %g1;		\
-	ldxa	[%g1] ASI_SCRATCHPAD, %g2;	\
-	ldxa	[%g1 + %g1] ASI_SCRATCHPAD, %g1;\
-	sethi	%hi(trap_block), %g5;		\
-	sllx	%g2, TRAP_BLOCK_SZ_SHIFT, %g2;	\
-	or	%g5, %lo(trap_block), %g5;	\
-	ba,pt	%xcc, sun4v_itsb_miss;		\
-	 add	%g5, %g2, %g5;
+#define SUN4V_ITSB_MISS					\
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2;		\
+	ldx	[%g2 + HV_FAULT_I_ADDR_OFFSET], %g4;	\
+	ldx	[%g2 + HV_FAULT_I_CTX_OFFSET], %g5;	\
+	srlx	%g4, 22, %g7;				\
+	sllx	%g5, 48, %g6;				\
+	brz,pn	%g5, kvmap_itlb_4v;			\
+	 or	%g6, %g7, %g6;				\
+	ba,a,pt	%xcc, sun4v_itsb_miss;
 
 #define SUN4V_DTSB_MISS				\
-	mov	SCRATCHPAD_CPUID, %g1;		\
-	ldxa	[%g1] ASI_SCRATCHPAD, %g2;	\
-	ldxa	[%g1 + %g1] ASI_SCRATCHPAD, %g1;\
-	sethi	%hi(trap_block), %g5;		\
-	sllx	%g2, TRAP_BLOCK_SZ_SHIFT, %g2;	\
-	or	%g5, %lo(trap_block), %g5;	\
-	ba,pt	%xcc, sun4v_dtsb_miss;		\
-	 add	%g5, %g2, %g5;
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2;		\
+	ldx	[%g2 + HV_FAULT_D_ADDR_OFFSET], %g4;	\
+	ldx	[%g2 + HV_FAULT_D_CTX_OFFSET], %g5;	\
+	srlx	%g4, 22, %g7;				\
+	sllx	%g5, 48, %g6;				\
+	brz,pn	%g5, kvmap_dtlb_4v;			\
+	 or	%g6, %g7, %g6;				\
+	ba,a,pt	%xcc, sun4v_dtsb_miss;
 
 /* Before touching these macros, you owe it to yourself to go and
  * see how arch/sparc64/kernel/winfixup.S works... -DaveM