[SPARC64]: Use ASI_SCRATCHPAD address 0x0 properly.

ASI_SCRATCHPAD address 0x0 is where the virtual address of
the fault status area belongs.

To set it up we don't make a hypervisor call; instead, we
call OBP's SUNW,set-trap-table with the real address of the
fault status area as the second argument.  Right before that
call, we write the virtual address into ASI_SCRATCHPAD
vaddr 0x0.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h
index 338b0ca..5a970f5 100644
--- a/include/asm-sparc64/cpudata.h
+++ b/include/asm-sparc64/cpudata.h
@@ -156,13 +156,16 @@
 	nop;						\
 	.previous;
 
-/* Clobbers TMP, current address space PGD phys address into DEST.  */
-#define TRAP_LOAD_PGD_PHYS(DEST, TMP)		\
+#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
 	__GET_CPUID(TMP)			\
 	sethi	%hi(trap_block), DEST;		\
 	sllx	TMP, TRAP_BLOCK_SZ_SHIFT, TMP;	\
 	or	DEST, %lo(trap_block), DEST;	\
 	add	DEST, TMP, DEST;		\
+
+/* Clobbers TMP, current address space PGD phys address into DEST.  */
+#define TRAP_LOAD_PGD_PHYS(DEST, TMP)		\
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
 	ldx	[DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
 
 /* Clobbers TMP, loads local processor's IRQ work area into DEST.  */
@@ -175,11 +178,8 @@
 
 /* Clobbers TMP, loads DEST with current thread info pointer.  */
 #define TRAP_LOAD_THREAD_REG(DEST, TMP)		\
-	__GET_CPUID(TMP)			\
-	sethi	%hi(trap_block), DEST;		\
-	sllx	TMP, TRAP_BLOCK_SZ_SHIFT, TMP;	\
-	or	DEST, %lo(trap_block), DEST;	\
-	ldx	[DEST + TMP], DEST;
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	ldx	[DEST + TRAP_PER_CPU_THREAD], DEST;
 
 /* Given the current thread info pointer in THR, load the per-cpu
  * area base of the current processor into DEST.  REG1, REG2, and REG3 are
@@ -201,13 +201,13 @@
 
 #else
 
-#define __GET_CPUID(REG)				\
-	mov	0, REG;
+#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	sethi	%hi(trap_block), DEST;		\
+	or	DEST, %lo(trap_block), DEST;	\
 
 /* Uniprocessor versions, we know the cpuid is zero.  */
 #define TRAP_LOAD_PGD_PHYS(DEST, TMP)		\
-	sethi	%hi(trap_block), DEST;		\
-	or	DEST, %lo(trap_block), DEST;	\
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
 	ldx	[DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
 
 #define TRAP_LOAD_IRQ_WORK(DEST, TMP)		\
@@ -215,8 +215,8 @@
 	or	DEST, %lo(__irq_work), DEST;
 
 #define TRAP_LOAD_THREAD_REG(DEST, TMP)		\
-	sethi	%hi(trap_block), DEST;		\
-	ldx	[DEST + %lo(trap_block)], DEST;
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	ldx	[DEST + TRAP_PER_CPU_THREAD], DEST;
 
 /* No per-cpu areas on uniprocessor, so no need to load DEST.  */
 #define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)