sparc64: Stop using memory barriers for atomics and locks.

The kernel now always executes in the TSO memory model, so the
explicit memory barriers in the atomics, bitops, rwsem, and other
locking paths are no longer necessary.
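
For reference, the few #StoreLoad ordering points that remain (the
tick synchronization handshake and the capture/release paths in
smp.c) are converted to membar_safe().  Roughly, that macro expands
to something like the following sketch of the sparc64 header
definition (the exact form in the tree may differ):

	/* Sketch only, not taken from this patch: the membar sits in
	 * the delay slot of a branch-always to the next instruction,
	 * and the "memory" clobber keeps the compiler from reordering
	 * accesses across it.
	 */
	#define membar_safe(type) \
	do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
				     " membar	" type "\n" \
				     "1:\n" \
				     : : : "memory"); \
	} while (0)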

With helpful feedback from Nick Piggin.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index f500b06..c6d0636 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -163,7 +163,7 @@
 	for (i = 0; i < NUM_ITERS; i++) {
 		t0 = tick_ops->get_tick();
 		go[MASTER] = 1;
-		membar_storeload();
+		membar_safe("#StoreLoad");
 		while (!(tm = go[SLAVE]))
 			rmb();
 		go[SLAVE] = 0;
@@ -257,7 +257,7 @@
 
 	/* now let the client proceed into his loop */
 	go[MASTER] = 0;
-	membar_storeload();
+	membar_safe("#StoreLoad");
 
 	spin_lock_irqsave(&itc_sync_lock, flags);
 	{
@@ -267,7 +267,7 @@
 			go[MASTER] = 0;
 			wmb();
 			go[SLAVE] = tick_ops->get_tick();
-			membar_storeload();
+			membar_safe("#StoreLoad");
 		}
 	}
 	spin_unlock_irqrestore(&itc_sync_lock, flags);
@@ -1122,7 +1122,6 @@
 		       smp_processor_id());
 #endif
 		penguins_are_doing_time = 1;
-		membar_storestore_loadstore();
 		atomic_inc(&smp_capture_registry);
 		smp_cross_call(&xcall_capture, 0, 0, 0);
 		while (atomic_read(&smp_capture_registry) != ncpus)
@@ -1142,7 +1141,7 @@
 		       smp_processor_id());
 #endif
 		penguins_are_doing_time = 0;
-		membar_storeload_storestore();
+		membar_safe("#StoreLoad");
 		atomic_dec(&smp_capture_registry);
 	}
 }
@@ -1161,7 +1160,7 @@
 	__asm__ __volatile__("flushw");
 	prom_world(1);
 	atomic_inc(&smp_capture_registry);
-	membar_storeload_storestore();
+	membar_safe("#StoreLoad");
 	while (penguins_are_doing_time)
 		rmb();
 	atomic_dec(&smp_capture_registry);
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
index 83abd5a..da1b781 100644
--- a/arch/sparc64/kernel/trampoline.S
+++ b/arch/sparc64/kernel/trampoline.S
@@ -109,7 +109,6 @@
 	 */
 	sethi		%hi(prom_entry_lock), %g2
 1:	ldstub		[%g2 + %lo(prom_entry_lock)], %g1
-	membar		#StoreLoad | #StoreStore
 	brnz,pn		%g1, 1b
 	 nop
 
@@ -214,7 +213,6 @@
 
 	sethi		%hi(prom_entry_lock), %g2
 	stb		%g0, [%g2 + %lo(prom_entry_lock)]
-	membar		#StoreStore | #StoreLoad
 
 	ba,pt		%xcc, after_lock_tlb
 	 nop
@@ -330,7 +328,6 @@
 
 	sethi		%hi(prom_entry_lock), %g2
 1:	ldstub		[%g2 + %lo(prom_entry_lock)], %g1
-	membar		#StoreLoad | #StoreStore
 	brnz,pn		%g1, 1b
 	 nop
 
@@ -394,7 +391,6 @@
 
 3:	sethi		%hi(prom_entry_lock), %g2
 	stb		%g0, [%g2 + %lo(prom_entry_lock)]
-	membar		#StoreStore | #StoreLoad
 
 	ldx		[%l0], %g6
 	ldx		[%g6 + TI_TASK], %g4
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 81ccd22..04994fc 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -1371,7 +1371,6 @@
 	__asm__ __volatile__("ldxa	[%0] %3, %%g0\n\t"
 			     "ldxa	[%1] %3, %%g0\n\t"
 			     "casxa	[%2] %3, %%g0, %%g0\n\t"
-			     "membar	#StoreLoad | #StoreStore\n\t"
 			     "ldxa	[%0] %3, %%g0\n\t"
 			     "ldxa	[%1] %3, %%g0\n\t"
 			     "membar	#Sync"
diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S
index c499214..8c91d9b 100644
--- a/arch/sparc64/kernel/tsb.S
+++ b/arch/sparc64/kernel/tsb.S
@@ -317,7 +317,7 @@
 	srlx	%g1, 32, %o3
 	andcc	%o3, %g2, %g0
 	bne,pn	%icc, 1b
-	 membar	#LoadLoad
+	 nop
 	cmp	%g1, %o1
 	mov	1, %o3
 	bne,pt	%xcc, 2f
@@ -327,7 +327,7 @@
 	bne,pn	%xcc, 1b
 	 nop
 2:	retl
-	 TSB_MEMBAR
+	 nop
 	.size	tsb_flush, .-tsb_flush
 
 	/* Reload MMU related context switch state at
@@ -478,7 +478,7 @@
 	 nop
 
 	retl
-	 TSB_MEMBAR
+	 nop
 	.size		copy_tsb, .-copy_tsb
 
 	/* Set the invalid bit in all TSB entries.  */
diff --git a/arch/sparc64/lib/atomic.S b/arch/sparc64/lib/atomic.S
index 70ac418..0268210 100644
--- a/arch/sparc64/lib/atomic.S
+++ b/arch/sparc64/lib/atomic.S
@@ -43,29 +43,10 @@
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
 	.size	atomic_sub, .-atomic_sub
 
-	/* On SMP we need to use memory barriers to ensure
-	 * correct memory operation ordering, nop these out
-	 * for uniprocessor.
-	 */
-#ifdef CONFIG_SMP
-
-#define ATOMIC_PRE_BARRIER	membar #StoreLoad | #LoadLoad;
-#define ATOMIC_POST_BARRIER	\
-	ba,pt %xcc, 80b;	\
-	membar #StoreLoad | #StoreStore
-
-80:	retl
-	 nop
-#else
-#define ATOMIC_PRE_BARRIER
-#define ATOMIC_POST_BARRIER
-#endif
-
 	.globl	atomic_add_ret
 	.type	atomic_add_ret,#function
 atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
-	ATOMIC_PRE_BARRIER
 1:	lduw	[%o1], %g1
 	add	%g1, %o0, %g7
 	cas	[%o1], %g1, %g7
@@ -73,7 +54,6 @@
 	bne,pn	%icc, 2f
 	 add	%g7, %o0, %g7
 	sra	%g7, 0, %o0
-	ATOMIC_POST_BARRIER
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
@@ -83,7 +63,6 @@
 	.type	atomic_sub_ret,#function
 atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
-	ATOMIC_PRE_BARRIER
 1:	lduw	[%o1], %g1
 	sub	%g1, %o0, %g7
 	cas	[%o1], %g1, %g7
@@ -91,7 +70,6 @@
 	bne,pn	%icc, 2f
 	 sub	%g7, %o0, %g7
 	sra	%g7, 0, %o0
-	ATOMIC_POST_BARRIER
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
@@ -131,7 +109,6 @@
 	.type	atomic64_add_ret,#function
 atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
-	ATOMIC_PRE_BARRIER
 1:	ldx	[%o1], %g1
 	add	%g1, %o0, %g7
 	casx	[%o1], %g1, %g7
@@ -139,7 +116,6 @@
 	bne,pn	%xcc, 2f
 	 add	%g7, %o0, %g7
 	mov	%g7, %o0
-	ATOMIC_POST_BARRIER
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
@@ -149,7 +125,6 @@
 	.type	atomic64_sub_ret,#function
 atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
-	ATOMIC_PRE_BARRIER
 1:	ldx	[%o1], %g1
 	sub	%g1, %o0, %g7
 	casx	[%o1], %g1, %g7
@@ -157,7 +132,6 @@
 	bne,pn	%xcc, 2f
 	 sub	%g7, %o0, %g7
 	mov	%g7, %o0
-	ATOMIC_POST_BARRIER
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
diff --git a/arch/sparc64/lib/bitops.S b/arch/sparc64/lib/bitops.S
index 6b015a6..2b7228c 100644
--- a/arch/sparc64/lib/bitops.S
+++ b/arch/sparc64/lib/bitops.S
@@ -8,29 +8,10 @@
 
 	.text
 
-	/* On SMP we need to use memory barriers to ensure
-	 * correct memory operation ordering, nop these out
-	 * for uniprocessor.
-	 */
-
-#ifdef CONFIG_SMP
-#define BITOP_PRE_BARRIER	membar #StoreLoad | #LoadLoad
-#define BITOP_POST_BARRIER	\
-	ba,pt	%xcc, 80b;	\
-	membar #StoreLoad | #StoreStore
-
-80:	retl
-	 nop
-#else
-#define BITOP_PRE_BARRIER
-#define BITOP_POST_BARRIER
-#endif
-
 	.globl	test_and_set_bit
 	.type	test_and_set_bit,#function
 test_and_set_bit:	/* %o0=nr, %o1=addr */
 	BACKOFF_SETUP(%o3)
-	BITOP_PRE_BARRIER
 	srlx	%o0, 6, %g1
 	mov	1, %o2
 	sllx	%g1, 3, %g3
@@ -45,7 +26,6 @@
 	 and	%g7, %o2, %g2
 	clr	%o0
 	movrne	%g2, 1, %o0
-	BITOP_POST_BARRIER
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o3, %o4, 1b)
@@ -55,7 +35,6 @@
 	.type	test_and_clear_bit,#function
 test_and_clear_bit:	/* %o0=nr, %o1=addr */
 	BACKOFF_SETUP(%o3)
-	BITOP_PRE_BARRIER
 	srlx	%o0, 6, %g1
 	mov	1, %o2
 	sllx	%g1, 3, %g3
@@ -70,7 +49,6 @@
 	 and	%g7, %o2, %g2
 	clr	%o0
 	movrne	%g2, 1, %o0
-	BITOP_POST_BARRIER
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o3, %o4, 1b)
@@ -80,7 +58,6 @@
 	.type	test_and_change_bit,#function
 test_and_change_bit:	/* %o0=nr, %o1=addr */
 	BACKOFF_SETUP(%o3)
-	BITOP_PRE_BARRIER
 	srlx	%o0, 6, %g1
 	mov	1, %o2
 	sllx	%g1, 3, %g3
@@ -95,7 +72,6 @@
 	 and	%g7, %o2, %g2
 	clr	%o0
 	movrne	%g2, 1, %o0
-	BITOP_POST_BARRIER
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o3, %o4, 1b)
diff --git a/arch/sparc64/lib/rwsem.S b/arch/sparc64/lib/rwsem.S
index 1a4cc56..91a7d29 100644
--- a/arch/sparc64/lib/rwsem.S
+++ b/arch/sparc64/lib/rwsem.S
@@ -17,7 +17,6 @@
 	bne,pn		%icc, 1b
 	 add		%g7, 1, %g7
 	cmp		%g7, 0
-	membar		#StoreLoad | #StoreStore
 	bl,pn		%icc, 3f
 	 nop
 2:
@@ -42,7 +41,6 @@
 	cmp		%g1, %g7
 	bne,pn		%icc, 1b
 	 mov		1, %o1
-	membar		#StoreLoad | #StoreStore
 2:	retl
 	 mov		%o1, %o0
 	.size		__down_read_trylock, .-__down_read_trylock
@@ -58,7 +56,6 @@
 	cmp		%g3, %g7
 	bne,pn		%icc, 1b
 	 cmp		%g7, 0
-	membar		#StoreLoad | #StoreStore
 	bne,pn		%icc, 3f
 	 nop
 2:	retl
@@ -85,7 +82,6 @@
 	cmp		%g3, %g7
 	bne,pn		%icc, 1b
 	 mov		1, %o1
-	membar		#StoreLoad | #StoreStore
 2:	retl
 	 mov		%o1, %o0
 	.size		__down_write_trylock, .-__down_write_trylock
@@ -99,7 +95,6 @@
 	cmp		%g1, %g7
 	bne,pn		%icc, 1b
 	 cmp		%g7, 0
-	membar		#StoreLoad | #StoreStore
 	bl,pn		%icc, 3f
 	 nop
 2:	retl
@@ -129,7 +124,6 @@
 	bne,pn		%icc, 1b
 	 sub		%g7, %g1, %g7
 	cmp		%g7, 0
-	membar		#StoreLoad | #StoreStore
 	bl,pn		%icc, 3f
 	 nop
 2:
@@ -155,7 +149,6 @@
 	bne,pn		%icc, 1b
 	 sub		%g7, %g1, %g7
 	cmp		%g7, 0
-	membar		#StoreLoad | #StoreStore
 	bl,pn		%icc, 3f
 	 nop
 2:
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 185f346..4bd6396 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -214,7 +214,6 @@
 			     "or	%%g1, %0, %%g1\n\t"
 			     "casx	[%2], %%g7, %%g1\n\t"
 			     "cmp	%%g7, %%g1\n\t"
-			     "membar	#StoreLoad | #StoreStore\n\t"
 			     "bne,pn	%%xcc, 1b\n\t"
 			     " nop"
 			     : /* no outputs */
@@ -236,7 +235,6 @@
 			     " andn	%%g7, %1, %%g1\n\t"
 			     "casx	[%2], %%g7, %%g1\n\t"
 			     "cmp	%%g7, %%g1\n\t"
-			     "membar	#StoreLoad | #StoreStore\n\t"
 			     "bne,pn	%%xcc, 1b\n\t"
 			     " nop\n"
 			     "2:"
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
index 587f8ef..f0282fa 100644
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -41,10 +41,8 @@
 					      KERNEL_TSB_NENTRIES);
 		struct tsb *ent = &swapper_tsb[hash];
 
-		if (tag_compare(ent->tag, v)) {
+		if (tag_compare(ent->tag, v))
 			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
-			membar_storeload_storestore();
-		}
 	}
 }
 
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index 86773e8..e4c146f 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -125,7 +125,6 @@
 	.align		32
 	.globl		__flush_icache_page
 __flush_icache_page:	/* %o0 = phys_page */
-	membar		#StoreStore
 	srlx		%o0, PAGE_SHIFT, %o0
 	sethi		%uhi(PAGE_OFFSET), %g1
 	sllx		%o0, PAGE_SHIFT, %o0
@@ -507,7 +506,6 @@
 	sllx		%g2, TRAP_BLOCK_SZ_SHIFT, %g2
 	add		%g7, %g2, %g7
 	ldx		[%g7 + TRAP_PER_CPU_THREAD], %g3
-	membar		#StoreStore
 	stx		%g3, [%g1 + GR_SNAP_THREAD]
 	retry