mmc: dw_mmc endian fix

Endian-neutral accessors must be used to read/write h/w registers, i.e.
__raw_readl is replaced with readl_relaxed and __raw_writel is replaced with
writel_relaxed. The relaxed accessors read/write the LE h/w register and
byteswap the value if the host operates in BE mode.

However, in this file __raw_read(wlq) and __raw_write(wlq) are also used to
transfer data between a uchar buffer and the h/w mmc host data register. In
that case a byteswap is not needed - the bytes of the data buffer should go
into the h/w register in the same order as they are in memory. So we need to
split the control macros mci_readl/mci_writel from the ones that operate on
data: mci_readw_data, mci_readl_data, mci_readq_data, mci_writew_data,
mci_writel_data, mci_writeq_data. The latter do not do byte swaps.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 55cd110..6c64dac 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -1448,7 +1448,7 @@
 		buf += len;
 		cnt -= len;
 		if (host->part_buf_count == 2) {
-			mci_writew(host, DATA(host->data_offset),
+			mci_writew_data(host, DATA(host->data_offset),
 					host->part_buf16);
 			host->part_buf_count = 0;
 		}
@@ -1466,7 +1466,7 @@
 			cnt -= len;
 			/* push data from aligned buffer into fifo */
 			for (i = 0; i < items; ++i)
-				mci_writew(host, DATA(host->data_offset),
+				mci_writew_data(host, DATA(host->data_offset),
 						aligned_buf[i]);
 		}
 	} else
@@ -1474,7 +1474,7 @@
 	{
 		u16 *pdata = buf;
 		for (; cnt >= 2; cnt -= 2)
-			mci_writew(host, DATA(host->data_offset), *pdata++);
+			mci_writew_data(host, DATA(host->data_offset), *pdata++);
 		buf = pdata;
 	}
 	/* put anything remaining in the part_buf */
@@ -1483,7 +1483,7 @@
 		 /* Push data if we have reached the expected data length */
 		if ((data->bytes_xfered + init_cnt) ==
 		    (data->blksz * data->blocks))
-			mci_writew(host, DATA(host->data_offset),
+			mci_writew_data(host, DATA(host->data_offset),
 				   host->part_buf16);
 	}
 }
@@ -1499,7 +1499,7 @@
 			int items = len >> 1;
 			int i;
 			for (i = 0; i < items; ++i)
-				aligned_buf[i] = mci_readw(host,
+				aligned_buf[i] = mci_readw_data(host,
 						DATA(host->data_offset));
 			/* memcpy from aligned buffer into output buffer */
 			memcpy(buf, aligned_buf, len);
@@ -1511,11 +1511,11 @@
 	{
 		u16 *pdata = buf;
 		for (; cnt >= 2; cnt -= 2)
-			*pdata++ = mci_readw(host, DATA(host->data_offset));
+			*pdata++ = mci_readw_data(host, DATA(host->data_offset));
 		buf = pdata;
 	}
 	if (cnt) {
-		host->part_buf16 = mci_readw(host, DATA(host->data_offset));
+		host->part_buf16 = mci_readw_data(host, DATA(host->data_offset));
 		dw_mci_pull_final_bytes(host, buf, cnt);
 	}
 }
@@ -1531,7 +1531,7 @@
 		buf += len;
 		cnt -= len;
 		if (host->part_buf_count == 4) {
-			mci_writel(host, DATA(host->data_offset),
+			mci_writel_data(host, DATA(host->data_offset),
 					host->part_buf32);
 			host->part_buf_count = 0;
 		}
@@ -1549,7 +1549,7 @@
 			cnt -= len;
 			/* push data from aligned buffer into fifo */
 			for (i = 0; i < items; ++i)
-				mci_writel(host, DATA(host->data_offset),
+				mci_writel_data(host, DATA(host->data_offset),
 						aligned_buf[i]);
 		}
 	} else
@@ -1557,7 +1557,7 @@
 	{
 		u32 *pdata = buf;
 		for (; cnt >= 4; cnt -= 4)
-			mci_writel(host, DATA(host->data_offset), *pdata++);
+			mci_writel_data(host, DATA(host->data_offset), *pdata++);
 		buf = pdata;
 	}
 	/* put anything remaining in the part_buf */
@@ -1566,7 +1566,7 @@
 		 /* Push data if we have reached the expected data length */
 		if ((data->bytes_xfered + init_cnt) ==
 		    (data->blksz * data->blocks))
-			mci_writel(host, DATA(host->data_offset),
+			mci_writel_data(host, DATA(host->data_offset),
 				   host->part_buf32);
 	}
 }
@@ -1582,7 +1582,7 @@
 			int items = len >> 2;
 			int i;
 			for (i = 0; i < items; ++i)
-				aligned_buf[i] = mci_readl(host,
+				aligned_buf[i] = mci_readl_data(host,
 						DATA(host->data_offset));
 			/* memcpy from aligned buffer into output buffer */
 			memcpy(buf, aligned_buf, len);
@@ -1594,11 +1594,11 @@
 	{
 		u32 *pdata = buf;
 		for (; cnt >= 4; cnt -= 4)
-			*pdata++ = mci_readl(host, DATA(host->data_offset));
+			*pdata++ = mci_readl_data(host, DATA(host->data_offset));
 		buf = pdata;
 	}
 	if (cnt) {
-		host->part_buf32 = mci_readl(host, DATA(host->data_offset));
+		host->part_buf32 = mci_readl_data(host, DATA(host->data_offset));
 		dw_mci_pull_final_bytes(host, buf, cnt);
 	}
 }
@@ -1615,7 +1615,7 @@
 		cnt -= len;
 
 		if (host->part_buf_count == 8) {
-			mci_writeq(host, DATA(host->data_offset),
+			mci_writeq_data(host, DATA(host->data_offset),
 					host->part_buf);
 			host->part_buf_count = 0;
 		}
@@ -1633,7 +1633,7 @@
 			cnt -= len;
 			/* push data from aligned buffer into fifo */
 			for (i = 0; i < items; ++i)
-				mci_writeq(host, DATA(host->data_offset),
+				mci_writeq_data(host, DATA(host->data_offset),
 						aligned_buf[i]);
 		}
 	} else
@@ -1641,7 +1641,7 @@
 	{
 		u64 *pdata = buf;
 		for (; cnt >= 8; cnt -= 8)
-			mci_writeq(host, DATA(host->data_offset), *pdata++);
+			mci_writeq_data(host, DATA(host->data_offset), *pdata++);
 		buf = pdata;
 	}
 	/* put anything remaining in the part_buf */
@@ -1650,7 +1650,7 @@
 		/* Push data if we have reached the expected data length */
 		if ((data->bytes_xfered + init_cnt) ==
 		    (data->blksz * data->blocks))
-			mci_writeq(host, DATA(host->data_offset),
+			mci_writeq_data(host, DATA(host->data_offset),
 				   host->part_buf);
 	}
 }
@@ -1666,7 +1666,7 @@
 			int items = len >> 3;
 			int i;
 			for (i = 0; i < items; ++i)
-				aligned_buf[i] = mci_readq(host,
+				aligned_buf[i] = mci_readq_data(host,
 						DATA(host->data_offset));
 			/* memcpy from aligned buffer into output buffer */
 			memcpy(buf, aligned_buf, len);
@@ -1678,11 +1678,11 @@
 	{
 		u64 *pdata = buf;
 		for (; cnt >= 8; cnt -= 8)
-			*pdata++ = mci_readq(host, DATA(host->data_offset));
+			*pdata++ = mci_readq_data(host, DATA(host->data_offset));
 		buf = pdata;
 	}
 	if (cnt) {
-		host->part_buf = mci_readq(host, DATA(host->data_offset));
+		host->part_buf = mci_readq_data(host, DATA(host->data_offset));
 		dw_mci_pull_final_bytes(host, buf, cnt);
 	}
 }
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index 6bf24ab..6e3ed89 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -152,21 +152,27 @@
 
 /* Register access macros */
 #define mci_readl(dev, reg)			\
-	__raw_readl((dev)->regs + SDMMC_##reg)
+	readl_relaxed((dev)->regs + SDMMC_##reg)
 #define mci_writel(dev, reg, value)			\
-	__raw_writel((value), (dev)->regs + SDMMC_##reg)
+	writel_relaxed((value), (dev)->regs + SDMMC_##reg)
 
 /* 16-bit FIFO access macros */
-#define mci_readw(dev, reg)			\
+#define mci_readw_data(dev, reg)			\
 	__raw_readw((dev)->regs + SDMMC_##reg)
-#define mci_writew(dev, reg, value)			\
+#define mci_writew_data(dev, reg, value)			\
 	__raw_writew((value), (dev)->regs + SDMMC_##reg)
 
+/* 32-bit FIFO access macros */
+#define mci_readl_data(dev, reg)                             \
+	__raw_readl((dev)->regs + SDMMC_##reg)
+#define mci_writel_data(dev, reg, value)			\
+	__raw_writel((value), (dev)->regs + SDMMC_##reg)
+
 /* 64-bit FIFO access macros */
 #ifdef readq
-#define mci_readq(dev, reg)			\
+#define mci_readq_data(dev, reg)			\
 	__raw_readq((dev)->regs + SDMMC_##reg)
-#define mci_writeq(dev, reg, value)			\
+#define mci_writeq_data(dev, reg, value)			\
 	__raw_writeq((value), (dev)->regs + SDMMC_##reg)
 #else
 /*
@@ -177,10 +183,10 @@
  * executed on those machines. Defining these macros here keeps the
  * rest of the code free from ifdefs.
  */
-#define mci_readq(dev, reg)			\
-	(*(volatile u64 __force *)((dev)->regs + SDMMC_##reg))
-#define mci_writeq(dev, reg, value)			\
-	(*(volatile u64 __force *)((dev)->regs + SDMMC_##reg) = (value))
+#define mci_readq_data(dev, reg)			\
+	(*(volatile u64 __force *)((dev)->regs + SDMMC_##reg))
+#define mci_writeq_data(dev, reg, value)			\
+	(*(volatile u64 __force *)((dev)->regs + SDMMC_##reg) = (value))
 #endif
 
 extern int dw_mci_probe(struct dw_mci *host);