ioat: fix size of 'completion' for Xen

Starting with v3.2 Jonathan reports that Xen crashes loading the ioatdma
driver.  A debug run shows:

  ioatdma 0000:00:16.4: desc[0]: (0x300cc7000->0x300cc7040) cookie: 0 flags: 0x2 ctl: 0x29 (op: 0 int_en: 1 compl: 1)
  ...
  ioatdma 0000:00:16.4: ioat_get_current_completion: phys_complete: 0xcc7000

...which shows that in this environment GFP_KERNEL memory may be backed
by a 64-bit dma address.  This breaks the driver's assumption that an
unsigned long should be able to contain the physical address for
descriptor memory.  Switch to dma_addr_t which beyond being the right
size, is the true type for the data i.e. an io-virtual address
inidicating the engine's last processed descriptor.

[stable: 3.2+]
Cc: <stable@vger.kernel.org>
Reported-by: Jonathan Nieder <jrnieder@gmail.com>
Reported-by: William Dauchy <wdauchy@gmail.com>
Tested-by: William Dauchy <wdauchy@gmail.com>
Tested-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index a4d6cb0..6595180 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -548,9 +548,9 @@
 			   PCI_DMA_TODEVICE, flags, 0);
 }
 
-unsigned long ioat_get_current_completion(struct ioat_chan_common *chan)
+dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan)
 {
-	unsigned long phys_complete;
+	dma_addr_t phys_complete;
 	u64 completion;
 
 	completion = *chan->completion;
@@ -571,7 +571,7 @@
 }
 
 bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
-			   unsigned long *phys_complete)
+			   dma_addr_t *phys_complete)
 {
 	*phys_complete = ioat_get_current_completion(chan);
 	if (*phys_complete == chan->last_completion)
@@ -582,14 +582,14 @@
 	return true;
 }
 
-static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete)
+static void __cleanup(struct ioat_dma_chan *ioat, dma_addr_t phys_complete)
 {
 	struct ioat_chan_common *chan = &ioat->base;
 	struct list_head *_desc, *n;
 	struct dma_async_tx_descriptor *tx;
 
-	dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n",
-		 __func__, phys_complete);
+	dev_dbg(to_dev(chan), "%s: phys_complete: %llx\n",
+		 __func__, (unsigned long long) phys_complete);
 	list_for_each_safe(_desc, n, &ioat->used_desc) {
 		struct ioat_desc_sw *desc;
 
@@ -655,7 +655,7 @@
 static void ioat1_cleanup(struct ioat_dma_chan *ioat)
 {
 	struct ioat_chan_common *chan = &ioat->base;
-	unsigned long phys_complete;
+	dma_addr_t phys_complete;
 
 	prefetch(chan->completion);
 
@@ -701,7 +701,7 @@
 		mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
 		spin_unlock_bh(&ioat->desc_lock);
 	} else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
-		unsigned long phys_complete;
+		dma_addr_t phys_complete;
 
 		spin_lock_bh(&ioat->desc_lock);
 		/* if we haven't made progress and we have already