drm/radeon/kms: Add support for multi-ring sync in CS ioctl (v2)

Use semaphores to sync buffers across rings in the CS
ioctl.  Add a reloc flag to allow userspace to skip
sync for buffers.

agd5f: port to latest CS ioctl changes.

v2: add ring lock/unlock to make sure changes hit the ring.

Signed-off-by: Christian König <deathsimple@vodafone.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 4d59540..17af0e8 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -84,6 +84,13 @@
 			p->relocs[i].flags = r->flags;
 			radeon_bo_list_add_object(&p->relocs[i].lobj,
 						  &p->validated);
+
+			if (p->relocs[i].robj->tbo.sync_obj && !(r->flags & RADEON_RELOC_DONT_SYNC)) {
+				struct radeon_fence *fence = p->relocs[i].robj->tbo.sync_obj;
+				if (!radeon_fence_signaled(fence)) {
+					p->sync_to_ring[fence->ring] = true;
+				}
+			}
 		} else
 			p->relocs[i].handle = 0;
 	}
@@ -109,6 +116,36 @@
 	return 0;
 }
 
+static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
+{
+	int i, r;
+
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		/* no need to sync to our own or unused rings */
+		if (i == p->ring || !p->sync_to_ring[i] || !p->rdev->ring[i].ready)
+			continue;
+
+		if (!p->ib->fence->semaphore) {
+			r = radeon_semaphore_create(p->rdev, &p->ib->fence->semaphore);
+			if (r)
+				return r;
+		}
+
+		r = radeon_ring_lock(p->rdev, &p->rdev->ring[i], 3);
+		if (r)
+			return r;
+		radeon_semaphore_emit_signal(p->rdev, i, p->ib->fence->semaphore);
+		radeon_ring_unlock_commit(p->rdev, &p->rdev->ring[i]);
+
+		r = radeon_ring_lock(p->rdev, &p->rdev->ring[p->ring], 3);
+		if (r)
+			return r;
+		radeon_semaphore_emit_wait(p->rdev, p->ring, p->ib->fence->semaphore);
+		radeon_ring_unlock_commit(p->rdev, &p->rdev->ring[p->ring]);
+	}
+	return 0;
+}
+
 int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 {
 	struct drm_radeon_cs *cs = data;
@@ -314,6 +351,10 @@
 		DRM_ERROR("Invalid command stream !\n");
 		return r;
 	}
+	r = radeon_cs_sync_rings(parser);
+	if (r) {
+		DRM_ERROR("Failed to synchronize rings !\n");
+	}
 	parser->ib->vm_id = 0;
 	r = radeon_ib_schedule(rdev, parser->ib);
 	if (r) {
@@ -384,6 +425,10 @@
 	if (r) {
 		goto out;
 	}
+	r = radeon_cs_sync_rings(parser);
+	if (r) {
+		DRM_ERROR("Failed to synchronize rings !\n");
+	}
 	parser->ib->vm_id = vm->id;
 	/* ib pool is bind at 0 in virtual address space to gpu_addr is the
 	 * offset inside the pool bo