dm raid1: fix deadlock when suspending failed device
To prevent deadlock, bios in the hold list should be flushed before
dm_rh_stop_recovery() is called in mirror_suspend().
The recovery can't start because there are pending bios and therefore
dm_rh_stop_recovery deadlocks.
When there are pending bios in the hold list, the recovery waits for
the completion of the bios after recovery_count is acquired.
The recovery_count is released when the recovery finished, however,
the bios in the hold list are processed after dm_rh_stop_recovery() in
mirror_presuspend(). dm_rh_stop_recovery() also acquires recovery_count,
then deadlock occurs.
Signed-off-by: Takahiro Yasui <tyasui@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Reviewed-by: Mikulas Patocka <mpatocka@redhat.com>
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 6d66ddf..ddda531 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -465,9 +465,17 @@
static void hold_bio(struct mirror_set *ms, struct bio *bio)
{
/*
- * If device is suspended, complete the bio.
+ * Lock is required to avoid race condition during suspend
+ * process.
*/
+ spin_lock_irq(&ms->lock);
+
if (atomic_read(&ms->suspend)) {
+ spin_unlock_irq(&ms->lock);
+
+ /*
+ * If device is suspended, complete the bio.
+ */
if (dm_noflush_suspending(ms->ti))
bio_endio(bio, DM_ENDIO_REQUEUE);
else
@@ -478,7 +486,6 @@
/*
* Hold bio until the suspend is complete.
*/
- spin_lock_irq(&ms->lock);
bio_list_add(&ms->holds, bio);
spin_unlock_irq(&ms->lock);
}
@@ -1261,6 +1268,20 @@
atomic_set(&ms->suspend, 1);
/*
+ * Process bios in the hold list to start recovery waiting
+ * for bios in the hold list. After the process, no bio has
+ * a chance to be added in the hold list because ms->suspend
+ * is set.
+ */
+ spin_lock_irq(&ms->lock);
+ holds = ms->holds;
+ bio_list_init(&ms->holds);
+ spin_unlock_irq(&ms->lock);
+
+ while ((bio = bio_list_pop(&holds)))
+ hold_bio(ms, bio);
+
+ /*
* We must finish up all the work that we've
* generated (i.e. recovery work).
*/
@@ -1280,22 +1301,6 @@
* we know that all of our I/O has been pushed.
*/
flush_workqueue(ms->kmirrord_wq);
-
- /*
- * Now set ms->suspend is set and the workqueue flushed, no more
- * entries can be added to ms->hold list, so process it.
- *
- * Bios can still arrive concurrently with or after this
- * presuspend function, but they cannot join the hold list
- * because ms->suspend is set.
- */
- spin_lock_irq(&ms->lock);
- holds = ms->holds;
- bio_list_init(&ms->holds);
- spin_unlock_irq(&ms->lock);
-
- while ((bio = bio_list_pop(&holds)))
- hold_bio(ms, bio);
}
static void mirror_postsuspend(struct dm_target *ti)