Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
"A smaller collection of fixes for the block core that would be nice to
have in -rc2. This pull request contains:
- Fixes for races in the wait/wakeup logic used in blk-mq from
Alexander. No issues have been observed, but it is definitely a
bit flakey currently. Alternatively, we may drop the cyclic
wakeups going forward, but that needs more testing.
- Some cleanups from Christoph.
- Fix for an oops in null_blk if queue_mode=1 and softirq completions
are used. From me.
- A fix for a regression caused by the chunk size setting. It
inadvertently used max_hw_sectors instead of max_sectors, which is
incorrect, and causes hangs on btrfs multi-disk setups (where hw
sectors apparently isn't set). From me.
- Removal of WQ_POWER_EFFICIENT in the kblockd creation. This was a
recent addition as well, but it actually breaks blk-mq which relies
on strict scheduling. If the workqueue power_efficient mode is
turned on, this breaks blk-mq. From Matias.
- null_blk module parameter description fix from Mike"
* 'for-linus' of git://git.kernel.dk/linux-block:
blk-mq: bitmap tag: fix races in bt_get() function
blk-mq: bitmap tag: fix race on blk_mq_bitmap_tags::wake_cnt
blk-mq: bitmap tag: fix races on shared ::wake_index fields
block: blk_max_size_offset() should check ->max_sectors
null_blk: fix softirq completions for queue_mode == 1
blk-mq: merge blk_mq_drain_queue and __blk_mq_drain_queue
blk-mq: properly drain stopped queues
block: remove WQ_POWER_EFFICIENT from kblockd
null_blk: fix name and description of 'queue_mode' module parameter
block: remove elv_abort_queue and blk_abort_flushes
diff --git a/block/blk-core.c b/block/blk-core.c
index f6f6b9a..6f8dba1 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -3312,8 +3312,7 @@
/* used for unplugging and affects IO latency/throughput - HIGHPRI */
kblockd_workqueue = alloc_workqueue("kblockd",
- WQ_MEM_RECLAIM | WQ_HIGHPRI |
- WQ_POWER_EFFICIENT, 0);
+ WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
if (!kblockd_workqueue)
panic("Failed to create kblockd\n");
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 8ffee4b..3cb5e9e 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -422,44 +422,6 @@
}
/**
- * blk_abort_flushes - @q is being aborted, abort flush requests
- * @q: request_queue being aborted
- *
- * To be called from elv_abort_queue(). @q is being aborted. Prepare all
- * FLUSH/FUA requests for abortion.
- *
- * CONTEXT:
- * spin_lock_irq(q->queue_lock)
- */
-void blk_abort_flushes(struct request_queue *q)
-{
- struct request *rq, *n;
- int i;
-
- /*
- * Requests in flight for data are already owned by the dispatch
- * queue or the device driver. Just restore for normal completion.
- */
- list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) {
- list_del_init(&rq->flush.list);
- blk_flush_restore_request(rq);
- }
-
- /*
- * We need to give away requests on flush queues. Restore for
- * normal completion and put them on the dispatch queue.
- */
- for (i = 0; i < ARRAY_SIZE(q->flush_queue); i++) {
- list_for_each_entry_safe(rq, n, &q->flush_queue[i],
- flush.list) {
- list_del_init(&rq->flush.list);
- blk_flush_restore_request(rq);
- list_add_tail(&rq->queuelist, &q->queue_head);
- }
- }
-}
-
-/**
* blkdev_issue_flush - queue a flush
* @bdev: blockdev to issue flush for
* @gfp_mask: memory allocation flags (for bio_alloc)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 1aab39f..c1b9242 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -43,9 +43,16 @@
return bt_has_free_tags(&tags->bitmap_tags);
}
-static inline void bt_index_inc(unsigned int *index)
+static inline int bt_index_inc(int index)
{
- *index = (*index + 1) & (BT_WAIT_QUEUES - 1);
+ return (index + 1) & (BT_WAIT_QUEUES - 1);
+}
+
+static inline void bt_index_atomic_inc(atomic_t *index)
+{
+ int old = atomic_read(index);
+ int new = bt_index_inc(old);
+ atomic_cmpxchg(index, old, new);
}
/*
@@ -69,14 +76,14 @@
int i, wake_index;
bt = &tags->bitmap_tags;
- wake_index = bt->wake_index;
+ wake_index = atomic_read(&bt->wake_index);
for (i = 0; i < BT_WAIT_QUEUES; i++) {
struct bt_wait_state *bs = &bt->bs[wake_index];
if (waitqueue_active(&bs->wait))
wake_up(&bs->wait);
- bt_index_inc(&wake_index);
+ wake_index = bt_index_inc(wake_index);
}
}
@@ -212,12 +219,14 @@
struct blk_mq_hw_ctx *hctx)
{
struct bt_wait_state *bs;
+ int wait_index;
if (!hctx)
return &bt->bs[0];
- bs = &bt->bs[hctx->wait_index];
- bt_index_inc(&hctx->wait_index);
+ wait_index = atomic_read(&hctx->wait_index);
+ bs = &bt->bs[wait_index];
+ bt_index_atomic_inc(&hctx->wait_index);
return bs;
}
@@ -239,18 +248,12 @@
bs = bt_wait_ptr(bt, hctx);
do {
- bool was_empty;
-
- was_empty = list_empty(&wait.task_list);
prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
tag = __bt_get(hctx, bt, last_tag);
if (tag != -1)
break;
- if (was_empty)
- atomic_set(&bs->wait_cnt, bt->wake_cnt);
-
blk_mq_put_ctx(data->ctx);
io_schedule();
@@ -313,18 +316,19 @@
{
int i, wake_index;
- wake_index = bt->wake_index;
+ wake_index = atomic_read(&bt->wake_index);
for (i = 0; i < BT_WAIT_QUEUES; i++) {
struct bt_wait_state *bs = &bt->bs[wake_index];
if (waitqueue_active(&bs->wait)) {
- if (wake_index != bt->wake_index)
- bt->wake_index = wake_index;
+ int o = atomic_read(&bt->wake_index);
+ if (wake_index != o)
+ atomic_cmpxchg(&bt->wake_index, o, wake_index);
return bs;
}
- bt_index_inc(&wake_index);
+ wake_index = bt_index_inc(wake_index);
}
return NULL;
@@ -334,6 +338,7 @@
{
const int index = TAG_TO_INDEX(bt, tag);
struct bt_wait_state *bs;
+ int wait_cnt;
/*
* The unlock memory barrier need to order access to req in free
@@ -342,10 +347,19 @@
clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word);
bs = bt_wake_ptr(bt);
- if (bs && atomic_dec_and_test(&bs->wait_cnt)) {
- atomic_set(&bs->wait_cnt, bt->wake_cnt);
- bt_index_inc(&bt->wake_index);
+ if (!bs)
+ return;
+
+ wait_cnt = atomic_dec_return(&bs->wait_cnt);
+ if (wait_cnt == 0) {
+wake:
+ atomic_add(bt->wake_cnt, &bs->wait_cnt);
+ bt_index_atomic_inc(&bt->wake_index);
wake_up(&bs->wait);
+ } else if (wait_cnt < 0) {
+ wait_cnt = atomic_inc_return(&bs->wait_cnt);
+ if (!wait_cnt)
+ goto wake;
}
}
@@ -499,10 +513,13 @@
return -ENOMEM;
}
- for (i = 0; i < BT_WAIT_QUEUES; i++)
- init_waitqueue_head(&bt->bs[i].wait);
-
bt_update_count(bt, depth);
+
+ for (i = 0; i < BT_WAIT_QUEUES; i++) {
+ init_waitqueue_head(&bt->bs[i].wait);
+ atomic_set(&bt->bs[i].wait_cnt, bt->wake_cnt);
+ }
+
return 0;
}
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 98696a6..6206ed1 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -24,7 +24,7 @@
unsigned int map_nr;
struct blk_align_bitmap *map;
- unsigned int wake_index;
+ atomic_t wake_index;
struct bt_wait_state *bs;
};
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e11f5f8..0ef2dc7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -109,7 +109,7 @@
__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
}
-static void __blk_mq_drain_queue(struct request_queue *q)
+void blk_mq_drain_queue(struct request_queue *q)
{
while (true) {
s64 count;
@@ -120,7 +120,7 @@
if (count == 0)
break;
- blk_mq_run_queues(q, false);
+ blk_mq_start_hw_queues(q);
msleep(10);
}
}
@@ -139,12 +139,7 @@
spin_unlock_irq(q->queue_lock);
if (drain)
- __blk_mq_drain_queue(q);
-}
-
-void blk_mq_drain_queue(struct request_queue *q)
-{
- __blk_mq_drain_queue(q);
+ blk_mq_drain_queue(q);
}
static void blk_mq_unfreeze_queue(struct request_queue *q)
diff --git a/block/blk.h b/block/blk.h
index 45385e9..6748c4f 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -84,7 +84,6 @@
#define ELV_ON_HASH(rq) ((rq)->cmd_flags & REQ_HASHED)
void blk_insert_flush(struct request *rq);
-void blk_abort_flushes(struct request_queue *q);
static inline struct request *__elv_next_request(struct request_queue *q)
{
diff --git a/block/elevator.c b/block/elevator.c
index f35eddd..34bded1 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -729,26 +729,6 @@
return ELV_MQUEUE_MAY;
}
-void elv_abort_queue(struct request_queue *q)
-{
- struct request *rq;
-
- blk_abort_flushes(q);
-
- while (!list_empty(&q->queue_head)) {
- rq = list_entry_rq(q->queue_head.next);
- rq->cmd_flags |= REQ_QUIET;
- trace_block_rq_abort(q, rq);
- /*
- * Mark this request as started so we don't trigger
- * any debug logic in the end I/O path.
- */
- blk_start_request(rq);
- __blk_end_request_all(rq, -EIO);
- }
-}
-EXPORT_SYMBOL(elv_abort_queue);
-
void elv_completed_request(struct request_queue *q, struct request *rq)
{
struct elevator_queue *e = q->elevator;
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 77087a2..a3b042c 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -79,7 +79,7 @@
static int queue_mode = NULL_Q_MQ;
module_param(queue_mode, int, S_IRUGO);
-MODULE_PARM_DESC(use_mq, "Use blk-mq interface (0=bio,1=rq,2=multiqueue)");
+MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
static int gb = 250;
module_param(gb, int, S_IRUGO);
@@ -227,7 +227,10 @@
static void null_softirq_done_fn(struct request *rq)
{
- end_cmd(blk_mq_rq_to_pdu(rq));
+ if (queue_mode == NULL_Q_MQ)
+ end_cmd(blk_mq_rq_to_pdu(rq));
+ else
+ end_cmd(rq->special);
}
static inline void null_handle_cmd(struct nullb_cmd *cmd)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a002cf1..eb726b9 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -42,7 +42,7 @@
unsigned int nr_ctx;
struct blk_mq_ctx **ctxs;
- unsigned int wait_index;
+ atomic_t wait_index;
struct blk_mq_tags *tags;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 31e1105..713f8b6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -920,7 +920,7 @@
sector_t offset)
{
if (!q->limits.chunk_sectors)
- return q->limits.max_hw_sectors;
+ return q->limits.max_sectors;
return q->limits.chunk_sectors -
(offset & (q->limits.chunk_sectors - 1));
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 4ff262e..e2a6bd7 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -133,7 +133,6 @@
extern int elv_register_queue(struct request_queue *q);
extern void elv_unregister_queue(struct request_queue *q);
extern int elv_may_queue(struct request_queue *, int);
-extern void elv_abort_queue(struct request_queue *);
extern void elv_completed_request(struct request_queue *, struct request *);
extern int elv_set_request(struct request_queue *q, struct request *rq,
struct bio *bio, gfp_t gfp_mask);