blk-mq: fix dispatch from sw queue
author Ming Lei <ming.lei@redhat.com>
Mon, 17 Dec 2018 15:44:05 +0000 (08:44 -0700)
committer Jens Axboe <axboe@kernel.dk>
Mon, 17 Dec 2018 18:19:54 +0000 (11:19 -0700)
When a request is added to the rq list of a sw queue (ctx), the rq may
belong to a different type of hctx, especially now that multiple queue
mappings have been introduced.

So when dispatching requests from a sw queue via blk_mq_flush_busy_ctxs()
or blk_mq_dequeue_from_ctx(), a request belonging to another queue type
of hctx can be dispatched to the current hctx if the read queue or poll
queue is enabled.

This patch fixes the issue by introducing per-queue-type lists.

Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Changed by me to not use separately cacheline aligned lists, just
place them all in the same cacheline where we had just the one list
and lock before.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-mq-debugfs.c
block/blk-mq-sched.c
block/blk-mq.c
block/blk-mq.h

index 1e12033be9ea7e0f9d3c56cb3b344a87c3a868a3..90d68760af086bd6de78c71dc4a1f6954ceb7a5b 100644 (file)
@@ -652,36 +652,43 @@ static int hctx_dispatch_busy_show(void *data, struct seq_file *m)
        return 0;
 }
 
-static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
-       __acquires(&ctx->lock)
-{
-       struct blk_mq_ctx *ctx = m->private;
-
-       spin_lock(&ctx->lock);
-       return seq_list_start(&ctx->rq_list, *pos);
-}
-
-static void *ctx_rq_list_next(struct seq_file *m, void *v, loff_t *pos)
-{
-       struct blk_mq_ctx *ctx = m->private;
-
-       return seq_list_next(v, &ctx->rq_list, pos);
-}
+#define CTX_RQ_SEQ_OPS(name, type)                                     \
+static void *ctx_##name##_rq_list_start(struct seq_file *m, loff_t *pos) \
+       __acquires(&ctx->lock)                                          \
+{                                                                      \
+       struct blk_mq_ctx *ctx = m->private;                            \
+                                                                       \
+       spin_lock(&ctx->lock);                                          \
+       return seq_list_start(&ctx->rq_lists[type], *pos);              \
+}                                                                      \
+                                                                       \
+static void *ctx_##name##_rq_list_next(struct seq_file *m, void *v,    \
+                                    loff_t *pos)                       \
+{                                                                      \
+       struct blk_mq_ctx *ctx = m->private;                            \
+                                                                       \
+       return seq_list_next(v, &ctx->rq_lists[type], pos);             \
+}                                                                      \
+                                                                       \
+static void ctx_##name##_rq_list_stop(struct seq_file *m, void *v)     \
+       __releases(&ctx->lock)                                          \
+{                                                                      \
+       struct blk_mq_ctx *ctx = m->private;                            \
+                                                                       \
+       spin_unlock(&ctx->lock);                                        \
+}                                                                      \
+                                                                       \
+static const struct seq_operations ctx_##name##_rq_list_seq_ops = {    \
+       .start  = ctx_##name##_rq_list_start,                           \
+       .next   = ctx_##name##_rq_list_next,                            \
+       .stop   = ctx_##name##_rq_list_stop,                            \
+       .show   = blk_mq_debugfs_rq_show,                               \
+}
+
+CTX_RQ_SEQ_OPS(default, HCTX_TYPE_DEFAULT);
+CTX_RQ_SEQ_OPS(read, HCTX_TYPE_READ);
+CTX_RQ_SEQ_OPS(poll, HCTX_TYPE_POLL);
 
-static void ctx_rq_list_stop(struct seq_file *m, void *v)
-       __releases(&ctx->lock)
-{
-       struct blk_mq_ctx *ctx = m->private;
-
-       spin_unlock(&ctx->lock);
-}
-
-static const struct seq_operations ctx_rq_list_seq_ops = {
-       .start  = ctx_rq_list_start,
-       .next   = ctx_rq_list_next,
-       .stop   = ctx_rq_list_stop,
-       .show   = blk_mq_debugfs_rq_show,
-};
 static int ctx_dispatched_show(void *data, struct seq_file *m)
 {
        struct blk_mq_ctx *ctx = data;
@@ -819,7 +826,9 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
 };
 
 static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
-       {"rq_list", 0400, .seq_ops = &ctx_rq_list_seq_ops},
+       {"default_rq_list", 0400, .seq_ops = &ctx_default_rq_list_seq_ops},
+       {"read_rq_list", 0400, .seq_ops = &ctx_read_rq_list_seq_ops},
+       {"poll_rq_list", 0400, .seq_ops = &ctx_poll_rq_list_seq_ops},
        {"dispatched", 0600, ctx_dispatched_show, ctx_dispatched_write},
        {"merged", 0600, ctx_merged_show, ctx_merged_write},
        {"completed", 0600, ctx_completed_show, ctx_completed_write},
index 056fa9baf44e69798108d0eafb52e783d566b5e2..140933e4a7d12efd628695b10acf71ba63d5c01a 100644 (file)
@@ -302,11 +302,14 @@ EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge);
  * too much time checking for merges.
  */
 static bool blk_mq_attempt_merge(struct request_queue *q,
+                                struct blk_mq_hw_ctx *hctx,
                                 struct blk_mq_ctx *ctx, struct bio *bio)
 {
+       enum hctx_type type = hctx->type;
+
        lockdep_assert_held(&ctx->lock);
 
-       if (blk_mq_bio_list_merge(q, &ctx->rq_list, bio)) {
+       if (blk_mq_bio_list_merge(q, &ctx->rq_lists[type], bio)) {
                ctx->rq_merged++;
                return true;
        }
@@ -320,17 +323,19 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
        struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
        struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx->cpu);
        bool ret = false;
+       enum hctx_type type;
 
        if (e && e->type->ops.bio_merge) {
                blk_mq_put_ctx(ctx);
                return e->type->ops.bio_merge(hctx, bio);
        }
 
+       type = hctx->type;
        if ((hctx->flags & BLK_MQ_F_SHOULD_MERGE) &&
-                       !list_empty_careful(&ctx->rq_list)) {
+                       !list_empty_careful(&ctx->rq_lists[type])) {
                /* default per sw-queue merge */
                spin_lock(&ctx->lock);
-               ret = blk_mq_attempt_merge(q, ctx, bio);
+               ret = blk_mq_attempt_merge(q, hctx, ctx, bio);
                spin_unlock(&ctx->lock);
        }
 
index 313f28b2d079060591ad59d98108bef7ccc8bf21..9c1c1544bac31623665e0055d0e3db21ed987a44 100644 (file)
@@ -958,9 +958,10 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
        struct flush_busy_ctx_data *flush_data = data;
        struct blk_mq_hw_ctx *hctx = flush_data->hctx;
        struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
+       enum hctx_type type = hctx->type;
 
        spin_lock(&ctx->lock);
-       list_splice_tail_init(&ctx->rq_list, flush_data->list);
+       list_splice_tail_init(&ctx->rq_lists[type], flush_data->list);
        sbitmap_clear_bit(sb, bitnr);
        spin_unlock(&ctx->lock);
        return true;
@@ -992,12 +993,13 @@ static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr,
        struct dispatch_rq_data *dispatch_data = data;
        struct blk_mq_hw_ctx *hctx = dispatch_data->hctx;
        struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
+       enum hctx_type type = hctx->type;
 
        spin_lock(&ctx->lock);
-       if (!list_empty(&ctx->rq_list)) {
-               dispatch_data->rq = list_entry_rq(ctx->rq_list.next);
+       if (!list_empty(&ctx->rq_lists[type])) {
+               dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next);
                list_del_init(&dispatch_data->rq->queuelist);
-               if (list_empty(&ctx->rq_list))
+               if (list_empty(&ctx->rq_lists[type]))
                        sbitmap_clear_bit(sb, bitnr);
        }
        spin_unlock(&ctx->lock);
@@ -1608,15 +1610,16 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
                                            bool at_head)
 {
        struct blk_mq_ctx *ctx = rq->mq_ctx;
+       enum hctx_type type = hctx->type;
 
        lockdep_assert_held(&ctx->lock);
 
        trace_block_rq_insert(hctx->queue, rq);
 
        if (at_head)
-               list_add(&rq->queuelist, &ctx->rq_list);
+               list_add(&rq->queuelist, &ctx->rq_lists[type]);
        else
-               list_add_tail(&rq->queuelist, &ctx->rq_list);
+               list_add_tail(&rq->queuelist, &ctx->rq_lists[type]);
 }
 
 void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
@@ -1651,6 +1654,7 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 
 {
        struct request *rq;
+       enum hctx_type type = hctx->type;
 
        /*
         * preemption doesn't flush plug list, so it's possible ctx->cpu is
@@ -1662,7 +1666,7 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
        }
 
        spin_lock(&ctx->lock);
-       list_splice_tail_init(list, &ctx->rq_list);
+       list_splice_tail_init(list, &ctx->rq_lists[type]);
        blk_mq_hctx_mark_pending(hctx, ctx);
        spin_unlock(&ctx->lock);
 }
@@ -2200,13 +2204,15 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
        struct blk_mq_hw_ctx *hctx;
        struct blk_mq_ctx *ctx;
        LIST_HEAD(tmp);
+       enum hctx_type type;
 
        hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
        ctx = __blk_mq_get_ctx(hctx->queue, cpu);
+       type = hctx->type;
 
        spin_lock(&ctx->lock);
-       if (!list_empty(&ctx->rq_list)) {
-               list_splice_init(&ctx->rq_list, &tmp);
+       if (!list_empty(&ctx->rq_lists[type])) {
+               list_splice_init(&ctx->rq_lists[type], &tmp);
                blk_mq_hctx_clear_pending(hctx, ctx);
        }
        spin_unlock(&ctx->lock);
@@ -2343,10 +2349,13 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
        for_each_possible_cpu(i) {
                struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
                struct blk_mq_hw_ctx *hctx;
+               int k;
 
                __ctx->cpu = i;
                spin_lock_init(&__ctx->lock);
-               INIT_LIST_HEAD(&__ctx->rq_list);
+               for (k = HCTX_TYPE_DEFAULT; k < HCTX_MAX_TYPES; k++)
+                       INIT_LIST_HEAD(&__ctx->rq_lists[k]);
+
                __ctx->queue = q;
 
                /*
index d1ed096723fb057a6223abd922c32bfba6a5362f..d943d46b078547e5f48d488575be5178fb0195e5 100644 (file)
@@ -18,8 +18,8 @@ struct blk_mq_ctxs {
 struct blk_mq_ctx {
        struct {
                spinlock_t              lock;
-               struct list_head        rq_list;
-       }  ____cacheline_aligned_in_smp;
+               struct list_head        rq_lists[HCTX_MAX_TYPES];
+       } ____cacheline_aligned_in_smp;
 
        unsigned int            cpu;
        unsigned short          index_hw[HCTX_MAX_TYPES];