blk-mq: remove synchronize_rcu() from blk_mq_del_queue_tag_set()
authorMing Lei <ming.lei@redhat.com>
Mon, 25 Jun 2018 11:31:48 +0000 (19:31 +0800)
committerJens Axboe <axboe@kernel.dk>
Mon, 9 Jul 2018 15:07:52 +0000 (09:07 -0600)
We have to remove synchronize_rcu() from blk_cleanup_queue(),
otherwise a long delay can be caused during lun probe. To remove
it, we have to avoid iterating set->tag_list in the IO path, e.g. in
blk_mq_sched_restart().

This patch reverts 5b79413946d (Revert "blk-mq: don't handle
TAG_SHARED in restart"). Given we have fixed enough IO hang issues,
there isn't any reason to restart all queues in one tag set any more,
for the following reasons:

1) blk-mq core can deal with shared-tags case well via blk_mq_get_driver_tag(),
which can wake up queues waiting for driver tag.

2) SCSI is a bit special because it may return BLK_STS_RESOURCE if the queue,
target or host isn't ready, but SCSI's built-in restart can cover all these
cases well; see scsi_end_request(): the queue will be rerun after any request
initiated from this host/target is completed.

In my test on scsi_debug(8 luns), this patch may improve IOPS by 20% ~ 30%
when running I/O on these 8 luns concurrently.

Fixes: 705cda97ee3a ("blk-mq: Make it safe to use RCU to iterate over blk_mq_tag_set.tag_list")
Cc: Omar Sandoval <osandov@fb.com>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: linux-scsi@vger.kernel.org
Reported-by: Andrew Jones <drjones@redhat.com>
Tested-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-mq-sched.c
block/blk-mq.c
include/linux/blkdev.h

index 56c493c6cd903ed1b8eefc609861feb04ad2d1aa..4e027f6108aef9f009b376f6f54315a852103b21 100644 (file)
@@ -59,29 +59,16 @@ static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
        if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
                return;
 
-       if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-               struct request_queue *q = hctx->queue;
-
-               if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-                       atomic_inc(&q->shared_hctx_restart);
-       } else
-               set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+       set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 }
 
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
+void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
 {
        if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-               return false;
-
-       if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-               struct request_queue *q = hctx->queue;
-
-               if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-                       atomic_dec(&q->shared_hctx_restart);
-       } else
-               clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+               return;
+       clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 
-       return blk_mq_run_hw_queue(hctx, true);
+       blk_mq_run_hw_queue(hctx, true);
 }
 
 /*
@@ -380,68 +367,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
        return false;
 }
 
-/**
- * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
- * @pos:    loop cursor.
- * @skip:   the list element that will not be examined. Iteration starts at
- *          @skip->next.
- * @head:   head of the list to examine. This list must have at least one
- *          element, namely @skip.
- * @member: name of the list_head structure within typeof(*pos).
- */
-#define list_for_each_entry_rcu_rr(pos, skip, head, member)            \
-       for ((pos) = (skip);                                            \
-            (pos = (pos)->member.next != (head) ? list_entry_rcu(      \
-                       (pos)->member.next, typeof(*pos), member) :     \
-             list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
-            (pos) != (skip); )
-
-/*
- * Called after a driver tag has been freed to check whether a hctx needs to
- * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
- * queues in a round-robin fashion if the tag set of @hctx is shared with other
- * hardware queues.
- */
-void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
-{
-       struct blk_mq_tags *const tags = hctx->tags;
-       struct blk_mq_tag_set *const set = hctx->queue->tag_set;
-       struct request_queue *const queue = hctx->queue, *q;
-       struct blk_mq_hw_ctx *hctx2;
-       unsigned int i, j;
-
-       if (set->flags & BLK_MQ_F_TAG_SHARED) {
-               /*
-                * If this is 0, then we know that no hardware queues
-                * have RESTART marked. We're done.
-                */
-               if (!atomic_read(&queue->shared_hctx_restart))
-                       return;
-
-               rcu_read_lock();
-               list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
-                                          tag_set_list) {
-                       queue_for_each_hw_ctx(q, hctx2, i)
-                               if (hctx2->tags == tags &&
-                                   blk_mq_sched_restart_hctx(hctx2))
-                                       goto done;
-               }
-               j = hctx->queue_num + 1;
-               for (i = 0; i < queue->nr_hw_queues; i++, j++) {
-                       if (j == queue->nr_hw_queues)
-                               j = 0;
-                       hctx2 = queue->queue_hw_ctx[j];
-                       if (hctx2->tags == tags &&
-                           blk_mq_sched_restart_hctx(hctx2))
-                               break;
-               }
-done:
-               rcu_read_unlock();
-       } else {
-               blk_mq_sched_restart_hctx(hctx);
-       }
-}
-
 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
                                 bool run_queue, bool async)
 {
index df84281f6af613d9f11a19051ba1c3bf8109107b..7c6ff13171efcc883de2144aa574a3ead1b40274 100644 (file)
@@ -2335,15 +2335,10 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared)
        int i;
 
        queue_for_each_hw_ctx(q, hctx, i) {
-               if (shared) {
-                       if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-                               atomic_inc(&q->shared_hctx_restart);
+               if (shared)
                        hctx->flags |= BLK_MQ_F_TAG_SHARED;
-               } else {
-                       if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-                               atomic_dec(&q->shared_hctx_restart);
+               else
                        hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
-               }
        }
 }
 
@@ -2374,7 +2369,6 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
                blk_mq_update_tag_set_depth(set, false);
        }
        mutex_unlock(&set->tag_list_lock);
-       synchronize_rcu();
        INIT_LIST_HEAD(&q->tag_set_list);
 }
 
index ca5a8b046894abf3a0632c1ec97cc44bd8c743f0..9d05646d50596e622ee2e4a4b9691d7d532c856e 100644 (file)
@@ -442,8 +442,6 @@ struct request_queue {
        int                     nr_rqs[2];      /* # allocated [a]sync rqs */
        int                     nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
 
-       atomic_t                shared_hctx_restart;
-
        struct blk_queue_stats  *stats;
        struct rq_wb            *rq_wb;