nvme-rdma: Support ctrl_loss_tmo
author: Sagi Grimberg <sagi@grimberg.me>
Sat, 18 Mar 2017 18:58:29 +0000 (20:58 +0200)
committer: Jens Axboe <axboe@fb.com>
Tue, 4 Apr 2017 15:48:23 +0000 (09:48 -0600)
Before scheduling a reconnect attempt, check
nr_reconnects against max_reconnects; if not
exhausted (or if max_reconnects is set to -1,
meaning unlimited retries), schedule a reconnect
attempt, otherwise schedule ctrl removal.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
drivers/nvme/host/rdma.c

index 8c873be308c6100e83a860380749216569ef8aea..37a4201d623deb6436ae61a3e32bab76b2c33e05 100644 (file)
@@ -706,6 +706,26 @@ free_ctrl:
        kfree(ctrl);
 }
 
+static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
+{
+       /* If we are resetting/deleting then do nothing */
+       if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
+               WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
+                       ctrl->ctrl.state == NVME_CTRL_LIVE);
+               return;
+       }
+
+       if (nvmf_should_reconnect(&ctrl->ctrl)) {
+               dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
+                       ctrl->ctrl.opts->reconnect_delay);
+               queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
+                               ctrl->ctrl.opts->reconnect_delay * HZ);
+       } else {
+               dev_info(ctrl->ctrl.device, "Removing controller...\n");
+               queue_work(nvme_rdma_wq, &ctrl->delete_work);
+       }
+}
+
 static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 {
        struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
@@ -713,6 +733,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
        bool changed;
        int ret;
 
+       ++ctrl->ctrl.opts->nr_reconnects;
+
        if (ctrl->queue_count > 1) {
                nvme_rdma_free_io_queues(ctrl);
 
@@ -757,6 +779,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
        WARN_ON_ONCE(!changed);
+       ctrl->ctrl.opts->nr_reconnects = 0;
 
        if (ctrl->queue_count > 1) {
                nvme_start_queues(&ctrl->ctrl);
@@ -771,13 +794,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 stop_admin_q:
        blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
 requeue:
-       /* Make sure we are not resetting/deleting */
-       if (ctrl->ctrl.state == NVME_CTRL_RECONNECTING) {
-               dev_info(ctrl->ctrl.device,
-                       "Failed reconnect attempt, requeueing...\n");
-               queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
-                               ctrl->ctrl.opts->reconnect_delay * HZ);
-       }
+       dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
+                       ctrl->ctrl.opts->nr_reconnects);
+       nvme_rdma_reconnect_or_remove(ctrl);
 }
 
 static void nvme_rdma_error_recovery_work(struct work_struct *work)
@@ -804,11 +823,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_cancel_request, &ctrl->ctrl);
 
-       dev_info(ctrl->ctrl.device, "reconnecting in %d seconds\n",
-               ctrl->ctrl.opts->reconnect_delay);
-
-       queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
-                               ctrl->ctrl.opts->reconnect_delay * HZ);
+       nvme_rdma_reconnect_or_remove(ctrl);
 }
 
 static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
@@ -1986,7 +2001,7 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
        .name           = "rdma",
        .required_opts  = NVMF_OPT_TRADDR,
        .allowed_opts   = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
-                         NVMF_OPT_HOST_TRADDR,
+                         NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO,
        .create_ctrl    = nvme_rdma_create_ctrl,
 };