nvme-pci: Remove watchdog timer

author Keith Busch <keith.busch@intel.com>

Wed, 7 Jun 2017 18:32:50 +0000 (20:32 +0200)

committer Christoph Hellwig <hch@lst.de>

Thu, 15 Jun 2017 12:30:08 +0000 (14:30 +0200)
author Keith Busch <keith.busch@intel.com>
Wed, 7 Jun 2017 18:32:50 +0000 (20:32 +0200)
committer Christoph Hellwig <hch@lst.de>
Thu, 15 Jun 2017 12:30:08 +0000 (14:30 +0200)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c

index 5278ed9811a61229dc7b3444a5b363827dac300f..ef2b1537afe2ccf780e7961ce3e861fe0dea08e9 100644 (file)
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -98,7 +98,6 @@ struct nvme_dev {
         unsigned long bar_mapped_size;
         struct work_struct reset_work;
         struct work_struct remove_work;
-       struct timer_list watchdog_timer;
         struct mutex shutdown_lock;
         bool subsystem;
         void __iomem *cmb;
@@ -960,6 +959,51 @@ static void abort_endio(struct request *req, blk_status_t error)
         blk_mq_free_request(req);
  }
  
+static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
+{
+
+       /* If true, indicates loss of adapter communication, possibly caused
+        * by an NVMe Subsystem reset.
+        */
+       bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
+
+       /* If there is a reset ongoing, we shouldn't reset again. */
+       if (dev->ctrl.state == NVME_CTRL_RESETTING)
+               return false;
+
+       /* We shouldn't reset unless the controller is in a fatal error state
+        * _or_ we have lost communication with it.
+        */
+       if (!(csts & NVME_CSTS_CFS) && !nssro)
+               return false;
+
+       /* If PCI error recovery is in progress, we cannot reset or
+        * the recovery mechanism will surely fail.
+        */
+       if (pci_channel_offline(to_pci_dev(dev->dev)))
+               return false;
+
+       return true;
+}
+
+static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
+{
+       /* Read a config register to help see what died. */
+       u16 pci_status;
+       int result;
+
+       result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
+                                     &pci_status);
+       if (result == PCIBIOS_SUCCESSFUL)
+               dev_warn(dev->ctrl.device,
+                        "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
+                        csts, pci_status);
+       else
+               dev_warn(dev->ctrl.device,
+                        "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
+                        csts, result);
+}
+
  static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
  {
         struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -967,6 +1011,17 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
         struct nvme_dev *dev = nvmeq->dev;
         struct request *abort_req;
         struct nvme_command cmd;
+       u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+       /*
+        * Reset immediately if the controller has failed
+        */
+       if (nvme_should_reset(dev, csts)) {
+               nvme_warn_reset(dev, csts);
+               nvme_dev_disable(dev, false);
+               nvme_reset(dev);
+               return BLK_EH_HANDLED;
+       }
  
         /*
          * Did we miss an interrupt?
@@ -1398,66 +1453,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
         return result;
  }
  
-static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
-{
-
-       /* If true, indicates loss of adapter communication, possibly by a
-        * NVMe Subsystem reset.
-        */
-       bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
-
-       /* If there is a reset ongoing, we shouldn't reset again. */
-       if (dev->ctrl.state == NVME_CTRL_RESETTING)
-               return false;
-
-       /* We shouldn't reset unless the controller is on fatal error state
-        * _or_ if we lost the communication with it.
-        */
-       if (!(csts & NVME_CSTS_CFS) && !nssro)
-               return false;
-
-       /* If PCI error recovery process is happening, we cannot reset or
-        * the recovery mechanism will surely fail.
-        */
-       if (pci_channel_offline(to_pci_dev(dev->dev)))
-               return false;
-
-       return true;
-}
-
-static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
-{
-       /* Read a config register to help see what died. */
-       u16 pci_status;
-       int result;
-
-       result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
-                                     &pci_status);
-       if (result == PCIBIOS_SUCCESSFUL)
-               dev_warn(dev->ctrl.device,
-                        "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
-                        csts, pci_status);
-       else
-               dev_warn(dev->ctrl.device,
-                        "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
-                        csts, result);
-}
-
-static void nvme_watchdog_timer(unsigned long data)
-{
-       struct nvme_dev *dev = (struct nvme_dev *)data;
-       u32 csts = readl(dev->bar + NVME_REG_CSTS);
-
-       /* Skip controllers under certain specific conditions. */
-       if (nvme_should_reset(dev, csts)) {
-               if (!nvme_reset(dev))
-                       nvme_warn_reset(dev, csts);
-               return;
-       }
-
-       mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-}
-
  static int nvme_create_io_queues(struct nvme_dev *dev)
  {
         unsigned i, max;
@@ -1986,8 +1981,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
         bool dead = true;
         struct pci_dev *pdev = to_pci_dev(dev->dev);
  
-       del_timer_sync(&dev->watchdog_timer);
-
         mutex_lock(&dev->shutdown_lock);
         if (pci_is_enabled(pdev)) {
                 u32 csts = readl(dev->bar + NVME_REG_CSTS);
@@ -2163,8 +2156,6 @@ static void nvme_reset_work(struct work_struct *work)
         if (dev->online_queues > 1)
                 nvme_queue_async_events(&dev->ctrl);
  
-       mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-
         /*
          * Keep the controller around but remove all namespaces if we don't have
          * any working I/O queue.
@@ -2318,8 +2309,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
  
         INIT_WORK(&dev->reset_work, nvme_reset_work);
         INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
-       setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
-               (unsigned long)dev);
         mutex_init(&dev->shutdown_lock);
         init_completion(&dev->ioq_wait);
author	Keith Busch <keith.busch@intel.com>
	Wed, 7 Jun 2017 18:32:50 +0000 (20:32 +0200)
committer	Christoph Hellwig <hch@lst.de>
	Thu, 15 Jun 2017 12:30:08 +0000 (14:30 +0200)