unsigned long bar_mapped_size;
struct work_struct reset_work;
struct work_struct remove_work;
- struct timer_list watchdog_timer;
struct mutex shutdown_lock;
bool subsystem;
void __iomem *cmb;
blk_mq_free_request(req);
}
+static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
+{
+
+ /* If true, indicates loss of adapter communication, possibly by a
+ * NVMe Subsystem reset.
+ */
+ bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
+
+ /* If there is a reset ongoing, we shouldn't reset again. */
+ if (dev->ctrl.state == NVME_CTRL_RESETTING)
+ return false;
+
+ /* We shouldn't reset unless the controller is on fatal error state
+ * _or_ if we lost the communication with it.
+ */
+ if (!(csts & NVME_CSTS_CFS) && !nssro)
+ return false;
+
+ /* If PCI error recovery process is happening, we cannot reset or
+ * the recovery mechanism will surely fail.
+ */
+ if (pci_channel_offline(to_pci_dev(dev->dev)))
+ return false;
+
+ return true;
+}
+
+static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
+{
+ /* Read a config register to help see what died. */
+ u16 pci_status;
+ int result;
+
+ result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
+ &pci_status);
+ if (result == PCIBIOS_SUCCESSFUL)
+ dev_warn(dev->ctrl.device,
+ "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
+ csts, pci_status);
+ else
+ dev_warn(dev->ctrl.device,
+ "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
+ csts, result);
+}
+
static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_dev *dev = nvmeq->dev;
struct request *abort_req;
struct nvme_command cmd;
+ u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+ /*
+ * Reset immediately if the controller is failed
+ */
+ if (nvme_should_reset(dev, csts)) {
+ nvme_warn_reset(dev, csts);
+ nvme_dev_disable(dev, false);
+ nvme_reset(dev);
+ return BLK_EH_HANDLED;
+ }
/*
* Did we miss an interrupt?
return result;
}
-static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
-{
-
- /* If true, indicates loss of adapter communication, possibly by a
- * NVMe Subsystem reset.
- */
- bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
-
- /* If there is a reset ongoing, we shouldn't reset again. */
- if (dev->ctrl.state == NVME_CTRL_RESETTING)
- return false;
-
- /* We shouldn't reset unless the controller is on fatal error state
- * _or_ if we lost the communication with it.
- */
- if (!(csts & NVME_CSTS_CFS) && !nssro)
- return false;
-
- /* If PCI error recovery process is happening, we cannot reset or
- * the recovery mechanism will surely fail.
- */
- if (pci_channel_offline(to_pci_dev(dev->dev)))
- return false;
-
- return true;
-}
-
-static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
-{
- /* Read a config register to help see what died. */
- u16 pci_status;
- int result;
-
- result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
- &pci_status);
- if (result == PCIBIOS_SUCCESSFUL)
- dev_warn(dev->ctrl.device,
- "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
- csts, pci_status);
- else
- dev_warn(dev->ctrl.device,
- "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
- csts, result);
-}
-
-static void nvme_watchdog_timer(unsigned long data)
-{
- struct nvme_dev *dev = (struct nvme_dev *)data;
- u32 csts = readl(dev->bar + NVME_REG_CSTS);
-
- /* Skip controllers under certain specific conditions. */
- if (nvme_should_reset(dev, csts)) {
- if (!nvme_reset(dev))
- nvme_warn_reset(dev, csts);
- return;
- }
-
- mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-}
-
static int nvme_create_io_queues(struct nvme_dev *dev)
{
unsigned i, max;
bool dead = true;
struct pci_dev *pdev = to_pci_dev(dev->dev);
- del_timer_sync(&dev->watchdog_timer);
-
mutex_lock(&dev->shutdown_lock);
if (pci_is_enabled(pdev)) {
u32 csts = readl(dev->bar + NVME_REG_CSTS);
if (dev->online_queues > 1)
nvme_queue_async_events(&dev->ctrl);
- mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-
/*
* Keep the controller around but remove all namespaces if we don't have
* any working I/O queue.
INIT_WORK(&dev->reset_work, nvme_reset_work);
INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
- setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
- (unsigned long)dev);
mutex_init(&dev->shutdown_lock);
init_completion(&dev->ioq_wait);