[SCSI] mpt2sas: Support for stopping driver when Firmware encounters
authorKashyap, Desai <kashyap.desai@lsi.com>
Wed, 23 Sep 2009 11:56:58 +0000 (17:26 +0530)
committerJames Bottomley <James.Bottomley@suse.de>
Thu, 29 Oct 2009 17:03:10 +0000 (13:03 -0400)
Added a command line option called mpt2sas_fwfault_debug and a matching
shost sysfs attribute called fwfault_debug. When the end user writes a "1"
to this parameter, it enables support in the driver for debugging firmware
timeout related issues.  This handling was added in three areas: (a) the scsi
error handling callback called task_abort, (b) the IOCTL interface, and (c)
other timeouts that result in diag resets, such as manufacturing config pages.
When this support is enabled, the driver will dump_stack to the console, halt
controller firmware, and panic the driver. The end user will probably want to
set up serial console redirection so the stack dump can be seen.

Here are the three methods for enabling this support:

(a) # insmod mpt2sas.ko mpt2sas_fwfault_debug=1
(b) # echo 1 > /sys/module/mpt2sas/parameters/mpt2sas_fwfault_debug
(c) # echo 1 > /sys/class/scsi_host/host#/fwfault_debug  (where # is
the host number)

Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com>
Signed-off-by: Eric Moore <Eric.moore@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
drivers/scsi/mpt2sas/mpt2sas_base.c
drivers/scsi/mpt2sas/mpt2sas_base.h
drivers/scsi/mpt2sas/mpt2sas_ctl.c
drivers/scsi/mpt2sas/mpt2sas_scsih.c

index 670241efa4b555041e5b4fcb88cfe27929709d50..617664cbf3f714c11164b577b016897df349dcdd 100644 (file)
@@ -77,6 +77,32 @@ static int msix_disable = -1;
 module_param(msix_disable, int, 0);
 MODULE_PARM_DESC(msix_disable, " disable msix routed interrupts (default=0)");
 
+int mpt2sas_fwfault_debug;
+MODULE_PARM_DESC(mpt2sas_fwfault_debug, " enable detection of firmware fault "
+    "and halt firmware - (default=0)");
+
+/**
+ * _scsih_set_fwfault_debug - global setting of ioc->fwfault_debug.
+ * @val: new value as a string, parsed by param_set_int()
+ * @kp: kernel parameter descriptor, handed to param_set_int() unchanged
+ */
+static int
+_scsih_set_fwfault_debug(const char *val, struct kernel_param *kp)
+{
+       int ret = param_set_int(val, kp);
+       struct MPT2SAS_ADAPTER *ioc;
+
+       if (ret)        /* param_set_int() failed: propagate its error */
+               return ret;
+
+       printk(KERN_INFO "setting fwfault_debug(%d)\n", mpt2sas_fwfault_debug);
+       list_for_each_entry(ioc, &mpt2sas_ioc_list, list)
+               ioc->fwfault_debug = mpt2sas_fwfault_debug;
+       return 0;
+}
+module_param_call(mpt2sas_fwfault_debug, _scsih_set_fwfault_debug,
+    param_get_int, &mpt2sas_fwfault_debug, 0644);
+
 /**
  * _base_fault_reset_work - workq handling ioc fault conditions
  * @work: input argument, used to derive ioc
@@ -177,6 +203,51 @@ mpt2sas_base_stop_watchdog(struct MPT2SAS_ADAPTER *ioc)
        }
 }
 
+/**
+ * mpt2sas_base_fault_info - log the firmware FAULT code
+ * @ioc: per adapter object
+ * @fault_code: fault code, printed as four hex digits
+ *
+ * Return nothing.
+ */
+void
+mpt2sas_base_fault_info(struct MPT2SAS_ADAPTER *ioc , u16 fault_code)
+{
+       printk(MPT2SAS_ERR_FMT "fault_state(0x%04x)!\n",
+           ioc->name, fault_code);
+}
+
+/**
+ * mpt2sas_halt_firmware - halt mpt controller firmware, then panic
+ * @ioc: per adapter object
+ *
+ * For debugging timeout related issues; does nothing unless
+ * ioc->fwfault_debug is set.  Writing 0xC0FFEE00 to the doorbell register
+ * halts controller firmware.  With both driver and firmware stopped, the
+ * end user can obtain a ring buffer from the controller UART.
+ */
+void
+mpt2sas_halt_firmware(struct MPT2SAS_ADAPTER *ioc)
+{
+       u32 doorbell;
+
+       if (!ioc->fwfault_debug)
+               return;
+
+       dump_stack();
+
+       doorbell = readl(&ioc->chip->Doorbell);
+       if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT)
+               mpt2sas_base_fault_info(ioc , doorbell);
+       else {
+               writel(0xC0FFEE00, &ioc->chip->Doorbell);
+               printk(MPT2SAS_ERR_FMT "Firmware is halted due to command "
+                   "timeout\n", ioc->name);
+       }
+
+       panic("panic in %s\n", __func__);
+}
+
 #ifdef CONFIG_SCSI_MPT2SAS_LOGGING
 /**
  * _base_sas_ioc_info - verbose translation of the ioc status
@@ -525,20 +596,6 @@ _base_sas_log_info(struct MPT2SAS_ADAPTER *ioc , u32 log_info)
             sas_loginfo.dw.subcode);
 }
 
-/**
- * mpt2sas_base_fault_info - verbose translation of firmware FAULT code
- * @ioc: pointer to scsi command object
- * @fault_code: fault code
- *
- * Return nothing.
- */
-void
-mpt2sas_base_fault_info(struct MPT2SAS_ADAPTER *ioc , u16 fault_code)
-{
-       printk(MPT2SAS_ERR_FMT "fault_state(0x%04x)!\n",
-           ioc->name, fault_code);
-}
-
 /**
  * _base_display_reply_info -
  * @ioc: pointer to scsi command object
@@ -3684,6 +3741,9 @@ mpt2sas_base_hard_reset_handler(struct MPT2SAS_ADAPTER *ioc, int sleep_flag,
        dtmprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s: enter\n", ioc->name,
            __func__));
 
+       if (mpt2sas_fwfault_debug)
+               mpt2sas_halt_firmware(ioc);
+
        spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags);
        if (ioc->shost_recovery) {
                spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags);
index fa99ff204e46b884886ee753f604a390f8a745a7..0c75c0e137f7aec790c8e0d38ceba09bed26652b 100644 (file)
@@ -466,6 +466,7 @@ typedef void (*MPT_ADD_SGE)(void *paddr, u32 flags_length, dma_addr_t dma_addr);
  * @chip_phys: physical addrss prior to mapping
  * @pio_chip: I/O mapped register space
  * @logging_level: see mpt2sas_debug.h
+ * @fwfault_debug: debugging FW timeouts
  * @ir_firmware: IR firmware present
  * @bars: bitmask of BAR's that must be configured
  * @mask_interrupts: ignore interrupt
@@ -587,6 +588,7 @@ struct MPT2SAS_ADAPTER {
        unsigned long   chip_phys;
        unsigned long   pio_chip;
        int             logging_level;
+       int             fwfault_debug;
        u8              ir_firmware;
        int             bars;
        u8              mask_interrupts;
@@ -803,6 +805,8 @@ int mpt2sas_base_scsi_enclosure_processor(struct MPT2SAS_ADAPTER *ioc,
     Mpi2SepReply_t *mpi_reply, Mpi2SepRequest_t *mpi_request);
 void mpt2sas_base_validate_event_type(struct MPT2SAS_ADAPTER *ioc, u32 *event_type);
 
+void mpt2sas_halt_firmware(struct MPT2SAS_ADAPTER *ioc);
+
 /* scsih shared API */
 u8 mpt2sas_scsih_event_callback(struct MPT2SAS_ADAPTER *ioc, u8 msix_index,
     u32 reply);
index 57d7246339067992792a5b24b81d3cdb0a2e0692..6901a6706ede3d9f8fdb062939d7f2c2da75e33a 100644 (file)
@@ -896,6 +896,7 @@ _ctl_do_mpt_command(struct MPT2SAS_ADAPTER *ioc,
                        printk(MPT2SAS_INFO_FMT "issue target reset: handle "
                            "= (0x%04x)\n", ioc->name,
                            mpi_request->FunctionDependent1);
+                       mpt2sas_halt_firmware(ioc);
                        mutex_lock(&ioc->tm_cmds.mutex);
                        mpt2sas_scsih_issue_tm(ioc,
                            mpi_request->FunctionDependent1, 0,
@@ -2474,6 +2475,43 @@ _ctl_logging_level_store(struct device *cdev, struct device_attribute *attr,
 static DEVICE_ATTR(logging_level, S_IRUGO | S_IWUSR,
     _ctl_logging_level_show, _ctl_logging_level_store);
 
+/* device attributes */
+/**
+ * _ctl_fwfault_debug_show - report the adapter's fwfault_debug value
+ * @cdev - pointer to embedded class device
+ * @attr - device attribute (not used here)
+ * @buf - the buffer returned
+ *
+ * mpt2sas_fwfault_debug is command line option
+ * A sysfs 'read/write' shost attribute.
+ */
+static ssize_t
+_ctl_fwfault_debug_show(struct device *cdev,
+    struct device_attribute *attr, char *buf)
+{
+       struct MPT2SAS_ADAPTER *adapter = shost_priv(class_to_shost(cdev));
+
+       return snprintf(buf, PAGE_SIZE, "%d\n", adapter->fwfault_debug);
+}
+/* _ctl_fwfault_debug_store - parse @buf as "%d" into ioc->fwfault_debug */
+static ssize_t
+_ctl_fwfault_debug_store(struct device *cdev,
+    struct device_attribute *attr, const char *buf, size_t count)
+{
+       struct MPT2SAS_ADAPTER *ioc = shost_priv(class_to_shost(cdev));
+       int val = 0;
+
+       if (sscanf(buf, "%d", &val) != 1)
+               return -EINVAL;
+
+       ioc->fwfault_debug = val;
+       printk(MPT2SAS_INFO_FMT "fwfault_debug=%d\n", ioc->name,
+           ioc->fwfault_debug);
+       return count;   /* sysfs: report the whole write as consumed */
+}
+static DEVICE_ATTR(fwfault_debug, S_IRUGO | S_IWUSR,
+    _ctl_fwfault_debug_show, _ctl_fwfault_debug_store);
+
 struct device_attribute *mpt2sas_host_attrs[] = {
        &dev_attr_version_fw,
        &dev_attr_version_bios,
@@ -2487,13 +2525,12 @@ struct device_attribute *mpt2sas_host_attrs[] = {
        &dev_attr_io_delay,
        &dev_attr_device_delay,
        &dev_attr_logging_level,
+       &dev_attr_fwfault_debug,
        &dev_attr_fw_queue_depth,
        &dev_attr_host_sas_address,
        NULL,
 };
 
-/* device attributes */
-
 /**
  * _ctl_device_sas_address_show - sas address
  * @cdev - pointer to embedded class device
index 91d61154a46c002eb5246435522840e41e98a384..59ea821c2a3c7e7e303284af22add95085b6c287 100644 (file)
@@ -1929,6 +1929,8 @@ _scsih_abort(struct scsi_cmnd *scmd)
                goto out;
        }
 
+       mpt2sas_halt_firmware(ioc);
+
        mutex_lock(&ioc->tm_cmds.mutex);
        handle = sas_device_priv_data->sas_target->handle;
        mpt2sas_scsih_issue_tm(ioc, handle, sas_device_priv_data->lun,