/* check for vector0 reset event sources */
if (BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B) & rst_src_reg) {
+ dev_info(&hdev->pdev->dev, "global reset interrupt\n");
set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
set_bit(HNAE3_GLOBAL_RESET, &hdev->reset_pending);
*clearval = BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B);
}
if (BIT(HCLGE_VECTOR0_CORERESET_INT_B) & rst_src_reg) {
+ dev_info(&hdev->pdev->dev, "core reset interrupt\n");
set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
set_bit(HNAE3_CORE_RESET, &hdev->reset_pending);
*clearval = BIT(HCLGE_VECTOR0_CORERESET_INT_B);
}
if (BIT(HCLGE_VECTOR0_IMPRESET_INT_B) & rst_src_reg) {
+ dev_info(&hdev->pdev->dev, "IMP reset interrupt\n");
set_bit(HNAE3_IMP_RESET, &hdev->reset_pending);
*clearval = BIT(HCLGE_VECTOR0_IMPRESET_INT_B);
return HCLGE_VECTOR0_EVENT_RST;
int ret;
ret = client->ops->reset_notify(handle, type);
- if (ret)
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "notify nic client failed %d(%d)\n", type, ret);
return ret;
+ }
}
return 0;
return ret;
}
+static bool hclge_reset_err_handle(struct hclge_dev *hdev, bool is_timeout)
+{
+#define MAX_RESET_FAIL_CNT 5
+#define RESET_UPGRADE_DELAY_SEC 10
+
+ if (hdev->reset_pending) {
+ dev_info(&hdev->pdev->dev, "Reset pending %lu\n",
+ hdev->reset_pending);
+ return true;
+ } else if ((hdev->reset_type != HNAE3_IMP_RESET) &&
+ (hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG) &
+ BIT(HCLGE_IMP_RESET_BIT))) {
+ dev_info(&hdev->pdev->dev,
+ "reset failed because IMP Reset is pending\n");
+ hclge_clear_reset_cause(hdev);
+ return false;
+ } else if (hdev->reset_fail_cnt < MAX_RESET_FAIL_CNT) {
+ hdev->reset_fail_cnt++;
+ if (is_timeout) {
+ set_bit(hdev->reset_type, &hdev->reset_pending);
+ dev_info(&hdev->pdev->dev,
+ "re-schedule to wait for hw reset done\n");
+ return true;
+ }
+
+ dev_info(&hdev->pdev->dev, "Upgrade reset level\n");
+ hclge_clear_reset_cause(hdev);
+ mod_timer(&hdev->reset_timer,
+ jiffies + RESET_UPGRADE_DELAY_SEC * HZ);
+
+ return false;
+ }
+
+ hclge_clear_reset_cause(hdev);
+ dev_err(&hdev->pdev->dev, "Reset fail!\n");
+ return false;
+}
+
static void hclge_reset(struct hclge_dev *hdev)
{
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+ bool is_timeout = false;
+ int ret;
/* Initialize ae_dev reset status as well, in case enet layer wants to
* know if device is undergoing reset
hdev->reset_count++;
hdev->last_reset_time = jiffies;
/* perform reset of the stack & ae device for a client */
- hclge_notify_roce_client(hdev, HNAE3_DOWN_CLIENT);
+ ret = hclge_notify_roce_client(hdev, HNAE3_DOWN_CLIENT);
+ if (ret)
+ goto err_reset;
+
rtnl_lock();
- hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
- rtnl_unlock();
+ ret = hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+ if (ret)
+ goto err_reset_lock;
- hclge_reset_prepare_wait(hdev);
+ rtnl_unlock();
- if (!hclge_reset_wait(hdev)) {
- hclge_notify_roce_client(hdev, HNAE3_UNINIT_CLIENT);
- rtnl_lock();
- hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT);
- hclge_reset_ae_dev(hdev->ae_dev);
- hclge_notify_client(hdev, HNAE3_INIT_CLIENT);
+ ret = hclge_reset_prepare_wait(hdev);
+ if (ret)
+ goto err_reset;
- hclge_clear_reset_cause(hdev);
- } else {
- rtnl_lock();
- /* schedule again to check pending resets later */
- set_bit(hdev->reset_type, &hdev->reset_pending);
- hclge_reset_task_schedule(hdev);
+ if (hclge_reset_wait(hdev)) {
+ is_timeout = true;
+ goto err_reset;
}
- hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+ ret = hclge_notify_roce_client(hdev, HNAE3_UNINIT_CLIENT);
+ if (ret)
+ goto err_reset;
+
+ rtnl_lock();
+ ret = hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+ if (ret)
+ goto err_reset_lock;
+
+ ret = hclge_reset_ae_dev(hdev->ae_dev);
+ if (ret)
+ goto err_reset_lock;
+
+ ret = hclge_notify_client(hdev, HNAE3_INIT_CLIENT);
+ if (ret)
+ goto err_reset_lock;
+
+ hclge_clear_reset_cause(hdev);
+
+ ret = hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+ if (ret)
+ goto err_reset_lock;
+
rtnl_unlock();
- ae_dev->reset_type = HNAE3_NONE_RESET;
- hclge_notify_roce_client(hdev, HNAE3_INIT_CLIENT);
- hclge_notify_roce_client(hdev, HNAE3_UP_CLIENT);
+ ret = hclge_notify_roce_client(hdev, HNAE3_INIT_CLIENT);
+ if (ret)
+ goto err_reset;
+
+ ret = hclge_notify_roce_client(hdev, HNAE3_UP_CLIENT);
+ if (ret)
+ goto err_reset;
+
+ return;
+
+err_reset_lock:
+ rtnl_unlock();
+err_reset:
+ if (hclge_reset_err_handle(hdev, is_timeout))
+ hclge_reset_task_schedule(hdev);
}
static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle)
set_bit(rst_type, &hdev->default_reset_request);
}
+static void hclge_reset_timer(struct timer_list *t)
+{
+ struct hclge_dev *hdev = from_timer(hdev, t, reset_timer);
+
+ dev_info(&hdev->pdev->dev,
+ "triggering global reset in reset timer\n");
+ set_bit(HNAE3_GLOBAL_RESET, &hdev->default_reset_request);
+ hclge_reset_event(hdev->pdev, NULL);
+}
+
static void hclge_reset_subtask(struct hclge_dev *hdev)
{
/* check if there is any ongoing reset in the hardware. This status can
struct hlist_node *node;
int ret;
+ /* Return ok here, because reset error handling will check this
+ * return value. If error is returned here, the reset process will
+ * fail.
+ */
if (!hnae3_dev_fd_supported(hdev))
- return -EOPNOTSUPP;
+ return 0;
hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
if (hdev->service_timer.function)
del_timer_sync(&hdev->service_timer);
+ if (hdev->reset_timer.function)
+ del_timer_sync(&hdev->reset_timer);
if (hdev->service_task.func)
cancel_work_sync(&hdev->service_task);
if (hdev->rst_service_task.func)
hclge_dcb_ops_set(hdev);
timer_setup(&hdev->service_timer, hclge_service_timer, 0);
+ timer_setup(&hdev->reset_timer, hclge_reset_timer, 0);
INIT_WORK(&hdev->service_task, hclge_service_task);
INIT_WORK(&hdev->rst_service_task, hclge_reset_service_task);
INIT_WORK(&hdev->mbx_service_task, hclge_mailbox_service_task);