net: hns3: add handling of hw ras errors using new set of commands
author Shiju Jose <shiju.jose@huawei.com>
Fri, 7 Dec 2018 21:08:04 +0000 (21:08 +0000)
committer David S. Miller <davem@davemloft.net>
Fri, 7 Dec 2018 23:57:01 +0000 (15:57 -0800)
1. This patch adds handling of hw ras errors using new set of
   common commands.
2. Updated the error message tables to match the register's name and
   error status returned by the commands.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h

index d2fb2103c709fede91735ed2774bb7b2b96a35f0..0a0eb6c154c586c0ec1b4db159e1517c1298ece1 100644 (file)
@@ -217,6 +217,9 @@ enum hclge_opcode_type {
        /* Error INT commands */
        HCLGE_TM_SCH_ECC_INT_EN         = 0x0829,
        HCLGE_COMMON_ECC_INT_CFG        = 0x1505,
+       HCLGE_QUERY_RAS_INT_STS_BD_NUM  = 0x1510,
+       HCLGE_QUERY_CLEAR_MPF_RAS_INT   = 0x1511,
+       HCLGE_QUERY_CLEAR_PF_RAS_INT    = 0x1512,
        HCLGE_IGU_EGU_TNL_INT_EN        = 0x1803,
        HCLGE_IGU_COMMON_INT_EN         = 0x1806,
        HCLGE_TM_QCN_MEM_INT_CFG        = 0x1A14,
index d1c9f7ac90215d6cd6a8a93b073e5bb9449c3d13..22e7c5bb2f7dbe17103f645490633435538369a2 100644 (file)
@@ -20,12 +20,7 @@ static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = {
        { .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err" },
        { .int_msk = BIT(14), .msg = "imp_dtcm1_mem1_ecc_1bit_err" },
        { .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err" },
-       { /* sentinel */ }
-};
-
-static const struct hclge_hw_error hclge_imp_itcm4_ecc_int[] = {
-       { .int_msk = BIT(0), .msg = "imp_itcm4_ecc_1bit_err" },
-       { .int_msk = BIT(1), .msg = "imp_itcm4_ecc_mbit_err" },
+       { .int_msk = BIT(17), .msg = "imp_itcm4_ecc_mbit_err" },
        { /* sentinel */ }
 };
 
@@ -46,26 +41,14 @@ static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = {
        { .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err" },
        { .int_msk = BIT(14), .msg = "cmdq_nic_tx_addr_ecc_1bit_err" },
        { .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err" },
-       { /* sentinel */ }
-};
-
-static const struct hclge_hw_error hclge_cmdq_rocee_mem_ecc_int[] = {
-       { .int_msk = BIT(0), .msg = "cmdq_rocee_rx_depth_ecc_1bit_err" },
-       { .int_msk = BIT(1), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err" },
-       { .int_msk = BIT(2), .msg = "cmdq_rocee_tx_depth_ecc_1bit_err" },
-       { .int_msk = BIT(3), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err" },
-       { .int_msk = BIT(4), .msg = "cmdq_rocee_rx_tail_ecc_1bit_err" },
-       { .int_msk = BIT(5), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err" },
-       { .int_msk = BIT(6), .msg = "cmdq_rocee_tx_tail_ecc_1bit_err" },
-       { .int_msk = BIT(7), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err" },
-       { .int_msk = BIT(8), .msg = "cmdq_rocee_rx_head_ecc_1bit_err" },
-       { .int_msk = BIT(9), .msg = "cmdq_rocee_rx_head_ecc_mbit_err" },
-       { .int_msk = BIT(10), .msg = "cmdq_rocee_tx_head_ecc_1bit_err" },
-       { .int_msk = BIT(11), .msg = "cmdq_rocee_tx_head_ecc_mbit_err" },
-       { .int_msk = BIT(12), .msg = "cmdq_rocee_rx_addr_ecc_1bit_err" },
-       { .int_msk = BIT(13), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err" },
-       { .int_msk = BIT(14), .msg = "cmdq_rocee_tx_addr_ecc_1bit_err" },
-       { .int_msk = BIT(15), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err" },
+       { .int_msk = BIT(17), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err" },
+       { .int_msk = BIT(19), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err" },
+       { .int_msk = BIT(21), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err" },
+       { .int_msk = BIT(23), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err" },
+       { .int_msk = BIT(25), .msg = "cmdq_rocee_rx_head_ecc_mbit_err" },
+       { .int_msk = BIT(27), .msg = "cmdq_rocee_tx_head_ecc_mbit_err" },
+       { .int_msk = BIT(29), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err" },
+       { .int_msk = BIT(31), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err" },
        { /* sentinel */ }
 };
 
@@ -85,7 +68,13 @@ static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
        { /* sentinel */ }
 };
 
-static const struct hclge_hw_error hclge_igu_com_err_int[] = {
+/* Decode table for the MSIX_ECC_INT_STS status word; only the multi-bit
+ * (mbit) ECC error bits are listed. The empty last entry terminates the
+ * table for hclge_log_error().
+ */
+static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = {
+       { .int_msk = BIT(1), .msg = "msix_nic_ecc_mbit_err" },
+       { .int_msk = BIT(3), .msg = "msix_rocee_ecc_mbit_err" },
+       { /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_igu_int[] = {
        { .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err" },
        { .int_msk = BIT(1), .msg = "igu_rx_buf0_ecc_1bit_err" },
        { .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err" },
@@ -93,7 +82,7 @@ static const struct hclge_hw_error hclge_igu_com_err_int[] = {
        { /* sentinel */ }
 };
 
-static const struct hclge_hw_error hclge_igu_egu_tnl_err_int[] = {
+static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = {
        { .int_msk = BIT(0), .msg = "rx_buf_overflow" },
        { .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow" },
        { .int_msk = BIT(2), .msg = "rx_stp_fifo_undeflow" },
@@ -109,46 +98,7 @@ static const struct hclge_hw_error hclge_ncsi_err_int[] = {
        { /* sentinel */ }
 };
 
-static const struct hclge_hw_error hclge_ppp_mpf_int0[] = {
-       { .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_1bit_err" },
-       { .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_1bit_err" },
-       { .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_1bit_err" },
-       { .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_1bit_err" },
-       { .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_1bit_err" },
-       { .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_1bit_err" },
-       { .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_1bit_err" },
-       { .int_msk = BIT(7), .msg = "rss_tc_mode_mem_ecc_1bit_err" },
-       { .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_1bit_err" },
-       { .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_1bit_err" },
-       { .int_msk = BIT(10), .msg = "rss_idt_mem2_ecc_1bit_err" },
-       { .int_msk = BIT(11), .msg = "rss_idt_mem3_ecc_1bit_err" },
-       { .int_msk = BIT(12), .msg = "rss_idt_mem4_ecc_1bit_err" },
-       { .int_msk = BIT(13), .msg = "rss_idt_mem5_ecc_1bit_err" },
-       { .int_msk = BIT(14), .msg = "rss_idt_mem6_ecc_1bit_err" },
-       { .int_msk = BIT(15), .msg = "rss_idt_mem7_ecc_1bit_err" },
-       { .int_msk = BIT(16), .msg = "rss_idt_mem8_ecc_1bit_err" },
-       { .int_msk = BIT(17), .msg = "rss_idt_mem9_ecc_1bit_err" },
-       { .int_msk = BIT(18), .msg = "rss_idt_mem10_ecc_1bit_err" },
-       { .int_msk = BIT(19), .msg = "rss_idt_mem11_ecc_1bit_err" },
-       { .int_msk = BIT(20), .msg = "rss_idt_mem12_ecc_1bit_err" },
-       { .int_msk = BIT(21), .msg = "rss_idt_mem13_ecc_1bit_err" },
-       { .int_msk = BIT(22), .msg = "rss_idt_mem14_ecc_1bit_err" },
-       { .int_msk = BIT(23), .msg = "rss_idt_mem15_ecc_1bit_err" },
-       { .int_msk = BIT(24), .msg = "port_vlan_mem_ecc_1bit_err" },
-       { .int_msk = BIT(25), .msg = "mcast_linear_table_mem_ecc_1bit_err" },
-       { .int_msk = BIT(26), .msg = "mcast_result_mem_ecc_1bit_err" },
-       { .int_msk = BIT(27),
-               .msg = "flow_director_ad_mem0_ecc_1bit_err" },
-       { .int_msk = BIT(28),
-               .msg = "flow_director_ad_mem1_ecc_1bit_err" },
-       { .int_msk = BIT(29),
-               .msg = "rx_vlan_tag_memory_ecc_1bit_err" },
-       { .int_msk = BIT(30),
-               .msg = "Tx_UP_mapping_config_mem_ecc_1bit_err" },
-       { /* sentinel */ }
-};
-
-static const struct hclge_hw_error hclge_ppp_mpf_int1[] = {
+static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = {
        { .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_mbit_err" },
        { .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_mbit_err" },
        { .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_mbit_err" },
@@ -193,17 +143,7 @@ static const struct hclge_hw_error hclge_ppp_pf_int[] = {
        { /* sentinel */ }
 };
 
-static const struct hclge_hw_error hclge_ppp_mpf_int2[] = {
-       { .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_1bit_err" },
-       { .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_1bit_err" },
-       { .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_1bit_err" },
-       { .int_msk = BIT(3), .msg = "FD_CN0_memory_ecc_1bit_err" },
-       { .int_msk = BIT(4), .msg = "FD_CN1_memory_ecc_1bit_err" },
-       { .int_msk = BIT(5), .msg = "GRO_AD_memory_ecc_1bit_err" },
-       { /* sentinel */ }
-};
-
-static const struct hclge_hw_error hclge_ppp_mpf_int3[] = {
+static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = {
        { .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_mbit_err" },
        { .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_mbit_err" },
        { .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_mbit_err" },
@@ -213,104 +153,75 @@ static const struct hclge_hw_error hclge_ppp_mpf_int3[] = {
        { /* sentinel */ }
 };
 
-struct hclge_tm_sch_ecc_info {
-       const char *name;
-};
-
-static const struct hclge_tm_sch_ecc_info hclge_tm_sch_ecc_err[7][15] = {
-       {
-               { .name = "QSET_QUEUE_CTRL:PRI_LEN TAB" },
-               { .name = "QSET_QUEUE_CTRL:SPA_LEN TAB" },
-               { .name = "QSET_QUEUE_CTRL:SPB_LEN TAB" },
-               { .name = "QSET_QUEUE_CTRL:WRRA_LEN TAB" },
-               { .name = "QSET_QUEUE_CTRL:WRRB_LEN TAB" },
-               { .name = "QSET_QUEUE_CTRL:SPA_HPTR TAB" },
-               { .name = "QSET_QUEUE_CTRL:SPB_HPTR TAB" },
-               { .name = "QSET_QUEUE_CTRL:WRRA_HPTR TAB" },
-               { .name = "QSET_QUEUE_CTRL:WRRB_HPTR TAB" },
-               { .name = "QSET_QUEUE_CTRL:QS_LINKLIST TAB" },
-               { .name = "QSET_QUEUE_CTRL:SPA_TPTR TAB" },
-               { .name = "QSET_QUEUE_CTRL:SPB_TPTR TAB" },
-               { .name = "QSET_QUEUE_CTRL:WRRA_TPTR TAB" },
-               { .name = "QSET_QUEUE_CTRL:WRRB_TPTR TAB" },
-               { .name = "QSET_QUEUE_CTRL:QS_DEFICITCNT TAB" },
-       },
-       {
-               { .name = "ROCE_QUEUE_CTRL:QS_LEN TAB" },
-               { .name = "ROCE_QUEUE_CTRL:QS_TPTR TAB" },
-               { .name = "ROCE_QUEUE_CTRL:QS_HPTR TAB" },
-               { .name = "ROCE_QUEUE_CTRL:QLINKLIST TAB" },
-               { .name = "ROCE_QUEUE_CTRL:QCLEN TAB" },
-       },
-       {
-               { .name = "NIC_QUEUE_CTRL:QS_LEN TAB" },
-               { .name = "NIC_QUEUE_CTRL:QS_TPTR TAB" },
-               { .name = "NIC_QUEUE_CTRL:QS_HPTR TAB" },
-               { .name = "NIC_QUEUE_CTRL:QLINKLIST TAB" },
-               { .name = "NIC_QUEUE_CTRL:QCLEN TAB" },
-       },
-       {
-               { .name = "RAM_CFG_CTRL:CSHAP TAB" },
-               { .name = "RAM_CFG_CTRL:PSHAP TAB" },
-       },
-       {
-               { .name = "SHAPER_CTRL:PSHAP TAB" },
-       },
-       {
-               { .name = "MSCH_CTRL" },
-       },
-       {
-               { .name = "TOP_CTRL" },
-       },
-};
-
-static const struct hclge_hw_error hclge_tm_sch_err_int[] = {
+static const struct hclge_hw_error hclge_tm_sch_rint[] = {
        { .int_msk = BIT(0), .msg = "tm_sch_ecc_1bit_err" },
        { .int_msk = BIT(1), .msg = "tm_sch_ecc_mbit_err" },
-       { .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_full_err" },
-       { .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_empty_err" },
-       { .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_full_err" },
-       { .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_empty_err" },
-       { .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_full_err" },
-       { .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_empty_err" },
-       { .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_full_err" },
-       { .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_empty_err" },
-       { .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_full_err" },
-       { .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_empty_err" },
+       { .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_err" },
+       { .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_err" },
+       { .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_err" },
+       { .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_err" },
+       { .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_err" },
+       { .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_err" },
+       { .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_err" },
+       { .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_err" },
+       { .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_err" },
+       { .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_err" },
        { .int_msk = BIT(12),
-         .msg = "tm_sch_port_shap_offset_fifo_wr_full_err" },
+         .msg = "tm_sch_port_shap_offset_fifo_wr_err" },
        { .int_msk = BIT(13),
-         .msg = "tm_sch_port_shap_offset_fifo_rd_empty_err" },
+         .msg = "tm_sch_port_shap_offset_fifo_rd_err" },
        { .int_msk = BIT(14),
-         .msg = "tm_sch_pg_pshap_offset_fifo_wr_full_err" },
+         .msg = "tm_sch_pg_pshap_offset_fifo_wr_err" },
        { .int_msk = BIT(15),
-         .msg = "tm_sch_pg_pshap_offset_fifo_rd_empty_err" },
+         .msg = "tm_sch_pg_pshap_offset_fifo_rd_err" },
        { .int_msk = BIT(16),
-         .msg = "tm_sch_pg_cshap_offset_fifo_wr_full_err" },
+         .msg = "tm_sch_pg_cshap_offset_fifo_wr_err" },
        { .int_msk = BIT(17),
-         .msg = "tm_sch_pg_cshap_offset_fifo_rd_empty_err" },
+         .msg = "tm_sch_pg_cshap_offset_fifo_rd_err" },
        { .int_msk = BIT(18),
-         .msg = "tm_sch_pri_pshap_offset_fifo_wr_full_err" },
+         .msg = "tm_sch_pri_pshap_offset_fifo_wr_err" },
        { .int_msk = BIT(19),
-         .msg = "tm_sch_pri_pshap_offset_fifo_rd_empty_err" },
+         .msg = "tm_sch_pri_pshap_offset_fifo_rd_err" },
        { .int_msk = BIT(20),
-         .msg = "tm_sch_pri_cshap_offset_fifo_wr_full_err" },
+         .msg = "tm_sch_pri_cshap_offset_fifo_wr_err" },
        { .int_msk = BIT(21),
-         .msg = "tm_sch_pri_cshap_offset_fifo_rd_empty_err" },
-       { .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_full_err" },
-       { .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_empty_err" },
-       { .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_full_err" },
-       { .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_empty_err" },
-       { .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_full_err" },
-       { .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_empty_err" },
-       { .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_full_err" },
-       { .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_empty_err" },
-       { .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_full_err" },
-       { .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_empty_err" },
+         .msg = "tm_sch_pri_cshap_offset_fifo_rd_err" },
+       { .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_err" },
+       { .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_err" },
+       { .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_err" },
+       { .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_err" },
+       { .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_err" },
+       { .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_err" },
+       { .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_err" },
+       { .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_err" },
+       { .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_err" },
+       { .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_err" },
+       { /* sentinel */ }
+};
+
+/* Decode table for the QCN_FIFO_RINT status word (bits 0..17). Each entry
+ * maps one status bit to the message logged by hclge_log_error(); the empty
+ * last entry terminates the table.
+ */
+static const struct hclge_hw_error hclge_qcn_fifo_rint[] = {
+       { .int_msk = BIT(0), .msg = "qcn_shap_gp0_sch_fifo_rd_err" },
+       { .int_msk = BIT(1), .msg = "qcn_shap_gp0_sch_fifo_wr_err" },
+       { .int_msk = BIT(2), .msg = "qcn_shap_gp1_sch_fifo_rd_err" },
+       { .int_msk = BIT(3), .msg = "qcn_shap_gp1_sch_fifo_wr_err" },
+       { .int_msk = BIT(4), .msg = "qcn_shap_gp2_sch_fifo_rd_err" },
+       { .int_msk = BIT(5), .msg = "qcn_shap_gp2_sch_fifo_wr_err" },
+       { .int_msk = BIT(6), .msg = "qcn_shap_gp3_sch_fifo_rd_err" },
+       { .int_msk = BIT(7), .msg = "qcn_shap_gp3_sch_fifo_wr_err" },
+       { .int_msk = BIT(8), .msg = "qcn_shap_gp0_offset_fifo_rd_err" },
+       { .int_msk = BIT(9), .msg = "qcn_shap_gp0_offset_fifo_wr_err" },
+       { .int_msk = BIT(10), .msg = "qcn_shap_gp1_offset_fifo_rd_err" },
+       { .int_msk = BIT(11), .msg = "qcn_shap_gp1_offset_fifo_wr_err" },
+       { .int_msk = BIT(12), .msg = "qcn_shap_gp2_offset_fifo_rd_err" },
+       { .int_msk = BIT(13), .msg = "qcn_shap_gp2_offset_fifo_wr_err" },
+       { .int_msk = BIT(14), .msg = "qcn_shap_gp3_offset_fifo_rd_err" },
+       { .int_msk = BIT(15), .msg = "qcn_shap_gp3_offset_fifo_wr_err" },
+       { .int_msk = BIT(16), .msg = "qcn_byte_info_fifo_rd_err" },
+       { .int_msk = BIT(17), .msg = "qcn_byte_info_fifo_wr_err" },
        { /* sentinel */ }
 };
 
-static const struct hclge_hw_error hclge_qcn_ecc_err_int[] = {
+static const struct hclge_hw_error hclge_qcn_ecc_rint[] = {
        { .int_msk = BIT(0), .msg = "qcn_byte_mem_ecc_1bit_err" },
        { .int_msk = BIT(1), .msg = "qcn_byte_mem_ecc_mbit_err" },
        { .int_msk = BIT(2), .msg = "qcn_time_mem_ecc_1bit_err" },
@@ -336,6 +247,18 @@ static const struct hclge_hw_error hclge_qcn_ecc_err_int[] = {
        { /* sentinel */ }
 };
 
+/* hclge_log_error: log every error whose bit is set in a status word
+ * @dev: device used for the log messages
+ * @reg: name of the status register, printed as a prefix
+ * @err: error table terminated by an entry with a NULL msg
+ * @err_sts: status word to decode
+ *
+ * Emits one warning per table entry whose int_msk overlaps @err_sts.
+ */
+static void hclge_log_error(struct device *dev, char *reg,
+                           const struct hclge_hw_error *err,
+                           u32 err_sts)
+{
+       const struct hclge_hw_error *entry;
+
+       for (entry = err; entry->msg; entry++) {
+               /* skip entries whose bit is not raised */
+               if (!(entry->int_msk & err_sts))
+                       continue;
+
+               dev_warn(dev, "%s %s found [error status=0x%x]\n",
+                        reg, entry->msg, err_sts);
+       }
+}
+
 /* hclge_cmd_query_error: read the error information
  * @hdev: pointer to struct hclge_dev
  * @desc: descriptor for describing the command
@@ -388,7 +311,8 @@ static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en)
                desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN |
                                        HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN);
                desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN);
-               desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN);
+               desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN |
+                                             HCLGE_MSIX_SRAM_ECC_ERR_INT_EN);
                desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN);
        }
 
@@ -396,7 +320,8 @@ static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en)
        desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK |
                                HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK);
        desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK);
-       desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK);
+       desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK |
+                                     HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK);
        desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK);
 
        ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
@@ -564,6 +489,225 @@ static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en)
        return ret;
 }
 
+/* Request a default reset of the given type through the optional
+ * set_default_reset_request callback; does nothing when the callback is
+ * not set.
+ *
+ * NOTE: the macro is not hygienic - it uses a variable named 'ae_dev'
+ * that must already exist in the scope where the macro is expanded.
+ */
+#define HCLGE_SET_DEFAULT_RESET_REQUEST(reset_type) \
+       do { \
+               if (ae_dev->ops->set_default_reset_request) \
+                       ae_dev->ops->set_default_reset_request(ae_dev, \
+                                                              reset_type); \
+       } while (0)
+
+/* hclge_handle_mpf_ras_error: handle all main PF RAS errors
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor for describing the command
+ * @num:  number of extended command structures
+ *
+ * This function handles all the main PF RAS errors in the
+ * hw register/s using command: it queries the status words, logs every
+ * raised error, requests a reset where one is needed and finally sends
+ * the same descriptors back to clear the errors.
+ *
+ * The status words are read from desc[0], desc[3], desc[4], desc[6],
+ * desc[7] and desc[9], so @desc must hold at least 10 entries; @num is
+ * not checked here.
+ *
+ * Return: 0 on success, otherwise the error returned by hclge_cmd_send().
+ */
+static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
+                                     struct hclge_desc *desc,
+                                     int num)
+{
+       /* ae_dev is used implicitly by HCLGE_SET_DEFAULT_RESET_REQUEST() */
+       struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
+       struct device *dev = &hdev->pdev->dev;
+       __le32 *desc_data;
+       u32 status;
+       int ret;
+
+       /* query all main PF RAS errors */
+       hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_MPF_RAS_INT,
+                                  true);
+       desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+       ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
+       if (ret) {
+               dev_err(dev, "query all mpf ras int cmd failed (%d)\n", ret);
+               return ret;
+       }
+
+       /* log HNS common errors */
+       status = le32_to_cpu(desc[0].data[0]);
+       if (status) {
+               hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
+                               &hclge_imp_tcm_ecc_int[0], status);
+               HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+       }
+
+       status = le32_to_cpu(desc[0].data[1]);
+       if (status) {
+               hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
+                               &hclge_cmdq_nic_mem_ecc_int[0], status);
+               HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+       }
+
+       /* only bit 0 of this word is examined */
+       if ((le32_to_cpu(desc[0].data[2])) & BIT(0)) {
+               dev_warn(dev, "imp_rd_data_poison_err found\n");
+               HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+       }
+
+       status = le32_to_cpu(desc[0].data[3]);
+       if (status) {
+               hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
+                               &hclge_tqp_int_ecc_int[0], status);
+               HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+       }
+
+       status = le32_to_cpu(desc[0].data[4]);
+       if (status) {
+               hclge_log_error(dev, "MSIX_ECC_INT_STS",
+                               &hclge_msix_sram_ecc_int[0], status);
+               HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+       }
+
+       /* log IGU(Ingress Unit) errors */
+       /* from here on each descriptor is read as a flat array of __le32
+        * words starting at the descriptor's first byte; IGU and PPP errors
+        * are logged only, no reset is requested for them
+        */
+       desc_data = (__le32 *)&desc[3];
+       status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK;
+       if (status)
+               hclge_log_error(dev, "IGU_INT_STS",
+                               &hclge_igu_int[0], status);
+
+       /* log PPP(Programmable Packet Process) errors */
+       desc_data = (__le32 *)&desc[4];
+       status = le32_to_cpu(*(desc_data + 1));
+       if (status)
+               hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
+                               &hclge_ppp_mpf_abnormal_int_st1[0], status);
+
+       status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK;
+       if (status)
+               hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
+                               &hclge_ppp_mpf_abnormal_int_st3[0], status);
+
+       /* log TM(Traffic Manager) errors */
+       desc_data = (__le32 *)&desc[6];
+       status = le32_to_cpu(*desc_data);
+       if (status) {
+               hclge_log_error(dev, "TM_SCH_RINT",
+                               &hclge_tm_sch_rint[0], status);
+               HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+       }
+
+       /* log QCN(Quantized Congestion Control) errors */
+       desc_data = (__le32 *)&desc[7];
+       status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK;
+       if (status) {
+               hclge_log_error(dev, "QCN_FIFO_RINT",
+                               &hclge_qcn_fifo_rint[0], status);
+               HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+       }
+
+       status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK;
+       if (status) {
+               hclge_log_error(dev, "QCN_ECC_RINT",
+                               &hclge_qcn_ecc_rint[0], status);
+               HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+       }
+
+       /* log NCSI errors */
+       desc_data = (__le32 *)&desc[9];
+       status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK;
+       if (status) {
+               hclge_log_error(dev, "NCSI_ECC_INT_RPT",
+                               &hclge_ncsi_err_int[0], status);
+               HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+       }
+
+       /* clear all main PF RAS errors */
+       /* the descriptors just read are reused and sent back as the clear
+        * request
+        */
+       hclge_cmd_reuse_desc(&desc[0], false);
+       desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+       ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
+       if (ret)
+               dev_err(dev, "clear all mpf ras int cmd failed (%d)\n", ret);
+
+       return ret;
+}
+
+/* hclge_handle_pf_ras_error: handle all PF RAS errors
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor for describing the command
+ * @num:  number of extended command structures
+ *
+ * This function handles all the PF RAS errors in the
+ * hw register/s using command: it queries the status words, logs the
+ * raised IGU/EGU tunnel errors and sends the same descriptors back to
+ * clear the errors. No reset is requested from here.
+ *
+ * The status word is read from desc[1], so @desc must hold at least
+ * 2 entries; @num is not checked here.
+ *
+ * Return: 0 on success, otherwise the error returned by hclge_cmd_send().
+ */
+static int hclge_handle_pf_ras_error(struct hclge_dev *hdev,
+                                    struct hclge_desc *desc,
+                                    int num)
+{
+       struct device *dev = &hdev->pdev->dev;
+       __le32 *desc_data;
+       u32 status;
+       int ret;
+
+       /* query all PF RAS errors */
+       hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_PF_RAS_INT,
+                                  true);
+       desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+       ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
+       if (ret) {
+               dev_err(dev, "query all pf ras int cmd failed (%d)\n", ret);
+               return ret;
+       }
+
+       /* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */
+       /* the descriptor is read as a flat array of __le32 words starting
+        * at its first byte
+        */
+       desc_data = (__le32 *)&desc[1];
+       status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK;
+       if (status)
+               hclge_log_error(dev, "IGU_EGU_TNL_INT_STS",
+                               &hclge_igu_egu_tnl_int[0], status);
+
+       /* clear all PF RAS errors */
+       /* the descriptors just read are reused and sent back as the clear
+        * request
+        */
+       hclge_cmd_reuse_desc(&desc[0], false);
+       desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+       ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
+       if (ret)
+               dev_err(dev, "clear all pf ras int cmd failed (%d)\n", ret);
+
+       return ret;
+}
+
+/* Minimum descriptor counts the RAS handlers rely on: the main PF handler
+ * reads up to desc[9] and the PF handler reads desc[1].
+ */
+#define HCLGE_MPF_RAS_INT_MIN_BD_NUM   10
+#define HCLGE_PF_RAS_INT_MIN_BD_NUM    2
+
+/* hclge_handle_all_ras_errors: query, log and clear all RAS errors
+ * @hdev: pointer to struct hclge_dev
+ *
+ * Queries how many descriptors the main PF and PF RAS status commands
+ * need, allocates one buffer large enough for both and runs the two
+ * handlers in turn. The buffer is zeroed between the two handlers.
+ *
+ * Return: 0 on success, -EINVAL when the reported descriptor counts are too
+ * small for the handlers to parse, -ENOMEM on allocation failure, or the
+ * error returned by the command queue.
+ */
+static int hclge_handle_all_ras_errors(struct hclge_dev *hdev)
+{
+       struct device *dev = &hdev->pdev->dev;
+       u32 mpf_bd_num, pf_bd_num, bd_num;
+       struct hclge_desc desc_bd;
+       struct hclge_desc *desc;
+       int ret;
+
+       /* query the number of registers in the RAS int status */
+       hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_RAS_INT_STS_BD_NUM,
+                                  true);
+       ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
+       if (ret) {
+               dev_err(dev, "fail(%d) to query ras int status bd num\n", ret);
+               return ret;
+       }
+       mpf_bd_num = le32_to_cpu(desc_bd.data[0]);
+       pf_bd_num = le32_to_cpu(desc_bd.data[1]);
+
+       /* The handlers index fixed descriptors without looking at the
+        * count, so a short (or zero) count would make them access memory
+        * beyond the allocation below.
+        */
+       if (mpf_bd_num < HCLGE_MPF_RAS_INT_MIN_BD_NUM ||
+           pf_bd_num < HCLGE_PF_RAS_INT_MIN_BD_NUM) {
+               dev_err(dev, "invalid ras int bd num: mpf(%u), pf(%u)\n",
+                       mpf_bd_num, pf_bd_num);
+               return -EINVAL;
+       }
+       bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
+
+       desc = kcalloc(bd_num, sizeof(*desc), GFP_KERNEL);
+       if (!desc)
+               return -ENOMEM;
+
+       /* handle all main PF RAS errors */
+       ret = hclge_handle_mpf_ras_error(hdev, desc, mpf_bd_num);
+       if (ret)
+               goto out;
+
+       /* start the PF query from a clean buffer */
+       memset(desc, 0, bd_num * sizeof(*desc));
+
+       /* handle all PF RAS errors */
+       ret = hclge_handle_pf_ras_error(hdev, desc, pf_bd_num);
+
+out:
+       kfree(desc);
+       return ret;
+}
+
 static const struct hclge_hw_blk hw_blk[] = {
        {
          .msk = BIT(0), .name = "IGU_EGU",
@@ -605,13 +749,18 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev)
 {
        struct hclge_dev *hdev = ae_dev->priv;
        struct device *dev = &hdev->pdev->dev;
-       u32 sts;
+       u32 status;
 
-       sts = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
+       status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
 
-       /* Handling Non-fatal RAS errors */
-       if (sts & HCLGE_RAS_REG_NFE_MASK)
-               dev_warn(dev, "HNS Non-Fatal RAS error identified\n");
+       /* Handling Non-fatal HNS RAS errors */
+       if (status & HCLGE_RAS_REG_NFE_MASK) {
+               dev_warn(dev,
+                        "HNS Non-Fatal RAS error(status=0x%x) identified\n",
+                        status);
+               hclge_handle_all_ras_errors(hdev);
+               return PCI_ERS_RESULT_NEED_RESET;
+       }
 
-       return PCI_ERS_RESULT_NEED_RESET;
+       return PCI_ERS_RESULT_RECOVERED;
 }
index 9fe1c966574e55e37d7033f7b9105fd03180fa71..809e6981aa8ba016844325c568268d92007ca8e0 100644 (file)
@@ -21,6 +21,8 @@
 #define HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK    0x0100
 #define HCLGE_TQP_ECC_ERR_INT_EN       0x0FFF
 #define HCLGE_TQP_ECC_ERR_INT_EN_MASK  0x0FFF
+#define HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK    0x0F000000
+#define HCLGE_MSIX_SRAM_ECC_ERR_INT_EN 0x0F000000
 #define HCLGE_IGU_ERR_INT_EN   0x0000066F
 #define HCLGE_IGU_ERR_INT_EN_MASK      0x000F
 #define HCLGE_IGU_TNL_ERR_INT_EN    0x0002AABF
 #define HCLGE_NCSI_ERR_INT_EN  0x3
 #define HCLGE_NCSI_ERR_INT_TYPE        0x9
 
+/* Bit masks applied to the RAS status words returned by the query commands
+ * before the words are decoded against the error tables.
+ */
+#define HCLGE_IGU_INT_MASK             GENMASK(3, 0)
+#define HCLGE_IGU_EGU_TNL_INT_MASK     GENMASK(5, 0)
+#define HCLGE_PPP_MPF_INT_ST3_MASK     GENMASK(5, 0)
+#define HCLGE_QCN_FIFO_INT_MASK                GENMASK(17, 0)
+#define HCLGE_QCN_ECC_INT_MASK         GENMASK(21, 0)
+#define HCLGE_NCSI_ECC_INT_MASK                GENMASK(1, 0)
+
 enum hclge_err_int_type {
        HCLGE_ERR_INT_MSIX = 0,
        HCLGE_ERR_INT_RAS_CE = 1,