626 lines
18 KiB
Diff
626 lines
18 KiB
Diff
From ff05551bfec7c306b38bdb056f0e1203b8f01980 Mon Sep 17 00:00:00 2001
|
|
From: Jiaran Zhang <zhangjiaran@huawei.com>
|
|
Date: Sat, 24 Jul 2021 15:45:12 +0800
|
|
Subject: [PATCH 077/283] net: hns3: add the RAS compatibility adaptation
|
|
solution
|
|
|
|
mainline inclusion
|
|
from mainline-v5.14-rc1
|
|
commit 2e2deee7618b062efe3aba9fcb017dadcf148819
|
|
category: feature
|
|
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EMUR
|
|
CVE: NA
|
|
|
|
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=2e2deee7618b062efe3aba9fcb017dadcf148819
|
|
|
|
----------------------------------------------------------------------
|
|
|
|
To adapt to hardware modification and ensure that the driver is
|
|
compatible with the original error handling content, we need to add the
|
|
RAS compatibility adaptation solution.
|
|
|
|
Add a processing branch to the driver during error handling. In the new
|
|
processing branch, NIC fault information is integrated by the IMP. An
|
|
interaction command is added between the driver and IMP to query
|
|
and clear the fault source and interrupt source. The IMP integrates
|
|
error information and reports the highest reset level to the driver.
|
|
|
|
Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
|
|
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
|
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Reviewed-by: Yongxin Li <liyongxin1@huawei.com>
|
|
Signed-off-by: Junxin Chen <chenjunxin1@huawei.com>
|
|
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
|
|
Signed-off-by: Xiaodong Li <lixiaodong67@huawei.com>
|
|
|
|
Conflicts:
|
|
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
|
|
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
|
|
---
|
|
.../hisilicon/hns3/hns3pf/hclge_cmd.c | 3 +-
|
|
.../hisilicon/hns3/hns3pf/hclge_cmd.h | 2 +
|
|
.../hisilicon/hns3/hns3pf/hclge_err.c | 324 ++++++++++++++++--
|
|
.../hisilicon/hns3/hns3pf/hclge_err.h | 68 ++++
|
|
.../hisilicon/hns3/hns3pf/hclge_main.c | 54 ++-
|
|
5 files changed, 410 insertions(+), 41 deletions(-)
|
|
|
|
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
|
|
index 7646e510c0e7..e127ca7106b7 100644
|
|
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
|
|
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
|
|
@@ -194,7 +194,8 @@ static bool hclge_is_special_opcode(u16 opcode)
|
|
HCLGE_QUERY_CLEAR_MPF_RAS_INT,
|
|
HCLGE_QUERY_CLEAR_PF_RAS_INT,
|
|
HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
|
|
- HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT};
|
|
+ HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT,
|
|
+ HCLGE_QUERY_ALL_ERR_INFO};
|
|
int i;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) {
|
|
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
|
|
index b6498f8ce224..81b726e557df 100644
|
|
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
|
|
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
|
|
@@ -296,6 +296,8 @@ enum hclge_opcode_type {
|
|
HCLGE_QUERY_MSIX_INT_STS_BD_NUM = 0x1513,
|
|
HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT = 0x1514,
|
|
HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT = 0x1515,
|
|
+ HCLGE_QUERY_ALL_ERR_BD_NUM = 0x1516,
|
|
+ HCLGE_QUERY_ALL_ERR_INFO = 0x1517,
|
|
HCLGE_CONFIG_ROCEE_RAS_INT_EN = 0x1580,
|
|
HCLGE_QUERY_CLEAR_ROCEE_RAS_INT = 0x1581,
|
|
HCLGE_ROCEE_PF_RAS_INT_CMD = 0x1584,
|
|
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
|
|
index 3855c029829a..cf4afddad526 100644
|
|
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
|
|
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
|
|
@@ -631,9 +631,101 @@ const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = {
|
|
{ /* sentinel */ }
|
|
};
|
|
|
|
+static const struct hclge_hw_module_id hclge_hw_module_id_st[] = {
|
|
+ {
|
|
+ .module_id = MODULE_NONE,
|
|
+ .msg = "MODULE_NONE"
|
|
+ }, {
|
|
+ .module_id = MODULE_BIOS_COMMON,
|
|
+ .msg = "MODULE_BIOS_COMMON"
|
|
+ }, {
|
|
+ .module_id = MODULE_GE,
|
|
+ .msg = "MODULE_GE"
|
|
+ }, {
|
|
+ .module_id = MODULE_IGU_EGU,
|
|
+ .msg = "MODULE_IGU_EGU"
|
|
+ }, {
|
|
+ .module_id = MODULE_LGE,
|
|
+ .msg = "MODULE_LGE"
|
|
+ }, {
|
|
+ .module_id = MODULE_NCSI,
|
|
+ .msg = "MODULE_NCSI"
|
|
+ }, {
|
|
+ .module_id = MODULE_PPP,
|
|
+ .msg = "MODULE_PPP"
|
|
+ }, {
|
|
+ .module_id = MODULE_QCN,
|
|
+ .msg = "MODULE_QCN"
|
|
+ }, {
|
|
+ .module_id = MODULE_RCB_RX,
|
|
+ .msg = "MODULE_RCB_RX"
|
|
+ }, {
|
|
+ .module_id = MODULE_RTC,
|
|
+ .msg = "MODULE_RTC"
|
|
+ }, {
|
|
+ .module_id = MODULE_SSU,
|
|
+ .msg = "MODULE_SSU"
|
|
+ }, {
|
|
+ .module_id = MODULE_TM,
|
|
+ .msg = "MODULE_TM"
|
|
+ }, {
|
|
+ .module_id = MODULE_RCB_TX,
|
|
+ .msg = "MODULE_RCB_TX"
|
|
+ }, {
|
|
+ .module_id = MODULE_TXDMA,
|
|
+ .msg = "MODULE_TXDMA"
|
|
+ }, {
|
|
+ .module_id = MODULE_MASTER,
|
|
+ .msg = "MODULE_MASTER"
|
|
+ }
|
|
+};
|
|
+
|
|
+static const struct hclge_hw_type_id hclge_hw_type_id_st[] = {
|
|
+ {
|
|
+ .type_id = NONE_ERROR,
|
|
+ .msg = "none_error"
|
|
+ }, {
|
|
+ .type_id = FIFO_ERROR,
|
|
+ .msg = "fifo_error"
|
|
+ }, {
|
|
+ .type_id = MEMORY_ERROR,
|
|
+ .msg = "memory_error"
|
|
+ }, {
|
|
+ .type_id = POISON_ERROR,
|
|
+ .msg = "poison_error"
|
|
+ }, {
|
|
+ .type_id = MSIX_ECC_ERROR,
|
|
+ .msg = "msix_ecc_error"
|
|
+ }, {
|
|
+ .type_id = TQP_INT_ECC_ERROR,
|
|
+ .msg = "tqp_int_ecc_error"
|
|
+ }, {
|
|
+ .type_id = PF_ABNORMAL_INT_ERROR,
|
|
+ .msg = "pf_abnormal_int_error"
|
|
+ }, {
|
|
+ .type_id = MPF_ABNORMAL_INT_ERROR,
|
|
+ .msg = "mpf_abnormal_int_error"
|
|
+ }, {
|
|
+ .type_id = COMMON_ERROR,
|
|
+ .msg = "common_error"
|
|
+ }, {
|
|
+ .type_id = PORT_ERROR,
|
|
+ .msg = "port_error"
|
|
+ }, {
|
|
+ .type_id = ETS_ERROR,
|
|
+ .msg = "ets_error"
|
|
+ }, {
|
|
+ .type_id = NCSI_ERROR,
|
|
+ .msg = "ncsi_error"
|
|
+ }, {
|
|
+ .type_id = GLB_ERROR,
|
|
+ .msg = "glb_error"
|
|
+ }
|
|
+};
|
|
+
|
|
void hclge_log_error(struct device *dev, char *reg,
|
|
- const struct hclge_hw_error *err,
|
|
- u32 err_sts, unsigned long *reset_requests)
|
|
+ const struct hclge_hw_error *err,
|
|
+ u32 err_sts, unsigned long *reset_requests)
|
|
{
|
|
while (err->msg) {
|
|
if (err->int_msk & err_sts) {
|
|
@@ -1895,11 +1987,8 @@ static int hclge_handle_pf_msix_error(struct hclge_dev *hdev,
|
|
static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev,
|
|
unsigned long *reset_requests)
|
|
{
|
|
- struct hclge_mac_tnl_stats mac_tnl_stats;
|
|
- struct device *dev = &hdev->pdev->dev;
|
|
u32 mpf_bd_num, pf_bd_num, bd_num;
|
|
struct hclge_desc *desc;
|
|
- u32 status;
|
|
int ret;
|
|
|
|
/* query the number of bds for the MSIx int status */
|
|
@@ -1922,29 +2011,7 @@ static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev,
|
|
if (ret)
|
|
goto msi_error;
|
|
|
|
- /* query and clear mac tnl interruptions */
|
|
- hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_MAC_TNL_INT,
|
|
- true);
|
|
- ret = hclge_cmd_send(&hdev->hw, &desc[0], 1);
|
|
- if (ret) {
|
|
- dev_err(dev, "query mac tnl int cmd failed (%d)\n", ret);
|
|
- goto msi_error;
|
|
- }
|
|
-
|
|
- status = le32_to_cpu(desc->data[0]);
|
|
- if (status) {
|
|
- /* When mac tnl interrupt occurs, we record current time and
|
|
- * register status here in a fifo, then clear the status. So
|
|
- * that if link status changes suddenly at some time, we can
|
|
- * query them by debugfs.
|
|
- */
|
|
- mac_tnl_stats.time = local_clock();
|
|
- mac_tnl_stats.status = status;
|
|
- kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
|
|
- ret = hclge_clear_mac_tnl_int(hdev);
|
|
- if (ret)
|
|
- dev_err(dev, "clear mac tnl int failed (%d)\n", ret);
|
|
- }
|
|
+ ret = hclge_handle_mac_tnl(hdev);
|
|
|
|
msi_error:
|
|
kfree(desc);
|
|
@@ -1966,10 +2033,43 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
|
|
return hclge_handle_all_hw_msix_error(hdev, reset_requests);
|
|
}
|
|
|
|
-void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
|
|
+int hclge_handle_mac_tnl(struct hclge_dev *hdev)
|
|
{
|
|
-#define HCLGE_DESC_NO_DATA_LEN 8
|
|
+ struct hclge_mac_tnl_stats mac_tnl_stats;
|
|
+ struct device *dev = &hdev->pdev->dev;
|
|
+ struct hclge_desc desc;
|
|
+ u32 status;
|
|
+ int ret;
|
|
|
|
+ /* query and clear mac tnl interruptions */
|
|
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_MAC_TNL_INT, true);
|
|
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
|
|
+ if (ret) {
|
|
+ dev_err(dev, "failed to query mac tnl int, ret = %d.\n", ret);
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ status = le32_to_cpu(desc.data[0]);
|
|
+ if (status) {
|
|
+ /* When mac tnl interrupt occurs, we record current time and
|
|
+ * register status here in a fifo, then clear the status. So
|
|
+ * that if link status changes suddenly at some time, we can
|
|
+ * query them by debugfs.
|
|
+ */
|
|
+ mac_tnl_stats.time = local_clock();
|
|
+ mac_tnl_stats.status = status;
|
|
+ kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
|
|
+ ret = hclge_clear_mac_tnl_int(hdev);
|
|
+ if (ret)
|
|
+ dev_err(dev, "failed to clear mac tnl int, ret = %d.\n",
|
|
+ ret);
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
|
|
+{
|
|
struct hclge_dev *hdev = ae_dev->priv;
|
|
struct device *dev = &hdev->pdev->dev;
|
|
u32 mpf_bd_num, pf_bd_num, bd_num;
|
|
@@ -2018,3 +2118,167 @@ void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
|
|
msi_error:
|
|
kfree(desc);
|
|
}
|
|
+
|
|
+static void
|
|
+hclge_handle_error_type_reg_log(struct device *dev,
|
|
+ struct hclge_mod_err_info *mod_info,
|
|
+ struct hclge_type_reg_err_info *type_reg_info)
|
|
+{
|
|
+#define HCLGE_ERR_TYPE_MASK 0x7F
|
|
+#define HCLGE_ERR_TYPE_IS_RAS_OFFSET 7
|
|
+
|
|
+ u8 mod_id, total_module, type_id, total_type, i, is_ras;
|
|
+
|
|
+ mod_id = mod_info->mod_id;
|
|
+ type_id = type_reg_info->type_id & HCLGE_ERR_TYPE_MASK;
|
|
+ is_ras = type_reg_info->type_id >> HCLGE_ERR_TYPE_IS_RAS_OFFSET;
|
|
+
|
|
+ total_module = ARRAY_SIZE(hclge_hw_module_id_st);
|
|
+ total_type = ARRAY_SIZE(hclge_hw_type_id_st);
|
|
+
|
|
+ if (mod_id < total_module && type_id < total_type)
|
|
+ dev_err(dev,
|
|
+ "found %s %s, is %s error.\n",
|
|
+ hclge_hw_module_id_st[mod_id].msg,
|
|
+ hclge_hw_type_id_st[type_id].msg,
|
|
+ is_ras ? "ras" : "msix");
|
|
+ else
|
|
+ dev_err(dev,
|
|
+ "unknown module[%u] or type[%u].\n", mod_id, type_id);
|
|
+
|
|
+ dev_err(dev, "reg_value:\n");
|
|
+ for (i = 0; i < type_reg_info->reg_num; i++)
|
|
+ dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]);
|
|
+}
|
|
+
|
|
+static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev,
|
|
+ const u32 *buf, u32 buf_size)
|
|
+{
|
|
+ struct hclge_type_reg_err_info *type_reg_info;
|
|
+ struct hclge_dev *hdev = ae_dev->priv;
|
|
+ struct device *dev = &hdev->pdev->dev;
|
|
+ struct hclge_mod_err_info *mod_info;
|
|
+ struct hclge_sum_err_info *sum_info;
|
|
+ u8 mod_num, err_num, i;
|
|
+ u32 offset = 0;
|
|
+
|
|
+ sum_info = (struct hclge_sum_err_info *)&buf[offset++];
|
|
+ if (sum_info->reset_type &&
|
|
+ sum_info->reset_type != HNAE3_NONE_RESET)
|
|
+ set_bit(sum_info->reset_type, &ae_dev->hw_err_reset_req);
|
|
+ mod_num = sum_info->mod_num;
|
|
+
|
|
+ while (mod_num--) {
|
|
+ if (offset >= buf_size) {
|
|
+ dev_err(dev, "The offset(%u) exceeds buf's size(%u).\n",
|
|
+ offset, buf_size);
|
|
+ return;
|
|
+ }
|
|
+ mod_info = (struct hclge_mod_err_info *)&buf[offset++];
|
|
+ err_num = mod_info->err_num;
|
|
+
|
|
+ for (i = 0; i < err_num; i++) {
|
|
+ if (offset >= buf_size) {
|
|
+ dev_err(dev,
|
|
+ "The offset(%u) exceeds buf size(%u).\n",
|
|
+ offset, buf_size);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ type_reg_info = (struct hclge_type_reg_err_info *)
|
|
+ &buf[offset++];
|
|
+ hclge_handle_error_type_reg_log(dev, mod_info,
|
|
+ type_reg_info);
|
|
+
|
|
+ offset += type_reg_info->reg_num;
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static int hclge_query_all_err_bd_num(struct hclge_dev *hdev, u32 *bd_num)
|
|
+{
|
|
+ struct device *dev = &hdev->pdev->dev;
|
|
+ struct hclge_desc desc_bd;
|
|
+ int ret;
|
|
+
|
|
+ hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_ALL_ERR_BD_NUM, true);
|
|
+ ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
|
|
+ if (ret) {
|
|
+ dev_err(dev, "failed to query error bd_num, ret = %d.\n", ret);
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ *bd_num = le32_to_cpu(desc_bd.data[0]);
|
|
+ if (!(*bd_num)) {
|
|
+ dev_err(dev, "The value of bd_num is 0!\n");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int hclge_query_all_err_info(struct hclge_dev *hdev,
|
|
+ struct hclge_desc *desc, u32 bd_num)
|
|
+{
|
|
+ struct device *dev = &hdev->pdev->dev;
|
|
+ int ret;
|
|
+
|
|
+ hclge_cmd_setup_basic_desc(desc, HCLGE_QUERY_ALL_ERR_INFO, true);
|
|
+ ret = hclge_cmd_send(&hdev->hw, desc, bd_num);
|
|
+ if (ret)
|
|
+ dev_err(dev, "failed to query error info, ret = %d.\n", ret);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev)
|
|
+{
|
|
+	u32 bd_num, desc_len, buf_len, buf_size, i;
|
|
+	struct hclge_dev *hdev = ae_dev->priv;
|
|
+	struct hclge_desc *desc;
|
|
+	__le32 *desc_data;
|
|
+	u32 *buf;
|
|
+	int ret;
|
|
+
|
|
+	ret = hclge_query_all_err_bd_num(hdev, &bd_num);
|
|
+	if (ret)
|
|
+		goto out;
|
|
+
|
|
+	desc_len = bd_num * sizeof(struct hclge_desc);
|
|
+	desc = kzalloc(desc_len, GFP_KERNEL);
|
|
+	if (!desc) {
|
|
+		ret = -ENOMEM;
|
|
+		goto out;
|
|
+	}
|
|
+
|
|
+	ret = hclge_query_all_err_info(hdev, desc, bd_num);
|
|
+	if (ret)
|
|
+		goto err_desc;
|
|
+
|
|
+	buf_len = bd_num * sizeof(struct hclge_desc) - HCLGE_DESC_NO_DATA_LEN;
|
|
+	buf_size = buf_len / sizeof(u32);
|
|
+
|
|
+	/* Allocate both scratch buffers, then check them together: kfree(NULL)
|
|
+	 * is a no-op, so one label can release whichever allocation succeeded.
|
|
+	 */
|
|
+	desc_data = kzalloc(buf_len, GFP_KERNEL);
|
|
+	buf = kzalloc(buf_len, GFP_KERNEL);
|
|
+	if (!desc_data || !buf) {
|
|
+		ret = -ENOMEM;
|
|
+		goto err_buf_alloc;
|
|
+	}
|
|
+
|
|
+	memcpy(desc_data, &desc[0].data[0], buf_len);
|
|
+	for (i = 0; i < buf_size; i++)
|
|
+		buf[i] = le32_to_cpu(desc_data[i]);
|
|
+
|
|
+	hclge_handle_error_module_log(ae_dev, buf, buf_size);
|
|
+
|
|
+err_buf_alloc:
|
|
+	kfree(buf);
|
|
+	kfree(desc_data);
|
|
+err_desc:
|
|
+	kfree(desc);
|
|
+out:
|
|
+	return ret;
|
|
+}
|
|
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
|
|
index fcaf4f472379..6cc225a5cfbe 100644
|
|
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
|
|
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
|
|
@@ -110,6 +110,10 @@
|
|
#define HCLGE_ROCEE_OVF_ERR_INT_MASK 0x10000
|
|
#define HCLGE_ROCEE_OVF_ERR_TYPE_MASK 0x3F
|
|
|
|
+#define HCLGE_DESC_DATA_MAX 8
|
|
+#define HCLGE_REG_NUM_MAX 256
|
|
+#define HCLGE_DESC_NO_DATA_LEN 8
|
|
+
|
|
enum hclge_err_int_type {
|
|
HCLGE_ERR_INT_MSIX = 0,
|
|
HCLGE_ERR_INT_RAS_CE = 1,
|
|
@@ -117,6 +121,40 @@ enum hclge_err_int_type {
|
|
HCLGE_ERR_INT_RAS_FE = 3,
|
|
};
|
|
|
|
+enum hclge_mod_name_list {
|
|
+ MODULE_NONE = 0,
|
|
+ MODULE_BIOS_COMMON = 1,
|
|
+ MODULE_GE = 2,
|
|
+ MODULE_IGU_EGU = 3,
|
|
+ MODULE_LGE = 4,
|
|
+ MODULE_NCSI = 5,
|
|
+ MODULE_PPP = 6,
|
|
+ MODULE_QCN = 7,
|
|
+ MODULE_RCB_RX = 8,
|
|
+ MODULE_RTC = 9,
|
|
+ MODULE_SSU = 10,
|
|
+ MODULE_TM = 11,
|
|
+ MODULE_RCB_TX = 12,
|
|
+ MODULE_TXDMA = 13,
|
|
+ MODULE_MASTER = 14,
|
|
+};
|
|
+
|
|
+enum hclge_err_type_list {
|
|
+ NONE_ERROR = 0,
|
|
+ FIFO_ERROR = 1,
|
|
+ MEMORY_ERROR = 2,
|
|
+ POISON_ERROR = 3,
|
|
+ MSIX_ECC_ERROR = 4,
|
|
+ TQP_INT_ECC_ERROR = 5,
|
|
+ PF_ABNORMAL_INT_ERROR = 6,
|
|
+ MPF_ABNORMAL_INT_ERROR = 7,
|
|
+ COMMON_ERROR = 8,
|
|
+ PORT_ERROR = 9,
|
|
+ ETS_ERROR = 10,
|
|
+ NCSI_ERROR = 11,
|
|
+ GLB_ERROR = 12,
|
|
+};
|
|
+
|
|
struct hclge_hw_blk {
|
|
u32 msk;
|
|
const char *name;
|
|
@@ -153,6 +191,34 @@ extern const struct hclge_hw_error hclge_ssu_fifo_overflow_int[];
|
|
extern const struct hclge_hw_error hclge_ssu_ets_tcg_int[];
|
|
extern const struct hclge_hw_error hclge_ssu_port_based_pf_int[];
|
|
extern const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[];
|
|
+struct hclge_hw_module_id {
|
|
+ enum hclge_mod_name_list module_id;
|
|
+ const char *msg;
|
|
+};
|
|
+
|
|
+struct hclge_hw_type_id {
|
|
+ enum hclge_err_type_list type_id;
|
|
+ const char *msg;
|
|
+};
|
|
+
|
|
+struct hclge_sum_err_info {
|
|
+ u8 reset_type;
|
|
+ u8 mod_num;
|
|
+ u8 rsv[2];
|
|
+};
|
|
+
|
|
+struct hclge_mod_err_info {
|
|
+ u8 mod_id;
|
|
+ u8 err_num;
|
|
+ u8 rsv[2];
|
|
+};
|
|
+
|
|
+struct hclge_type_reg_err_info {
|
|
+ u8 type_id;
|
|
+ u8 reg_num;
|
|
+ u8 rsv[2];
|
|
+ u32 hclge_reg[HCLGE_REG_NUM_MAX];
|
|
+};
|
|
|
|
int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en);
|
|
int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state);
|
|
@@ -166,4 +232,6 @@ int hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev);
|
|
void hclge_log_error(struct device *dev, char *reg,
|
|
const struct hclge_hw_error *err,
|
|
u32 err_sts, unsigned long *reset_requests);
|
|
+int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev);
|
|
+int hclge_handle_mac_tnl(struct hclge_dev *hdev);
|
|
#endif
|
|
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
|
|
index f56cc318e2ad..59d81b0f48c5 100644
|
|
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
|
|
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
|
|
@@ -4157,11 +4157,49 @@ static void hclge_reset_subtask(struct hclge_dev *hdev)
|
|
hdev->reset_type = HNAE3_NONE_RESET;
|
|
}
|
|
|
|
+static void hclge_handle_err_reset_request(struct hclge_dev *hdev)
|
|
+{
|
|
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
|
|
+ enum hnae3_reset_type reset_type;
|
|
+
|
|
+ if (ae_dev->hw_err_reset_req) {
|
|
+ reset_type = hclge_get_reset_level(ae_dev,
|
|
+ &ae_dev->hw_err_reset_req);
|
|
+ hclge_set_def_reset_request(ae_dev, reset_type);
|
|
+ }
|
|
+
|
|
+ if (hdev->default_reset_request && ae_dev->ops->reset_event)
|
|
+ ae_dev->ops->reset_event(hdev->pdev, NULL);
|
|
+
|
|
+ /* enable interrupt after error handling complete */
|
|
+ hclge_enable_vector(&hdev->misc_vector, true);
|
|
+}
|
|
+
|
|
+static void hclge_handle_err_recovery(struct hclge_dev *hdev)
|
|
+{
|
|
+ u32 mask_val = HCLGE_RAS_REG_NFE_MASK | HCLGE_RAS_REG_ROCEE_ERR_MASK;
|
|
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
|
|
+ u32 msix_src_flag, hw_err_src_flag;
|
|
+
|
|
+ msix_src_flag = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS) &
|
|
+ HCLGE_VECTOR0_REG_MSIX_MASK;
|
|
+
|
|
+ hw_err_src_flag = hclge_read_dev(&hdev->hw,
|
|
+ HCLGE_RAS_PF_OTHER_INT_STS_REG) &
|
|
+ mask_val;
|
|
+
|
|
+ if (msix_src_flag || hw_err_src_flag) {
|
|
+ hclge_handle_error_info_log(ae_dev);
|
|
+ hclge_handle_mac_tnl(hdev);
|
|
+ }
|
|
+
|
|
+ hclge_handle_err_reset_request(hdev);
|
|
+}
|
|
+
|
|
static void hclge_misc_err_recovery(struct hclge_dev *hdev)
|
|
{
|
|
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
|
|
struct device *dev = &hdev->pdev->dev;
|
|
- enum hnae3_reset_type reset_type;
|
|
u32 msix_sts_reg;
|
|
|
|
msix_sts_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS);
|
|
@@ -4171,17 +4209,10 @@ static void hclge_misc_err_recovery(struct hclge_dev *hdev)
|
|
dev_info(dev, "received msix interrupt 0x%x\n",
|
|
msix_sts_reg);
|
|
}
|
|
- hclge_enable_vector(&hdev->misc_vector, true);
|
|
|
|
hclge_handle_hw_ras_error(ae_dev);
|
|
- if (ae_dev->hw_err_reset_req) {
|
|
- reset_type = hclge_get_reset_level(ae_dev,
|
|
- &ae_dev->hw_err_reset_req);
|
|
- hclge_set_def_reset_request(ae_dev, reset_type);
|
|
- }
|
|
|
|
- if (hdev->default_reset_request && ae_dev->ops->reset_event)
|
|
- ae_dev->ops->reset_event(hdev->pdev, NULL);
|
|
+ hclge_handle_err_reset_request(hdev);
|
|
}
|
|
|
|
static void hclge_errhand_service_task(struct hclge_dev *hdev)
|
|
@@ -4189,7 +4220,10 @@ static void hclge_errhand_service_task(struct hclge_dev *hdev)
|
|
if (!test_and_clear_bit(HCLGE_STATE_ERR_SERVICE_SCHED, &hdev->state))
|
|
return;
|
|
|
|
- hclge_misc_err_recovery(hdev);
|
|
+ if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3)
|
|
+ hclge_handle_err_recovery(hdev);
|
|
+ else
|
|
+ hclge_misc_err_recovery(hdev);
|
|
}
|
|
|
|
static void hclge_reset_service_task(struct hclge_dev *hdev)
|
|
--
|
|
2.34.1
|
|
|