720 lines
22 KiB
Diff
720 lines
22 KiB
Diff
From 576d5899a183853648fdf5e9bfd9fd50951153b9 Mon Sep 17 00:00:00 2001
|
|
From: Yunsheng Lin <linyunsheng@huawei.com>
|
|
Date: Sat, 24 Jul 2021 15:45:23 +0800
|
|
Subject: [PATCH 088/283] net: hns3: use tx bounce buffer for small packets
|
|
|
|
mainline inclusion
|
|
from mainline-v5.14-rc1
|
|
commit 907676b130711fd1f627824559e92259db2061d1
|
|
category: feature
|
|
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EMUR
|
|
CVE: NA
|
|
|
|
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=907676b130711fd1f627824559e92259db2061d1
|
|
|
|
----------------------------------------------------------------------
|
|
|
|
When the packet or frag size is small, it causes both security and
|
|
performance issues. As DMA can't map a sub-page, this means some extra
|
|
kernel data is visible to devices. On the other hand, the overhead
|
|
of dma map and unmap is huge when IOMMU is on.
|
|
|
|
So add a queue based tx shared bounce buffer to memcpy the small
|
|
packet when the length of the transmitted skb does not exceed tx_copybreak.
|
|
Add tx_spare_buf_size module param to set the size of tx spare
|
|
buffer, and add set/get_tunable to set or query the tx_copybreak.
|
|
|
|
The throughput improves from 30 Gbps to 90+ Gbps when running 16
|
|
netperf threads with 32KB UDP message size when IOMMU is in the
|
|
strict mode (tx_copybreak = 2000 and mtu = 1500).
|
|
|
|
Suggested-by: Barry Song <song.bao.hua@hisilicon.com>
|
|
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
|
|
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
|
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Reviewed-by: Yongxin Li <liyongxin1@huawei.com>
|
|
Signed-off-by: Junxin Chen <chenjunxin1@huawei.com>
|
|
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
|
|
Signed-off-by: Xiaodong Li <lixiaodong67@huawei.com>
|
|
|
|
Conflicts:
|
|
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
|
|
---
|
|
.../ethernet/hisilicon/hns3/hns3_debugfs.c | 54 +++-
|
|
.../net/ethernet/hisilicon/hns3/hns3_enet.c | 294 +++++++++++++++++-
|
|
.../net/ethernet/hisilicon/hns3/hns3_enet.h | 43 ++-
|
|
.../ethernet/hisilicon/hns3/hns3_ethtool.c | 51 +++
|
|
4 files changed, 423 insertions(+), 19 deletions(-)
|
|
|
|
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
|
|
index ad7015b3ec80..6b2179516fff 100644
|
|
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
|
|
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
|
|
@@ -385,6 +385,56 @@ static void hns3_dbg_fill_content(char *content, u16 len,
|
|
*pos++ = '\0';
|
|
}
|
|
|
|
+static const struct hns3_dbg_item tx_spare_info_items[] = {
|
|
+ { "QUEUE_ID", 2 },
|
|
+ { "COPYBREAK", 2 },
|
|
+ { "LEN", 7 },
|
|
+ { "NTU", 4 },
|
|
+ { "NTC", 4 },
|
|
+ { "LTC", 4 },
|
|
+ { "DMA", 17 },
|
|
+};
|
|
+
|
|
+static void hns3_dbg_tx_spare_info(struct hns3_enet_ring *ring, char *buf,
|
|
+ int len, u32 ring_num, int *pos)
|
|
+{
|
|
+ char data_str[ARRAY_SIZE(tx_spare_info_items)][HNS3_DBG_DATA_STR_LEN];
|
|
+ struct hns3_tx_spare *tx_spare = ring->tx_spare;
|
|
+ char *result[ARRAY_SIZE(tx_spare_info_items)];
|
|
+ char content[HNS3_DBG_INFO_LEN];
|
|
+ u32 i, j;
|
|
+
|
|
+ if (!tx_spare) {
|
|
+ *pos += scnprintf(buf + *pos, len - *pos,
|
|
+ "tx spare buffer is not enabled\n");
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < ARRAY_SIZE(tx_spare_info_items); i++)
|
|
+ result[i] = &data_str[i][0];
|
|
+
|
|
+ *pos += scnprintf(buf + *pos, len - *pos, "tx spare buffer info\n");
|
|
+ hns3_dbg_fill_content(content, sizeof(content), tx_spare_info_items,
|
|
+ NULL, ARRAY_SIZE(tx_spare_info_items));
|
|
+ *pos += scnprintf(buf + *pos, len - *pos, "%s", content);
|
|
+
|
|
+ for (i = 0; i < ring_num; i++) {
|
|
+ j = 0;
|
|
+ sprintf(result[j++], "%8u", i);
|
|
+ sprintf(result[j++], "%9u", ring->tx_copybreak);
|
|
+ sprintf(result[j++], "%3u", tx_spare->len);
|
|
+ sprintf(result[j++], "%3u", tx_spare->next_to_use);
|
|
+ sprintf(result[j++], "%3u", tx_spare->next_to_clean);
|
|
+ sprintf(result[j++], "%3u", tx_spare->last_to_clean);
|
|
+ sprintf(result[j++], "%pad", &tx_spare->dma);
|
|
+ hns3_dbg_fill_content(content, sizeof(content),
|
|
+ tx_spare_info_items,
|
|
+ (const char **)result,
|
|
+ ARRAY_SIZE(tx_spare_info_items));
|
|
+ *pos += scnprintf(buf + *pos, len - *pos, "%s", content);
|
|
+ }
|
|
+}
|
|
+
|
|
static const struct hns3_dbg_item rx_queue_info_items[] = {
|
|
{ "QUEUE_ID", 2 },
|
|
{ "BD_NUM", 2 },
|
|
@@ -544,7 +594,7 @@ static int hns3_dbg_tx_queue_info(struct hnae3_handle *h,
|
|
char *result[ARRAY_SIZE(tx_queue_info_items)];
|
|
struct hns3_nic_priv *priv = h->priv;
|
|
char content[HNS3_DBG_INFO_LEN];
|
|
- struct hns3_enet_ring *ring;
|
|
+ struct hns3_enet_ring *ring = NULL;
|
|
int pos = 0;
|
|
u32 i;
|
|
|
|
@@ -578,6 +628,8 @@ static int hns3_dbg_tx_queue_info(struct hnae3_handle *h,
|
|
pos += scnprintf(buf + pos, len - pos, "%s", content);
|
|
}
|
|
|
|
+ hns3_dbg_tx_spare_info(ring, buf, len, h->kinfo.num_tqps, &pos);
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
|
|
index 7600a2e4a645..fd800aa35f3e 100644
|
|
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
|
|
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
|
|
@@ -52,6 +52,10 @@ static int debug = -1;
|
|
module_param(debug, int, 0);
|
|
MODULE_PARM_DESC(debug, " Network interface message level setting");
|
|
|
|
+static unsigned int tx_spare_buf_size;
|
|
+module_param(tx_spare_buf_size, uint, 0400);
|
|
+MODULE_PARM_DESC(tx_spare_buf_size, "Size used to allocate tx spare buffer");
|
|
+
|
|
#define DEFAULT_MSG_LEVEL (NETIF_MSG_PROBE | NETIF_MSG_LINK | \
|
|
NETIF_MSG_IFDOWN | NETIF_MSG_IFUP)
|
|
|
|
@@ -645,7 +649,178 @@ void hns3_request_update_promisc_mode(struct hnae3_handle *handle)
|
|
ops->request_update_promisc_mode(handle);
|
|
}
|
|
|
|
-static int hns3_set_tso(struct sk_buff *skb, u32 *paylen,
|
|
+static u32 hns3_tx_spare_space(struct hns3_enet_ring *ring)
|
|
+{
|
|
+ struct hns3_tx_spare *tx_spare = ring->tx_spare;
|
|
+ u32 ntc, ntu;
|
|
+
|
|
+ /* This smp_load_acquire() pairs with smp_store_release() in
|
|
+ * hns3_tx_spare_update() called in tx desc cleaning process.
|
|
+ */
|
|
+ ntc = smp_load_acquire(&tx_spare->last_to_clean);
|
|
+ ntu = tx_spare->next_to_use;
|
|
+
|
|
+ if (ntc > ntu)
|
|
+ return ntc - ntu - 1;
|
|
+
|
|
+ /* The free tx buffer is divided into two parts, so pick the
|
|
+ * larger one.
|
|
+ */
|
|
+ return (ntc > (tx_spare->len - ntu) ? ntc :
|
|
+ (tx_spare->len - ntu)) - 1;
|
|
+}
|
|
+
|
|
+static void hns3_tx_spare_update(struct hns3_enet_ring *ring)
|
|
+{
|
|
+ struct hns3_tx_spare *tx_spare = ring->tx_spare;
|
|
+
|
|
+ if (!tx_spare ||
|
|
+ tx_spare->last_to_clean == tx_spare->next_to_clean)
|
|
+ return;
|
|
+
|
|
+ /* This smp_store_release() pairs with smp_load_acquire() in
|
|
+ * hns3_tx_spare_space() called in xmit process.
|
|
+ */
|
|
+ smp_store_release(&tx_spare->last_to_clean,
|
|
+ tx_spare->next_to_clean);
|
|
+}
|
|
+
|
|
+static bool hns3_can_use_tx_bounce(struct hns3_enet_ring *ring,
|
|
+ struct sk_buff *skb,
|
|
+ u32 space)
|
|
+{
|
|
+ u32 len = skb->len <= ring->tx_copybreak ? skb->len :
|
|
+ skb_headlen(skb);
|
|
+
|
|
+ if (len > ring->tx_copybreak)
|
|
+ return false;
|
|
+
|
|
+ if (ALIGN(len, dma_get_cache_alignment()) > space) {
|
|
+ u64_stats_update_begin(&ring->syncp);
|
|
+ ring->stats.tx_spare_full++;
|
|
+ u64_stats_update_end(&ring->syncp);
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ return true;
|
|
+}
|
|
+
|
|
+static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring)
|
|
+{
|
|
+ struct hns3_tx_spare *tx_spare;
|
|
+ struct page *page;
|
|
+ dma_addr_t dma;
|
|
+ int order;
|
|
+
|
|
+ if (!tx_spare_buf_size)
|
|
+ return;
|
|
+
|
|
+ order = get_order(tx_spare_buf_size);
|
|
+ tx_spare = devm_kzalloc(ring_to_dev(ring), sizeof(*tx_spare),
|
|
+ GFP_KERNEL);
|
|
+ if (!tx_spare) {
|
|
+ /* The driver still works without the tx spare buffer */
|
|
+ dev_warn(ring_to_dev(ring), "failed to allocate hns3_tx_spare\n");
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ page = alloc_pages_node(dev_to_node(ring_to_dev(ring)),
|
|
+ GFP_KERNEL, order);
|
|
+ if (!page) {
|
|
+ dev_warn(ring_to_dev(ring), "failed to allocate tx spare pages\n");
|
|
+ devm_kfree(ring_to_dev(ring), tx_spare);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ dma = dma_map_page(ring_to_dev(ring), page, 0,
|
|
+ PAGE_SIZE << order, DMA_TO_DEVICE);
|
|
+ if (dma_mapping_error(ring_to_dev(ring), dma)) {
|
|
+ dev_warn(ring_to_dev(ring), "failed to map pages for tx spare\n");
|
|
+ put_page(page);
|
|
+ devm_kfree(ring_to_dev(ring), tx_spare);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ tx_spare->dma = dma;
|
|
+ tx_spare->buf = page_address(page);
|
|
+ tx_spare->len = PAGE_SIZE << order;
|
|
+ ring->tx_spare = tx_spare;
|
|
+}
|
|
+
|
|
+/* Use hns3_tx_spare_space() to make sure there is enough buffer
|
|
+ * before calling below function to allocate tx buffer.
|
|
+ */
|
|
+static void *hns3_tx_spare_alloc(struct hns3_enet_ring *ring,
|
|
+ unsigned int size, dma_addr_t *dma,
|
|
+ u32 *cb_len)
|
|
+{
|
|
+ struct hns3_tx_spare *tx_spare = ring->tx_spare;
|
|
+ u32 ntu = tx_spare->next_to_use;
|
|
+
|
|
+ size = ALIGN(size, dma_get_cache_alignment());
|
|
+ *cb_len = size;
|
|
+
|
|
+ /* Tx spare buffer wraps back here because the end of
|
|
+ * freed tx buffer is not enough.
|
|
+ */
|
|
+ if (ntu + size > tx_spare->len) {
|
|
+ *cb_len += (tx_spare->len - ntu);
|
|
+ ntu = 0;
|
|
+ }
|
|
+
|
|
+ tx_spare->next_to_use = ntu + size;
|
|
+ if (tx_spare->next_to_use == tx_spare->len)
|
|
+ tx_spare->next_to_use = 0;
|
|
+
|
|
+ *dma = tx_spare->dma + ntu;
|
|
+
|
|
+ return tx_spare->buf + ntu;
|
|
+}
|
|
+
|
|
+static void hns3_tx_spare_rollback(struct hns3_enet_ring *ring, u32 len)
|
|
+{
|
|
+ struct hns3_tx_spare *tx_spare = ring->tx_spare;
|
|
+
|
|
+ if (len > tx_spare->next_to_use) {
|
|
+ len -= tx_spare->next_to_use;
|
|
+ tx_spare->next_to_use = tx_spare->len - len;
|
|
+ } else {
|
|
+ tx_spare->next_to_use -= len;
|
|
+ }
|
|
+}
|
|
+
|
|
+static void hns3_tx_spare_reclaim_cb(struct hns3_enet_ring *ring,
|
|
+ struct hns3_desc_cb *cb)
|
|
+{
|
|
+ struct hns3_tx_spare *tx_spare = ring->tx_spare;
|
|
+ u32 ntc = tx_spare->next_to_clean;
|
|
+ u32 len = cb->length;
|
|
+
|
|
+ tx_spare->next_to_clean += len;
|
|
+
|
|
+ if (tx_spare->next_to_clean >= tx_spare->len) {
|
|
+ tx_spare->next_to_clean -= tx_spare->len;
|
|
+
|
|
+ if (tx_spare->next_to_clean) {
|
|
+ ntc = 0;
|
|
+ len = tx_spare->next_to_clean;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* This tx spare buffer is only really reclaimed after calling
|
|
+ * hns3_tx_spare_update(), so it is still safe to use the info in
|
|
+ * the tx buffer to do the dma sync after tx_spare->next_to_clean
|
|
+ * is moved forward.
|
|
+ */
|
|
+ if (cb->type & (DESC_TYPE_BOUNCE_HEAD | DESC_TYPE_BOUNCE_ALL)) {
|
|
+ dma_addr_t dma = tx_spare->dma + ntc;
|
|
+
|
|
+ dma_sync_single_for_cpu(ring_to_dev(ring), dma, len,
|
|
+ DMA_TO_DEVICE);
|
|
+ }
|
|
+}
|
|
+
|
|
+static int hns3_set_tso(struct sk_buff *skb, u32 *paylen_fdop_ol4cs,
|
|
u16 *mss, u32 *type_cs_vlan_tso, u32 *send_bytes)
|
|
{
|
|
u32 l4_offset, hdr_len;
|
|
@@ -719,7 +894,7 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen,
|
|
*send_bytes = (skb_shinfo(skb)->gso_segs - 1) * hdr_len + skb->len;
|
|
|
|
/* find the txbd field values */
|
|
- *paylen = skb->len - hdr_len;
|
|
+ *paylen_fdop_ol4cs = skb->len - hdr_len;
|
|
hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_TSO_B, 1);
|
|
|
|
/* get MSS for TSO */
|
|
@@ -1190,6 +1365,11 @@ static int hns3_map_and_fill_desc(struct hns3_enet_ring *ring, void *priv,
|
|
return 0;
|
|
|
|
dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
|
|
+ } else if (type & DESC_TYPE_BOUNCE_HEAD) {
|
|
+ /* Head data has been filled in hns3_handle_tx_bounce(),
|
|
+ * just return 0 here.
|
|
+ */
|
|
+ return 0;
|
|
} else {
|
|
struct skb_frag_struct *frag = (struct skb_frag_struct *)priv;
|
|
|
|
@@ -1446,6 +1626,9 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig)
|
|
if (desc_cb->type & (DESC_TYPE_SKB | DESC_TYPE_FRAGLIST_SKB))
|
|
dma_unmap_single(dev, desc_cb->dma, desc_cb->length,
|
|
DMA_TO_DEVICE);
|
|
+ else if (desc_cb->type &
|
|
+ (DESC_TYPE_BOUNCE_HEAD | DESC_TYPE_BOUNCE_ALL))
|
|
+ hns3_tx_spare_rollback(ring, desc_cb->length);
|
|
else if (desc_cb->length)
|
|
dma_unmap_page(dev, desc_cb->dma, desc_cb->length,
|
|
DMA_TO_DEVICE);
|
|
@@ -1527,6 +1710,79 @@ static void hns3_tsyn(struct net_device *netdev, struct sk_buff *skb,
|
|
desc->tx.bdtp_fe_sc_vld_ra_ri |= cpu_to_le16(BIT(HNS3_TXD_TSYN_B));
|
|
}
|
|
|
|
+static int hns3_handle_tx_bounce(struct hns3_enet_ring *ring,
|
|
+ struct sk_buff *skb)
|
|
+{
|
|
+ struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
|
|
+ unsigned int type = DESC_TYPE_BOUNCE_HEAD;
|
|
+ unsigned int size = skb_headlen(skb);
|
|
+ dma_addr_t dma;
|
|
+ int bd_num = 0;
|
|
+ u32 cb_len;
|
|
+ void *buf;
|
|
+ int ret;
|
|
+
|
|
+ if (skb->len <= ring->tx_copybreak) {
|
|
+ size = skb->len;
|
|
+ type = DESC_TYPE_BOUNCE_ALL;
|
|
+ }
|
|
+
|
|
+ /* hns3_can_use_tx_bounce() is called to ensure the below
|
|
+ * function can always return the tx buffer.
|
|
+ */
|
|
+ buf = hns3_tx_spare_alloc(ring, size, &dma, &cb_len);
|
|
+
|
|
+ ret = skb_copy_bits(skb, 0, buf, size);
|
|
+ if (unlikely(ret < 0)) {
|
|
+ hns3_tx_spare_rollback(ring, cb_len);
|
|
+ u64_stats_update_begin(&ring->syncp);
|
|
+ ring->stats.copy_bits_err++;
|
|
+ u64_stats_update_end(&ring->syncp);
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ desc_cb->priv = skb;
|
|
+ desc_cb->length = cb_len;
|
|
+ desc_cb->dma = dma;
|
|
+ desc_cb->type = type;
|
|
+
|
|
+ bd_num += hns3_fill_desc(ring, dma, size);
|
|
+
|
|
+ if (type == DESC_TYPE_BOUNCE_HEAD) {
|
|
+ ret = hns3_fill_skb_to_desc(ring, skb,
|
|
+ DESC_TYPE_BOUNCE_HEAD);
|
|
+ if (unlikely(ret < 0))
|
|
+ return ret;
|
|
+
|
|
+ bd_num += ret;
|
|
+ }
|
|
+
|
|
+ dma_sync_single_for_device(ring_to_dev(ring), dma, size,
|
|
+ DMA_TO_DEVICE);
|
|
+
|
|
+ u64_stats_update_begin(&ring->syncp);
|
|
+ ring->stats.tx_bounce++;
|
|
+ u64_stats_update_end(&ring->syncp);
|
|
+ return bd_num;
|
|
+}
|
|
+
|
|
+static int hns3_handle_desc_filling(struct hns3_enet_ring *ring,
|
|
+ struct sk_buff *skb)
|
|
+{
|
|
+ u32 space;
|
|
+
|
|
+ if (!ring->tx_spare)
|
|
+ goto out;
|
|
+
|
|
+ space = hns3_tx_spare_space(ring);
|
|
+
|
|
+ if (hns3_can_use_tx_bounce(ring, skb, space))
|
|
+ return hns3_handle_tx_bounce(ring, skb);
|
|
+
|
|
+out:
|
|
+ return hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB);
|
|
+}
|
|
+
|
|
netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
|
|
{
|
|
struct hns3_nic_priv *priv = netdev_priv(netdev);
|
|
@@ -1572,7 +1828,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
|
|
* zero, which is unlikely, and 'ret > 0' means how many tx desc
|
|
* need to be notified to the hw.
|
|
*/
|
|
- ret = hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB);
|
|
+ ret = hns3_handle_desc_filling(ring, skb);
|
|
if (unlikely(ret <= 0))
|
|
goto fill_err;
|
|
|
|
@@ -1778,6 +2034,7 @@ static struct rtnl_link_stats64 *hns3_nic_get_stats64(struct net_device *netdev,
|
|
tx_drop += ring->stats.tx_tso_err;
|
|
tx_drop += ring->stats.over_max_recursion;
|
|
tx_drop += ring->stats.hw_limitation;
|
|
+ tx_drop += ring->stats.copy_bits_err;
|
|
tx_errors += ring->stats.sw_err_cnt;
|
|
tx_errors += ring->stats.tx_vlan_err;
|
|
tx_errors += ring->stats.tx_l4_proto_err;
|
|
@@ -1785,6 +2042,7 @@ static struct rtnl_link_stats64 *hns3_nic_get_stats64(struct net_device *netdev,
|
|
tx_errors += ring->stats.tx_tso_err;
|
|
tx_errors += ring->stats.over_max_recursion;
|
|
tx_errors += ring->stats.hw_limitation;
|
|
+ tx_errors += ring->stats.copy_bits_err;
|
|
} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
|
|
|
|
/* fetch the rx stats */
|
|
@@ -2552,7 +2810,8 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
|
|
static void hns3_free_buffer(struct hns3_enet_ring *ring,
|
|
struct hns3_desc_cb *cb, int budget)
|
|
{
|
|
- if (cb->type & DESC_TYPE_SKB)
|
|
+ if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_HEAD |
|
|
+ DESC_TYPE_BOUNCE_ALL))
|
|
napi_consume_skb(cb->priv, budget);
|
|
else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias)
|
|
__page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
|
|
@@ -2576,9 +2835,11 @@ static void hns3_unmap_buffer(struct hns3_enet_ring *ring,
|
|
if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_FRAGLIST_SKB))
|
|
dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length,
|
|
ring_to_dma_dir(ring));
|
|
- else if (cb->length)
|
|
+ else if ((cb->type & DESC_TYPE_PAGE) && cb->length)
|
|
dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length,
|
|
ring_to_dma_dir(ring));
|
|
+ else if (cb->type & (DESC_TYPE_BOUNCE_ALL | DESC_TYPE_BOUNCE_HEAD))
|
|
+ hns3_tx_spare_reclaim_cb(ring, cb);
|
|
}
|
|
|
|
static void hns3_buffer_detach(struct hns3_enet_ring *ring, int i)
|
|
@@ -2729,7 +2990,8 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
|
|
|
|
desc_cb = &ring->desc_cb[ntc];
|
|
|
|
- if (desc_cb->type & DESC_TYPE_SKB) {
|
|
+ if (desc_cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_ALL |
|
|
+ DESC_TYPE_BOUNCE_HEAD)) {
|
|
(*pkts)++;
|
|
(*bytes) += desc_cb->send_bytes;
|
|
}
|
|
@@ -2752,6 +3014,9 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
|
|
* ring_space called by hns3_nic_net_xmit.
|
|
*/
|
|
smp_store_release(&ring->next_to_clean, ntc);
|
|
+
|
|
+ hns3_tx_spare_update(ring);
|
|
+
|
|
return true;
|
|
}
|
|
|
|
@@ -3960,7 +4225,8 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
|
|
ring = &priv->ring[q->tqp_index];
|
|
desc_num = priv->ae_handle->kinfo.num_tx_desc;
|
|
ring->queue_index = q->tqp_index;
|
|
- ring->io_base = (u8 __iomem *)q->io_base + HNS3_TX_REG_OFFSET;
|
|
+ ring->tx_copybreak = priv->tx_copybreak;
|
|
+ ring->last_to_use = 0;
|
|
} else {
|
|
ring = &priv->ring[q->tqp_index + queue_num];
|
|
desc_num = priv->ae_handle->kinfo.num_rx_desc;
|
|
@@ -3979,7 +4245,6 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
|
|
ring->desc_num = desc_num;
|
|
ring->next_to_use = 0;
|
|
ring->next_to_clean = 0;
|
|
- ring->last_to_use = 0;
|
|
}
|
|
|
|
static void hns3_queue_to_ring(struct hnae3_queue *tqp,
|
|
@@ -4039,6 +4304,8 @@ static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
|
|
ret = hns3_alloc_ring_buffers(ring);
|
|
if (ret)
|
|
goto out_with_desc;
|
|
+ } else {
|
|
+ hns3_init_tx_spare_buffer(ring);
|
|
}
|
|
|
|
return 0;
|
|
@@ -4061,9 +4328,18 @@ void hns3_fini_ring(struct hns3_enet_ring *ring)
|
|
ring->next_to_use = 0;
|
|
ring->last_to_use = 0;
|
|
ring->pending_buf = 0;
|
|
- if (ring->skb) {
|
|
+ if (!HNAE3_IS_TX_RING(ring) && ring->skb) {
|
|
dev_kfree_skb_any(ring->skb);
|
|
ring->skb = NULL;
|
|
+ } else if (HNAE3_IS_TX_RING(ring) && ring->tx_spare) {
|
|
+ struct hns3_tx_spare *tx_spare = ring->tx_spare;
|
|
+
|
|
+ dma_unmap_page(ring_to_dev(ring), tx_spare->dma, tx_spare->len,
|
|
+ DMA_TO_DEVICE);
|
|
+ free_pages((unsigned long)tx_spare->buf,
|
|
+ get_order(tx_spare->len));
|
|
+ devm_kfree(ring_to_dev(ring), tx_spare);
|
|
+ ring->tx_spare = NULL;
|
|
}
|
|
}
|
|
|
|
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
|
|
index 4b035c458a58..7d0d9c3eb53c 100644
|
|
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
|
|
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
|
|
@@ -302,6 +302,8 @@ enum hns3_desc_type {
|
|
DESC_TYPE_SKB = 1 << 0,
|
|
DESC_TYPE_FRAGLIST_SKB = 1 << 1,
|
|
DESC_TYPE_PAGE = 1 << 2,
|
|
+ DESC_TYPE_BOUNCE_ALL = 1 << 3,
|
|
+ DESC_TYPE_BOUNCE_HEAD = 1 << 4,
|
|
};
|
|
|
|
struct hns3_desc_cb {
|
|
@@ -395,6 +397,9 @@ struct ring_stats {
|
|
u64 tx_tso_err;
|
|
u64 over_max_recursion;
|
|
u64 hw_limitation;
|
|
+ u64 tx_bounce;
|
|
+ u64 tx_spare_full;
|
|
+ u64 copy_bits_err;
|
|
};
|
|
struct {
|
|
u64 rx_pkts;
|
|
@@ -411,6 +416,15 @@ struct ring_stats {
|
|
};
|
|
};
|
|
|
|
+struct hns3_tx_spare {
|
|
+ dma_addr_t dma;
|
|
+ void *buf;
|
|
+ u32 next_to_use;
|
|
+ u32 next_to_clean;
|
|
+ u32 last_to_clean;
|
|
+ u32 len;
|
|
+};
|
|
+
|
|
struct hns3_enet_ring {
|
|
u8 __iomem *io_base; /* base io address for the ring */
|
|
struct hns3_desc *desc; /* dma map address space */
|
|
@@ -434,18 +448,28 @@ struct hns3_enet_ring {
|
|
* next_to_use
|
|
*/
|
|
int next_to_clean;
|
|
- union {
|
|
- int last_to_use; /* last idx used by xmit */
|
|
- u32 pull_len; /* memcpy len for current rx packet */
|
|
- };
|
|
- u32 frag_num;
|
|
- void *va; /* first buffer address for current packet */
|
|
-
|
|
u32 flag; /* ring attribute */
|
|
|
|
int pending_buf;
|
|
- struct sk_buff *skb;
|
|
- struct sk_buff *tail_skb;
|
|
+ union {
|
|
+ /* for Tx ring */
|
|
+ struct {
|
|
+ u32 fd_qb_tx_sample;
|
|
+ int last_to_use; /* last idx used by xmit */
|
|
+ u32 tx_copybreak;
|
|
+ struct hns3_tx_spare *tx_spare;
|
|
+ };
|
|
+
|
|
+ /* for Rx ring */
|
|
+ struct {
|
|
+ u32 pull_len; /* memcpy len for current rx packet */
|
|
+ u32 frag_num;
|
|
+ /* first buffer address for current packet */
|
|
+ unsigned char *va;
|
|
+ struct sk_buff *skb;
|
|
+ struct sk_buff *tail_skb;
|
|
+ };
|
|
+ };
|
|
} ____cacheline_internodealigned_in_smp;
|
|
|
|
enum hns3_flow_level_range {
|
|
@@ -524,6 +548,7 @@ struct hns3_nic_priv {
|
|
|
|
struct hns3_enet_coalesce tx_coal;
|
|
struct hns3_enet_coalesce rx_coal;
|
|
+ u32 tx_copybreak;
|
|
};
|
|
|
|
union l3_hdr_info {
|
|
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
|
|
index ae22adfe5740..5b5780a59d3e 100644
|
|
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
|
|
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
|
|
@@ -50,6 +50,9 @@ static const struct hns3_stats hns3_txq_stats[] = {
|
|
HNS3_TQP_STAT("tso_err", tx_tso_err),
|
|
HNS3_TQP_STAT("over_max_recursion", over_max_recursion),
|
|
HNS3_TQP_STAT("hw_limitation", hw_limitation),
|
|
+ HNS3_TQP_STAT("bounce", tx_bounce),
|
|
+ HNS3_TQP_STAT("spare_full", tx_spare_full),
|
|
+ HNS3_TQP_STAT("copy_bits_err", copy_bits_err),
|
|
};
|
|
|
|
#define HNS3_TXQ_STATS_COUNT ARRAY_SIZE(hns3_txq_stats)
|
|
@@ -1604,6 +1607,50 @@ static int hns3_set_priv_flags(struct net_device *netdev, u32 pflags)
|
|
return 0;
|
|
}
|
|
|
|
+static int hns3_get_tunable(struct net_device *netdev,
|
|
+ const struct ethtool_tunable *tuna,
|
|
+ void *data)
|
|
+{
|
|
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
|
|
+ int ret = 0;
|
|
+
|
|
+ switch (tuna->id) {
|
|
+ case ETHTOOL_TX_COPYBREAK:
|
|
+ /* all the tx rings have the same tx_copybreak */
|
|
+ *(u32 *)data = priv->tx_copybreak;
|
|
+ break;
|
|
+ default:
|
|
+ ret = -EOPNOTSUPP;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int hns3_set_tunable(struct net_device *netdev,
|
|
+ const struct ethtool_tunable *tuna,
|
|
+ const void *data)
|
|
+{
|
|
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
|
|
+ struct hnae3_handle *h = priv->ae_handle;
|
|
+ int i, ret = 0;
|
|
+
|
|
+ switch (tuna->id) {
|
|
+ case ETHTOOL_TX_COPYBREAK:
|
|
+ priv->tx_copybreak = *(u32 *)data;
|
|
+
|
|
+ for (i = 0; i < h->kinfo.num_tqps; i++)
|
|
+ priv->ring[i].tx_copybreak = priv->tx_copybreak;
|
|
+
|
|
+ break;
|
|
+ default:
|
|
+ ret = -EOPNOTSUPP;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
#define HNS3_ETHTOOL_COALESCE (ETHTOOL_COALESCE_USECS | \
|
|
ETHTOOL_COALESCE_USE_ADAPTIVE | \
|
|
ETHTOOL_COALESCE_RX_USECS_HIGH | \
|
|
@@ -1647,6 +1694,8 @@ static const struct ethtool_ops hns3vf_ethtool_ops = {
|
|
.set_msglevel = hns3_set_msglevel,
|
|
.get_priv_flags = hns3_get_priv_flags,
|
|
.set_priv_flags = hns3_set_priv_flags,
|
|
+ .get_tunable = hns3_get_tunable,
|
|
+ .set_tunable = hns3_set_tunable,
|
|
};
|
|
|
|
static const struct ethtool_ops hns3_ethtool_ops = {
|
|
@@ -1686,6 +1735,8 @@ static const struct ethtool_ops hns3_ethtool_ops = {
|
|
.get_priv_flags = hns3_get_priv_flags,
|
|
.set_priv_flags = hns3_set_priv_flags,
|
|
.get_ts_info = hns3_get_ts_info,
|
|
+ .get_tunable = hns3_get_tunable,
|
|
+ .set_tunable = hns3_set_tunable,
|
|
};
|
|
|
|
void hns3_ethtool_set_ops(struct net_device *netdev)
|
|
--
|
|
2.34.1
|
|
|