From 76f9aa6f4bfd12fba34fc44aef890c48d4fb1024 Mon Sep 17 00:00:00 2001
From: Jakov Petrina <jakov.petrina@sartura.hr>
Date: Fri, 15 May 2020 12:52:35 +0200
Subject: [PATCH] linux: mvebu: backport mvneta XDP support

This patch backports XDP support in the mvneta driver used by Marvell ARMADA 37x,
38x and 37xx series SoCs. Supported actions are:

- XDP_DROP
- XDP_PASS
- XDP_REDIRECT
- XDP_TX

Patches are present upstream as the following commits:

* b0a43db9087a net: mvneta: add XDP_TX support
* 9e58c8b41065 net: mvneta: make tx buffer array agnostic
* fa383f6b77a2 net: mvneta: move header prefetch in mvneta_swbm_rx_frame
* 0db51da7a8e9 net: mvneta: add basic XDP support
* 8dc9a0888f4c net: mvneta: rely on build_skb in mvneta_rx_swbm poll routine
* 568a3fa24a95 net: mvneta: introduce page pool API for sw buffer manager
* ff519e2acd46 net: mvneta: introduce mvneta_update_stats routine

Signed-off-by: Jakov Petrina <jakov.petrina@sartura.hr>
---
 ...ntroduce-mvneta_update_stats-routine.patch |  95 ++++++
 ...duce-page-pool-API-for-sw-buffer-man.patch | 181 ++++++++++
 ...on-build_skb-in-mvneta_rx_swbm-poll-.patch | 303 +++++++++++++++++
 ...013-net-mvneta-add-basic-XDP-support.patch | 311 ++++++++++++++++++
 ...header-prefetch-in-mvneta_swbm_rx_fr.patch |  43 +++
 ...mvneta-make-tx-buffer-array-agnostic.patch | 210 ++++++++++++
 .../016-net-mvneta-add-XDP_TX-support.patch   | 175 ++++++++++
 .../300-mvneta-tx-queue-workaround.patch      |   8 +-
 8 files changed, 1322 insertions(+), 4 deletions(-)
 create mode 100644 target/linux/mvebu/patches-5.4/010-net-mvneta-introduce-mvneta_update_stats-routine.patch
 create mode 100644 target/linux/mvebu/patches-5.4/011-net-mvneta-introduce-page-pool-API-for-sw-buffer-man.patch
 create mode 100644 target/linux/mvebu/patches-5.4/012-net-mvneta-rely-on-build_skb-in-mvneta_rx_swbm-poll-.patch
 create mode 100644 target/linux/mvebu/patches-5.4/013-net-mvneta-add-basic-XDP-support.patch
 create mode 100644 target/linux/mvebu/patches-5.4/014-net-mvneta-move-header-prefetch-in-mvneta_swbm_rx_fr.patch
 create mode 100644 target/linux/mvebu/patches-5.4/015-net-mvneta-make-tx-buffer-array-agnostic.patch
 create mode 100644 target/linux/mvebu/patches-5.4/016-net-mvneta-add-XDP_TX-support.patch

diff --git a/target/linux/mvebu/patches-5.4/010-net-mvneta-introduce-mvneta_update_stats-routine.patch b/target/linux/mvebu/patches-5.4/010-net-mvneta-introduce-mvneta_update_stats-routine.patch
new file mode 100644
index 0000000000..c6d6d55b29
--- /dev/null
+++ b/target/linux/mvebu/patches-5.4/010-net-mvneta-introduce-mvneta_update_stats-routine.patch
@@ -0,0 +1,95 @@
+From 06815202344ee6c256b63e68c16cdc3b0480f4ee Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Sat, 19 Oct 2019 10:13:21 +0200
+Subject: [PATCH 1/7] net: mvneta: introduce mvneta_update_stats routine
+
+Introduce mvneta_update_stats routine to collect {rx/tx} statistics
+(packets and bytes). This is a preliminary patch to add XDP support to
+mvneta driver
+
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/ethernet/marvell/mvneta.c | 43 ++++++++++++++-------------
+ 1 file changed, 22 insertions(+), 21 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -1911,6 +1911,23 @@ static void mvneta_rxq_drop_pkts(struct
+ 	}
+ }
+ 
++static void
++mvneta_update_stats(struct mvneta_port *pp, u32 pkts,
++		    u32 len, bool tx)
++{
++	struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
++
++	u64_stats_update_begin(&stats->syncp);
++	if (tx) {
++		stats->tx_packets += pkts;
++		stats->tx_bytes += len;
++	} else {
++		stats->rx_packets += pkts;
++		stats->rx_bytes += len;
++	}
++	u64_stats_update_end(&stats->syncp);
++}
++
+ static inline
+ int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)
+ {
+@@ -2091,14 +2108,8 @@ static int mvneta_rx_swbm(struct napi_st
+ 		rxq->left_size = 0;
+ 	}
+ 
+-	if (rcvd_pkts) {
+-		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+-
+-		u64_stats_update_begin(&stats->syncp);
+-		stats->rx_packets += rcvd_pkts;
+-		stats->rx_bytes   += rcvd_bytes;
+-		u64_stats_update_end(&stats->syncp);
+-	}
++	if (rcvd_pkts)
++		mvneta_update_stats(pp, rcvd_pkts, rcvd_bytes, false);
+ 
+ 	/* return some buffers to hardware queue, one at a time is too slow */
+ 	refill = mvneta_rx_refill_queue(pp, rxq);
+@@ -2221,14 +2232,8 @@ err_drop_frame:
+ 		napi_gro_receive(napi, skb);
+ 	}
+ 
+-	if (rcvd_pkts) {
+-		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+-
+-		u64_stats_update_begin(&stats->syncp);
+-		stats->rx_packets += rcvd_pkts;
+-		stats->rx_bytes   += rcvd_bytes;
+-		u64_stats_update_end(&stats->syncp);
+-	}
++	if (rcvd_pkts)
++		mvneta_update_stats(pp, rcvd_pkts, rcvd_bytes, false);
+ 
+ 	/* Update rxq management counters */
+ 	mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done);
+@@ -2474,7 +2479,6 @@ static netdev_tx_t mvneta_tx(struct sk_b
+ 
+ out:
+ 	if (frags > 0) {
+-		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+ 		struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
+ 
+ 		netdev_tx_sent_queue(nq, len);
+@@ -2489,10 +2493,7 @@ out:
+ 		else
+ 			txq->pending += frags;
+ 
+-		u64_stats_update_begin(&stats->syncp);
+-		stats->tx_packets++;
+-		stats->tx_bytes  += len;
+-		u64_stats_update_end(&stats->syncp);
++		mvneta_update_stats(pp, 1, len, true);
+ 	} else {
+ 		dev->stats.tx_dropped++;
+ 		dev_kfree_skb_any(skb);
diff --git a/target/linux/mvebu/patches-5.4/011-net-mvneta-introduce-page-pool-API-for-sw-buffer-man.patch b/target/linux/mvebu/patches-5.4/011-net-mvneta-introduce-page-pool-API-for-sw-buffer-man.patch
new file mode 100644
index 0000000000..24c446f578
--- /dev/null
+++ b/target/linux/mvebu/patches-5.4/011-net-mvneta-introduce-page-pool-API-for-sw-buffer-man.patch
@@ -0,0 +1,181 @@
+From 160f006a6fe904177cbca867c48dfb6d27262dd5 Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Sat, 19 Oct 2019 10:13:22 +0200
+Subject: [PATCH 2/7] net: mvneta: introduce page pool API for sw buffer
+ manager
+
+Use the page_pool api for allocations and DMA handling instead of
+__dev_alloc_page()/dma_map_page() and free_page()/dma_unmap_page().
+Pages are unmapped using page_pool_release_page before packets
+go into the network stack.
+
+The page_pool API offers buffer recycling capabilities for XDP but
+allocates one page per packet, unless the driver splits and manages
+the allocated page.
+This is a preliminary patch to add XDP support to mvneta driver
+
+Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
+Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/ethernet/marvell/Kconfig  |  1 +
+ drivers/net/ethernet/marvell/mvneta.c | 83 +++++++++++++++++++++------
+ 2 files changed, 65 insertions(+), 19 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/Kconfig
++++ b/drivers/net/ethernet/marvell/Kconfig
+@@ -61,6 +61,7 @@ config MVNETA
+ 	depends on ARCH_MVEBU || COMPILE_TEST
+ 	select MVMDIO
+ 	select PHYLINK
++	select PAGE_POOL
+ 	---help---
+ 	  This driver supports the network interface units in the
+ 	  Marvell ARMADA XP, ARMADA 370, ARMADA 38x and
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -37,6 +37,7 @@
+ #include <net/ip.h>
+ #include <net/ipv6.h>
+ #include <net/tso.h>
++#include <net/page_pool.h>
+ 
+ /* Registers */
+ #define MVNETA_RXQ_CONFIG_REG(q)                (0x1400 + ((q) << 2))
+@@ -605,6 +606,10 @@ struct mvneta_rx_queue {
+ 	u32 pkts_coal;
+ 	u32 time_coal;
+ 
++	/* page_pool */
++	struct page_pool *page_pool;
++	struct xdp_rxq_info xdp_rxq;
++
+ 	/* Virtual address of the RX buffer */
+ 	void  **buf_virt_addr;
+ 
+@@ -1823,23 +1828,21 @@ static int mvneta_rx_refill(struct mvnet
+ 			    struct mvneta_rx_queue *rxq,
+ 			    gfp_t gfp_mask)
+ {
++	enum dma_data_direction dma_dir;
+ 	dma_addr_t phys_addr;
+ 	struct page *page;
+ 
+-	page = __dev_alloc_page(gfp_mask);
++	page = page_pool_alloc_pages(rxq->page_pool,
++				     gfp_mask | __GFP_NOWARN);
+ 	if (!page)
+ 		return -ENOMEM;
+ 
+-	/* map page for use */
+-	phys_addr = dma_map_page(pp->dev->dev.parent, page, 0, PAGE_SIZE,
+-				 DMA_FROM_DEVICE);
+-	if (unlikely(dma_mapping_error(pp->dev->dev.parent, phys_addr))) {
+-		__free_page(page);
+-		return -ENOMEM;
+-	}
+-
+-	phys_addr += pp->rx_offset_correction;
++	phys_addr = page_pool_get_dma_addr(page) + pp->rx_offset_correction;
++	dma_dir = page_pool_get_dma_dir(rxq->page_pool);
++	dma_sync_single_for_device(pp->dev->dev.parent, phys_addr,
++				   PAGE_SIZE, dma_dir);
+ 	mvneta_rx_desc_fill(rx_desc, phys_addr, page, rxq);
++
+ 	return 0;
+ }
+ 
+@@ -1905,10 +1908,12 @@ static void mvneta_rxq_drop_pkts(struct
+ 		if (!data || !(rx_desc->buf_phys_addr))
+ 			continue;
+ 
+-		dma_unmap_page(pp->dev->dev.parent, rx_desc->buf_phys_addr,
+-			       PAGE_SIZE, DMA_FROM_DEVICE);
+-		__free_page(data);
++		page_pool_put_page(rxq->page_pool, data, false);
+ 	}
++	if (xdp_rxq_info_is_reg(&rxq->xdp_rxq))
++		xdp_rxq_info_unreg(&rxq->xdp_rxq);
++	page_pool_destroy(rxq->page_pool);
++	rxq->page_pool = NULL;
+ }
+ 
+ static void
+@@ -2045,8 +2050,7 @@ static int mvneta_rx_swbm(struct napi_st
+ 				skb_add_rx_frag(rxq->skb, frag_num, page,
+ 						frag_offset, frag_size,
+ 						PAGE_SIZE);
+-				dma_unmap_page(dev->dev.parent, phys_addr,
+-					       PAGE_SIZE, DMA_FROM_DEVICE);
++				page_pool_release_page(rxq->page_pool, page);
+ 				rxq->left_size -= frag_size;
+ 			}
+ 		} else {
+@@ -2076,9 +2080,7 @@ static int mvneta_rx_swbm(struct napi_st
+ 						frag_offset, frag_size,
+ 						PAGE_SIZE);
+ 
+-				dma_unmap_page(dev->dev.parent, phys_addr,
+-					       PAGE_SIZE, DMA_FROM_DEVICE);
+-
++				page_pool_release_page(rxq->page_pool, page);
+ 				rxq->left_size -= frag_size;
+ 			}
+ 		} /* Middle or Last descriptor */
+@@ -2845,11 +2847,54 @@ static int mvneta_poll(struct napi_struc
+ 	return rx_done;
+ }
+ 
++static int mvneta_create_page_pool(struct mvneta_port *pp,
++				   struct mvneta_rx_queue *rxq, int size)
++{
++	struct page_pool_params pp_params = {
++		.order = 0,
++		.flags = PP_FLAG_DMA_MAP,
++		.pool_size = size,
++		.nid = cpu_to_node(0),
++		.dev = pp->dev->dev.parent,
++		.dma_dir = DMA_FROM_DEVICE,
++	};
++	int err;
++
++	rxq->page_pool = page_pool_create(&pp_params);
++	if (IS_ERR(rxq->page_pool)) {
++		err = PTR_ERR(rxq->page_pool);
++		rxq->page_pool = NULL;
++		return err;
++	}
++
++	err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id);
++	if (err < 0)
++		goto err_free_pp;
++
++	err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL,
++					 rxq->page_pool);
++	if (err)
++		goto err_unregister_rxq;
++
++	return 0;
++
++err_unregister_rxq:
++	xdp_rxq_info_unreg(&rxq->xdp_rxq);
++err_free_pp:
++	page_pool_destroy(rxq->page_pool);
++	rxq->page_pool = NULL;
++	return err;
++}
++
+ /* Handle rxq fill: allocates rxq skbs; called when initializing a port */
+ static int mvneta_rxq_fill(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
+ 			   int num)
+ {
+-	int i;
++	int i, err;
++
++	err = mvneta_create_page_pool(pp, rxq, num);
++	if (err < 0)
++		return err;
+ 
+ 	for (i = 0; i < num; i++) {
+ 		memset(rxq->descs + i, 0, sizeof(struct mvneta_rx_desc));
diff --git a/target/linux/mvebu/patches-5.4/012-net-mvneta-rely-on-build_skb-in-mvneta_rx_swbm-poll-.patch b/target/linux/mvebu/patches-5.4/012-net-mvneta-rely-on-build_skb-in-mvneta_rx_swbm-poll-.patch
new file mode 100644
index 0000000000..1a89c8476d
--- /dev/null
+++ b/target/linux/mvebu/patches-5.4/012-net-mvneta-rely-on-build_skb-in-mvneta_rx_swbm-poll-.patch
@@ -0,0 +1,303 @@
+From 00cf2a1d7d58631ba137b9acabe1de1d542625a8 Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Sat, 19 Oct 2019 10:13:23 +0200
+Subject: [PATCH 3/7] net: mvneta: rely on build_skb in mvneta_rx_swbm poll
+ routine
+
+Refactor mvneta_rx_swbm code introducing mvneta_swbm_rx_frame and
+mvneta_swbm_add_rx_fragment routines. Rely on build_skb in order to
+allocate skb since the previous patch introduced buffer recycling using
+the page_pool API.
+This patch also fixes an issue in the original driver where DMA buffers
+are accessed before DMA sync.
+The mvneta driver can run on non-cache-coherent devices, so it is
+necessary to sync DMA buffers before sending them to the device
+in order to avoid memory corruptions. Running perf analysis we can
+see a performance cost associated with this DMA-sync (anyway it is
+already there in the original driver code). In follow up patches we
+will add more logic to reduce DMA-sync as much as possible.
+
+Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
+Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/ethernet/marvell/mvneta.c | 185 +++++++++++++-------------
+ 1 file changed, 95 insertions(+), 90 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -323,6 +323,11 @@
+ 	      ETH_HLEN + ETH_FCS_LEN,			     \
+ 	      cache_line_size())
+ 
++#define MVNETA_SKB_PAD	(SKB_DATA_ALIGN(sizeof(struct skb_shared_info) + \
++			 NET_SKB_PAD))
++#define MVNETA_SKB_SIZE(len)	(SKB_DATA_ALIGN(len) + MVNETA_SKB_PAD)
++#define MVNETA_MAX_RX_BUF_SIZE	(PAGE_SIZE - MVNETA_SKB_PAD)
++
+ #define IS_TSO_HEADER(txq, addr) \
+ 	((addr >= txq->tso_hdrs_phys) && \
+ 	 (addr < txq->tso_hdrs_phys + txq->size * TSO_HEADER_SIZE))
+@@ -648,7 +653,6 @@ static int txq_number = 8;
+ static int rxq_def;
+ 
+ static int rx_copybreak __read_mostly = 256;
+-static int rx_header_size __read_mostly = 128;
+ 
+ /* HW BM need that each port be identify by a unique ID */
+ static int global_port_id;
+@@ -1840,7 +1844,7 @@ static int mvneta_rx_refill(struct mvnet
+ 	phys_addr = page_pool_get_dma_addr(page) + pp->rx_offset_correction;
+ 	dma_dir = page_pool_get_dma_dir(rxq->page_pool);
+ 	dma_sync_single_for_device(pp->dev->dev.parent, phys_addr,
+-				   PAGE_SIZE, dma_dir);
++				   MVNETA_MAX_RX_BUF_SIZE, dma_dir);
+ 	mvneta_rx_desc_fill(rx_desc, phys_addr, page, rxq);
+ 
+ 	return 0;
+@@ -1958,30 +1962,102 @@ int mvneta_rx_refill_queue(struct mvneta
+ 	return i;
+ }
+ 
++static int
++mvneta_swbm_rx_frame(struct mvneta_port *pp,
++		     struct mvneta_rx_desc *rx_desc,
++		     struct mvneta_rx_queue *rxq,
++		     struct page *page)
++{
++	unsigned char *data = page_address(page);
++	int data_len = -MVNETA_MH_SIZE, len;
++	struct net_device *dev = pp->dev;
++	enum dma_data_direction dma_dir;
++
++	if (MVNETA_SKB_SIZE(rx_desc->data_size) > PAGE_SIZE) {
++		len = MVNETA_MAX_RX_BUF_SIZE;
++		data_len += len;
++	} else {
++		len = rx_desc->data_size;
++		data_len += len - ETH_FCS_LEN;
++	}
++
++	dma_dir = page_pool_get_dma_dir(rxq->page_pool);
++	dma_sync_single_for_cpu(dev->dev.parent,
++				rx_desc->buf_phys_addr,
++				len, dma_dir);
++
++	rxq->skb = build_skb(data, PAGE_SIZE);
++	if (unlikely(!rxq->skb)) {
++		netdev_err(dev,
++			   "Can't allocate skb on queue %d\n",
++			   rxq->id);
++		dev->stats.rx_dropped++;
++		rxq->skb_alloc_err++;
++		return -ENOMEM;
++	}
++	page_pool_release_page(rxq->page_pool, page);
++
++	skb_reserve(rxq->skb, MVNETA_MH_SIZE + NET_SKB_PAD);
++	skb_put(rxq->skb, data_len);
++	mvneta_rx_csum(pp, rx_desc->status, rxq->skb);
++
++	rxq->left_size = rx_desc->data_size - len;
++	rx_desc->buf_phys_addr = 0;
++
++	return 0;
++}
++
++static void
++mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
++			    struct mvneta_rx_desc *rx_desc,
++			    struct mvneta_rx_queue *rxq,
++			    struct page *page)
++{
++	struct net_device *dev = pp->dev;
++	enum dma_data_direction dma_dir;
++	int data_len, len;
++
++	if (rxq->left_size > MVNETA_MAX_RX_BUF_SIZE) {
++		len = MVNETA_MAX_RX_BUF_SIZE;
++		data_len = len;
++	} else {
++		len = rxq->left_size;
++		data_len = len - ETH_FCS_LEN;
++	}
++	dma_dir = page_pool_get_dma_dir(rxq->page_pool);
++	dma_sync_single_for_cpu(dev->dev.parent,
++				rx_desc->buf_phys_addr,
++				len, dma_dir);
++	if (data_len > 0) {
++		/* refill descriptor with new buffer later */
++		skb_add_rx_frag(rxq->skb,
++				skb_shinfo(rxq->skb)->nr_frags,
++				page, NET_SKB_PAD, data_len,
++				PAGE_SIZE);
++	}
++	page_pool_release_page(rxq->page_pool, page);
++	rx_desc->buf_phys_addr = 0;
++	rxq->left_size -= len;
++}
++
+ /* Main rx processing when using software buffer management */
+ static int mvneta_rx_swbm(struct napi_struct *napi,
+ 			  struct mvneta_port *pp, int budget,
+ 			  struct mvneta_rx_queue *rxq)
+ {
++	int rcvd_pkts = 0, rcvd_bytes = 0, rx_proc = 0;
+ 	struct net_device *dev = pp->dev;
+-	int rx_todo, rx_proc;
+-	int refill = 0;
+-	u32 rcvd_pkts = 0;
+-	u32 rcvd_bytes = 0;
++	int rx_todo, refill;
+ 
+ 	/* Get number of received packets */
+ 	rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
+-	rx_proc = 0;
+ 
+ 	/* Fairness NAPI loop */
+-	while ((rcvd_pkts < budget) && (rx_proc < rx_todo)) {
++	while (rx_proc < budget && rx_proc < rx_todo) {
+ 		struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
++		u32 rx_status, index;
+ 		unsigned char *data;
+ 		struct page *page;
+-		dma_addr_t phys_addr;
+-		u32 rx_status, index;
+-		int rx_bytes, skb_size, copy_size;
+-		int frag_num, frag_size, frag_offset;
+ 
+ 		index = rx_desc - rxq->descs;
+ 		page = (struct page *)rxq->buf_virt_addr[index];
+@@ -1989,100 +2065,30 @@ static int mvneta_rx_swbm(struct napi_st
+ 		/* Prefetch header */
+ 		prefetch(data);
+ 
+-		phys_addr = rx_desc->buf_phys_addr;
+ 		rx_status = rx_desc->status;
+ 		rx_proc++;
+ 		rxq->refill_num++;
+ 
+ 		if (rx_status & MVNETA_RXD_FIRST_DESC) {
++			int err;
++
+ 			/* Check errors only for FIRST descriptor */
+ 			if (rx_status & MVNETA_RXD_ERR_SUMMARY) {
+ 				mvneta_rx_error(pp, rx_desc);
+ 				/* leave the descriptor untouched */
+ 				continue;
+ 			}
+-			rx_bytes = rx_desc->data_size -
+-				   (ETH_FCS_LEN + MVNETA_MH_SIZE);
+-
+-			/* Allocate small skb for each new packet */
+-			skb_size = max(rx_copybreak, rx_header_size);
+-			rxq->skb = netdev_alloc_skb_ip_align(dev, skb_size);
+-			if (unlikely(!rxq->skb)) {
+-				struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+-
+-				netdev_err(dev,
+-					   "Can't allocate skb on queue %d\n",
+-					   rxq->id);
+-
+-				rxq->skb_alloc_err++;
+ 
+-				u64_stats_update_begin(&stats->syncp);
+-				stats->rx_dropped++;
+-				u64_stats_update_end(&stats->syncp);
++			err = mvneta_swbm_rx_frame(pp, rx_desc, rxq, page);
++			if (err)
+ 				continue;
+-			}
+-			copy_size = min(skb_size, rx_bytes);
+-
+-			/* Copy data from buffer to SKB, skip Marvell header */
+-			memcpy(rxq->skb->data, data + MVNETA_MH_SIZE,
+-			       copy_size);
+-			skb_put(rxq->skb, copy_size);
+-			rxq->left_size = rx_bytes - copy_size;
+-
+-			mvneta_rx_csum(pp, rx_status, rxq->skb);
+-			if (rxq->left_size == 0) {
+-				int size = copy_size + MVNETA_MH_SIZE;
+-
+-				dma_sync_single_range_for_cpu(dev->dev.parent,
+-							      phys_addr, 0,
+-							      size,
+-							      DMA_FROM_DEVICE);
+-
+-				/* leave the descriptor and buffer untouched */
+-			} else {
+-				/* refill descriptor with new buffer later */
+-				rx_desc->buf_phys_addr = 0;
+-
+-				frag_num = 0;
+-				frag_offset = copy_size + MVNETA_MH_SIZE;
+-				frag_size = min(rxq->left_size,
+-						(int)(PAGE_SIZE - frag_offset));
+-				skb_add_rx_frag(rxq->skb, frag_num, page,
+-						frag_offset, frag_size,
+-						PAGE_SIZE);
+-				page_pool_release_page(rxq->page_pool, page);
+-				rxq->left_size -= frag_size;
+-			}
+ 		} else {
+-			/* Middle or Last descriptor */
+ 			if (unlikely(!rxq->skb)) {
+ 				pr_debug("no skb for rx_status 0x%x\n",
+ 					 rx_status);
+ 				continue;
+ 			}
+-			if (!rxq->left_size) {
+-				/* last descriptor has only FCS */
+-				/* and can be discarded */
+-				dma_sync_single_range_for_cpu(dev->dev.parent,
+-							      phys_addr, 0,
+-							      ETH_FCS_LEN,
+-							      DMA_FROM_DEVICE);
+-				/* leave the descriptor and buffer untouched */
+-			} else {
+-				/* refill descriptor with new buffer later */
+-				rx_desc->buf_phys_addr = 0;
+-
+-				frag_num = skb_shinfo(rxq->skb)->nr_frags;
+-				frag_offset = 0;
+-				frag_size = min(rxq->left_size,
+-						(int)(PAGE_SIZE - frag_offset));
+-				skb_add_rx_frag(rxq->skb, frag_num, page,
+-						frag_offset, frag_size,
+-						PAGE_SIZE);
+-
+-				page_pool_release_page(rxq->page_pool, page);
+-				rxq->left_size -= frag_size;
+-			}
++			mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, page);
+ 		} /* Middle or Last descriptor */
+ 
+ 		if (!(rx_status & MVNETA_RXD_LAST_DESC))
+@@ -2107,7 +2113,6 @@ static int mvneta_rx_swbm(struct napi_st
+ 
+ 		/* clean uncomplete skb pointer in queue */
+ 		rxq->skb = NULL;
+-		rxq->left_size = 0;
+ 	}
+ 
+ 	if (rcvd_pkts)
+@@ -2968,7 +2973,7 @@ static void mvneta_rxq_hw_init(struct mv
+ 		/* Set Offset */
+ 		mvneta_rxq_offset_set(pp, rxq, 0);
+ 		mvneta_rxq_buf_size_set(pp, rxq, PAGE_SIZE < SZ_64K ?
+-					PAGE_SIZE :
++					MVNETA_MAX_RX_BUF_SIZE :
+ 					MVNETA_RX_BUF_SIZE(pp->pkt_size));
+ 		mvneta_rxq_bm_disable(pp, rxq);
+ 		mvneta_rxq_fill(pp, rxq, rxq->size);
+@@ -4678,7 +4683,7 @@ static int mvneta_probe(struct platform_
+ 	SET_NETDEV_DEV(dev, &pdev->dev);
+ 
+ 	pp->id = global_port_id++;
+-	pp->rx_offset_correction = 0; /* not relevant for SW BM */
++	pp->rx_offset_correction = NET_SKB_PAD;
+ 
+ 	/* Obtain access to BM resources if enabled and already initialized */
+ 	bm_node = of_parse_phandle(dn, "buffer-manager", 0);
diff --git a/target/linux/mvebu/patches-5.4/013-net-mvneta-add-basic-XDP-support.patch b/target/linux/mvebu/patches-5.4/013-net-mvneta-add-basic-XDP-support.patch
new file mode 100644
index 0000000000..e45eba3832
--- /dev/null
+++ b/target/linux/mvebu/patches-5.4/013-net-mvneta-add-basic-XDP-support.patch
@@ -0,0 +1,311 @@
+From f21366d964bd41a2b823beed36a7c3e7a728857b Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Sat, 19 Oct 2019 10:13:24 +0200
+Subject: [PATCH 4/7] net: mvneta: add basic XDP support
+
+Add basic XDP support to mvneta driver for devices that rely on software
+buffer management. Currently supported verdicts are:
+- XDP_DROP
+- XDP_PASS
+- XDP_REDIRECT
+- XDP_ABORTED
+
+- iptables drop:
+$iptables -t raw -I PREROUTING -p udp --dport 9 -j DROP
+$nstat -n && sleep 1 && nstat
+IpInReceives		151169		0.0
+IpExtInOctets		6953544		0.0
+IpExtInNoECTPkts	151165		0.0
+
+- XDP_DROP via xdp1
+$./samples/bpf/xdp1 3
+proto 0:	421419 pkt/s
+proto 0:	421444 pkt/s
+proto 0:	421393 pkt/s
+proto 0:	421440 pkt/s
+proto 0:	421184 pkt/s
+
+Tested-by: Matteo Croce <mcroce@redhat.com>
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/ethernet/marvell/mvneta.c | 148 ++++++++++++++++++++++++--
+ 1 file changed, 139 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -38,6 +38,7 @@
+ #include <net/ipv6.h>
+ #include <net/tso.h>
+ #include <net/page_pool.h>
++#include <linux/bpf_trace.h>
+ 
+ /* Registers */
+ #define MVNETA_RXQ_CONFIG_REG(q)                (0x1400 + ((q) << 2))
+@@ -323,8 +324,10 @@
+ 	      ETH_HLEN + ETH_FCS_LEN,			     \
+ 	      cache_line_size())
+ 
++#define MVNETA_SKB_HEADROOM	(max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \
++				 NET_IP_ALIGN)
+ #define MVNETA_SKB_PAD	(SKB_DATA_ALIGN(sizeof(struct skb_shared_info) + \
+-			 NET_SKB_PAD))
++			 MVNETA_SKB_HEADROOM))
+ #define MVNETA_SKB_SIZE(len)	(SKB_DATA_ALIGN(len) + MVNETA_SKB_PAD)
+ #define MVNETA_MAX_RX_BUF_SIZE	(PAGE_SIZE - MVNETA_SKB_PAD)
+ 
+@@ -352,6 +355,11 @@ struct mvneta_statistic {
+ #define T_REG_64	64
+ #define T_SW		1
+ 
++#define MVNETA_XDP_PASS		BIT(0)
++#define MVNETA_XDP_DROPPED	BIT(1)
++#define MVNETA_XDP_TX		BIT(2)
++#define MVNETA_XDP_REDIR	BIT(3)
++
+ static const struct mvneta_statistic mvneta_statistics[] = {
+ 	{ 0x3000, T_REG_64, "good_octets_received", },
+ 	{ 0x3010, T_REG_32, "good_frames_received", },
+@@ -433,6 +441,8 @@ struct mvneta_port {
+ 	u32 cause_rx_tx;
+ 	struct napi_struct napi;
+ 
++	struct bpf_prog *xdp_prog;
++
+ 	/* Core clock */
+ 	struct clk *clk;
+ 	/* AXI clock */
+@@ -1963,10 +1973,50 @@ int mvneta_rx_refill_queue(struct mvneta
+ }
+ 
+ static int
++mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
++	       struct bpf_prog *prog, struct xdp_buff *xdp)
++{
++	u32 ret, act = bpf_prog_run_xdp(prog, xdp);
++
++	switch (act) {
++	case XDP_PASS:
++		ret = MVNETA_XDP_PASS;
++		break;
++	case XDP_REDIRECT: {
++		int err;
++
++		err = xdp_do_redirect(pp->dev, xdp, prog);
++		if (err) {
++			ret = MVNETA_XDP_DROPPED;
++			xdp_return_buff(xdp);
++		} else {
++			ret = MVNETA_XDP_REDIR;
++		}
++		break;
++	}
++	default:
++		bpf_warn_invalid_xdp_action(act);
++		/* fall through */
++	case XDP_ABORTED:
++		trace_xdp_exception(pp->dev, prog, act);
++		/* fall through */
++	case XDP_DROP:
++		page_pool_recycle_direct(rxq->page_pool,
++					 virt_to_head_page(xdp->data));
++		ret = MVNETA_XDP_DROPPED;
++		break;
++	}
++
++	return ret;
++}
++
++static int
+ mvneta_swbm_rx_frame(struct mvneta_port *pp,
+ 		     struct mvneta_rx_desc *rx_desc,
+ 		     struct mvneta_rx_queue *rxq,
+-		     struct page *page)
++		     struct xdp_buff *xdp,
++		     struct bpf_prog *xdp_prog,
++		     struct page *page, u32 *xdp_ret)
+ {
+ 	unsigned char *data = page_address(page);
+ 	int data_len = -MVNETA_MH_SIZE, len;
+@@ -1986,7 +2036,26 @@ mvneta_swbm_rx_frame(struct mvneta_port
+ 				rx_desc->buf_phys_addr,
+ 				len, dma_dir);
+ 
+-	rxq->skb = build_skb(data, PAGE_SIZE);
++	xdp->data_hard_start = data;
++	xdp->data = data + MVNETA_SKB_HEADROOM + MVNETA_MH_SIZE;
++	xdp->data_end = xdp->data + data_len;
++	xdp_set_data_meta_invalid(xdp);
++
++	if (xdp_prog) {
++		u32 ret;
++
++		ret = mvneta_run_xdp(pp, rxq, xdp_prog, xdp);
++		if (ret != MVNETA_XDP_PASS) {
++			mvneta_update_stats(pp, 1,
++					    xdp->data_end - xdp->data,
++					    false);
++			rx_desc->buf_phys_addr = 0;
++			*xdp_ret |= ret;
++			return ret;
++		}
++	}
++
++	rxq->skb = build_skb(xdp->data_hard_start, PAGE_SIZE);
+ 	if (unlikely(!rxq->skb)) {
+ 		netdev_err(dev,
+ 			   "Can't allocate skb on queue %d\n",
+@@ -1997,8 +2066,9 @@ mvneta_swbm_rx_frame(struct mvneta_port
+ 	}
+ 	page_pool_release_page(rxq->page_pool, page);
+ 
+-	skb_reserve(rxq->skb, MVNETA_MH_SIZE + NET_SKB_PAD);
+-	skb_put(rxq->skb, data_len);
++	skb_reserve(rxq->skb,
++		    xdp->data - xdp->data_hard_start);
++	skb_put(rxq->skb, xdp->data_end - xdp->data);
+ 	mvneta_rx_csum(pp, rx_desc->status, rxq->skb);
+ 
+ 	rxq->left_size = rx_desc->data_size - len;
+@@ -2032,7 +2102,7 @@ mvneta_swbm_add_rx_fragment(struct mvnet
+ 		/* refill descriptor with new buffer later */
+ 		skb_add_rx_frag(rxq->skb,
+ 				skb_shinfo(rxq->skb)->nr_frags,
+-				page, NET_SKB_PAD, data_len,
++				page, MVNETA_SKB_HEADROOM, data_len,
+ 				PAGE_SIZE);
+ 	}
+ 	page_pool_release_page(rxq->page_pool, page);
+@@ -2047,11 +2117,18 @@ static int mvneta_rx_swbm(struct napi_st
+ {
+ 	int rcvd_pkts = 0, rcvd_bytes = 0, rx_proc = 0;
+ 	struct net_device *dev = pp->dev;
++	struct bpf_prog *xdp_prog;
++	struct xdp_buff xdp_buf;
+ 	int rx_todo, refill;
++	u32 xdp_ret = 0;
+ 
+ 	/* Get number of received packets */
+ 	rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
+ 
++	rcu_read_lock();
++	xdp_prog = READ_ONCE(pp->xdp_prog);
++	xdp_buf.rxq = &rxq->xdp_rxq;
++
+ 	/* Fairness NAPI loop */
+ 	while (rx_proc < budget && rx_proc < rx_todo) {
+ 		struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
+@@ -2079,7 +2156,8 @@ static int mvneta_rx_swbm(struct napi_st
+ 				continue;
+ 			}
+ 
+-			err = mvneta_swbm_rx_frame(pp, rx_desc, rxq, page);
++			err = mvneta_swbm_rx_frame(pp, rx_desc, rxq, &xdp_buf,
++						   xdp_prog, page, &xdp_ret);
+ 			if (err)
+ 				continue;
+ 		} else {
+@@ -2114,6 +2192,10 @@ static int mvneta_rx_swbm(struct napi_st
+ 		/* clean uncomplete skb pointer in queue */
+ 		rxq->skb = NULL;
+ 	}
++	rcu_read_unlock();
++
++	if (xdp_ret & MVNETA_XDP_REDIR)
++		xdp_do_flush_map();
+ 
+ 	if (rcvd_pkts)
+ 		mvneta_update_stats(pp, rcvd_pkts, rcvd_bytes, false);
+@@ -2855,13 +2937,14 @@ static int mvneta_poll(struct napi_struc
+ static int mvneta_create_page_pool(struct mvneta_port *pp,
+ 				   struct mvneta_rx_queue *rxq, int size)
+ {
++	struct bpf_prog *xdp_prog = READ_ONCE(pp->xdp_prog);
+ 	struct page_pool_params pp_params = {
+ 		.order = 0,
+ 		.flags = PP_FLAG_DMA_MAP,
+ 		.pool_size = size,
+ 		.nid = cpu_to_node(0),
+ 		.dev = pp->dev->dev.parent,
+-		.dma_dir = DMA_FROM_DEVICE,
++		.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
+ 	};
+ 	int err;
+ 
+@@ -3328,6 +3411,11 @@ static int mvneta_change_mtu(struct net_
+ 		mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
+ 	}
+ 
++	if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) {
++		netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu);
++		return -EINVAL;
++	}
++
+ 	dev->mtu = mtu;
+ 
+ 	if (!netif_running(dev)) {
+@@ -3997,6 +4085,47 @@ static int mvneta_ioctl(struct net_devic
+ 	return phylink_mii_ioctl(pp->phylink, ifr, cmd);
+ }
+ 
++static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
++			    struct netlink_ext_ack *extack)
++{
++	bool need_update, running = netif_running(dev);
++	struct mvneta_port *pp = netdev_priv(dev);
++	struct bpf_prog *old_prog;
++
++	if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
++		NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported on XDP");
++		return -EOPNOTSUPP;
++	}
++
++	need_update = !!pp->xdp_prog != !!prog;
++	if (running && need_update)
++		mvneta_stop(dev);
++
++	old_prog = xchg(&pp->xdp_prog, prog);
++	if (old_prog)
++		bpf_prog_put(old_prog);
++
++	if (running && need_update)
++		return mvneta_open(dev);
++
++	return 0;
++}
++
++static int mvneta_xdp(struct net_device *dev, struct netdev_bpf *xdp)
++{
++	struct mvneta_port *pp = netdev_priv(dev);
++
++	switch (xdp->command) {
++	case XDP_SETUP_PROG:
++		return mvneta_xdp_setup(dev, xdp->prog, xdp->extack);
++	case XDP_QUERY_PROG:
++		xdp->prog_id = pp->xdp_prog ? pp->xdp_prog->aux->id : 0;
++		return 0;
++	default:
++		return -EINVAL;
++	}
++}
++
+ /* Ethtool methods */
+ 
+ /* Set link ksettings (phy address, speed) for ethtools */
+@@ -4393,6 +4522,7 @@ static const struct net_device_ops mvnet
+ 	.ndo_fix_features    = mvneta_fix_features,
+ 	.ndo_get_stats64     = mvneta_get_stats64,
+ 	.ndo_do_ioctl        = mvneta_ioctl,
++	.ndo_bpf             = mvneta_xdp,
+ };
+ 
+ static const struct ethtool_ops mvneta_eth_tool_ops = {
+@@ -4683,7 +4813,7 @@ static int mvneta_probe(struct platform_
+ 	SET_NETDEV_DEV(dev, &pdev->dev);
+ 
+ 	pp->id = global_port_id++;
+-	pp->rx_offset_correction = NET_SKB_PAD;
++	pp->rx_offset_correction = MVNETA_SKB_HEADROOM;
+ 
+ 	/* Obtain access to BM resources if enabled and already initialized */
+ 	bm_node = of_parse_phandle(dn, "buffer-manager", 0);
diff --git a/target/linux/mvebu/patches-5.4/014-net-mvneta-move-header-prefetch-in-mvneta_swbm_rx_fr.patch b/target/linux/mvebu/patches-5.4/014-net-mvneta-move-header-prefetch-in-mvneta_swbm_rx_fr.patch
new file mode 100644
index 0000000000..f9d2702dbb
--- /dev/null
+++ b/target/linux/mvebu/patches-5.4/014-net-mvneta-move-header-prefetch-in-mvneta_swbm_rx_fr.patch
@@ -0,0 +1,43 @@
+From 8abaeebfa7ef9a5600f4f596d2e94aa253f47553 Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Sat, 19 Oct 2019 10:13:25 +0200
+Subject: [PATCH 5/7] net: mvneta: move header prefetch in mvneta_swbm_rx_frame
+
+Move data buffer prefetch in mvneta_swbm_rx_frame after
+dma_sync_single_range_for_cpu
+
+Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
+Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/ethernet/marvell/mvneta.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -2036,6 +2036,9 @@ mvneta_swbm_rx_frame(struct mvneta_port
+ 				rx_desc->buf_phys_addr,
+ 				len, dma_dir);
+ 
++	/* Prefetch header */
++	prefetch(data);
++
+ 	xdp->data_hard_start = data;
+ 	xdp->data = data + MVNETA_SKB_HEADROOM + MVNETA_MH_SIZE;
+ 	xdp->data_end = xdp->data + data_len;
+@@ -2133,14 +2136,10 @@ static int mvneta_rx_swbm(struct napi_st
+ 	while (rx_proc < budget && rx_proc < rx_todo) {
+ 		struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
+ 		u32 rx_status, index;
+-		unsigned char *data;
+ 		struct page *page;
+ 
+ 		index = rx_desc - rxq->descs;
+ 		page = (struct page *)rxq->buf_virt_addr[index];
+-		data = page_address(page);
+-		/* Prefetch header */
+-		prefetch(data);
+ 
+ 		rx_status = rx_desc->status;
+ 		rx_proc++;
diff --git a/target/linux/mvebu/patches-5.4/015-net-mvneta-make-tx-buffer-array-agnostic.patch b/target/linux/mvebu/patches-5.4/015-net-mvneta-make-tx-buffer-array-agnostic.patch
new file mode 100644
index 0000000000..d44a3f1a49
--- /dev/null
+++ b/target/linux/mvebu/patches-5.4/015-net-mvneta-make-tx-buffer-array-agnostic.patch
@@ -0,0 +1,210 @@
+From f7f705a63cd4bd1e3463db7662011717c5149e8a Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Sat, 19 Oct 2019 10:13:26 +0200
+Subject: [PATCH 6/7] net: mvneta: make tx buffer array agnostic
+
+Allow the tx buffer array to contain both skb and xdp buffers in order to
+enable xdp frame recycling when adding XDP_TX verdict support.
+
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/ethernet/marvell/mvneta.c | 66 +++++++++++++++++----------
+ 1 file changed, 43 insertions(+), 23 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -563,6 +563,20 @@ struct mvneta_rx_desc {
+ };
+ #endif
+ 
++enum mvneta_tx_buf_type { /* tags what a struct mvneta_tx_buf slot holds */
++	MVNETA_TYPE_SKB, /* slot written by the skb / TSO transmit paths */
++	MVNETA_TYPE_XDP_TX, /* xdp_frame queued without a new DMA mapping */
++	MVNETA_TYPE_XDP_NDO, /* xdp_frame mapped with dma_map_single() */
++};
++
++struct mvneta_tx_buf { /* one entry per tx descriptor, see mvneta_tx_queue.buf */
++	enum mvneta_tx_buf_type type; /* tells which union member is in use */
++	union {
++		struct xdp_frame *xdpf; /* set for both XDP buffer types */
++		struct sk_buff *skb; /* NULL on descriptors that do not end an skb */
++	};
++};
++
+ struct mvneta_tx_queue {
+ 	/* Number of this TX queue, in the range 0-7 */
+ 	u8 id;
+@@ -578,8 +592,8 @@ struct mvneta_tx_queue {
+ 	int tx_stop_threshold;
+ 	int tx_wake_threshold;
+ 
+-	/* Array of transmitted skb */
+-	struct sk_buff **tx_skb;
++	/* Array of transmitted buffers */
++	struct mvneta_tx_buf *buf;
+ 
+ 	/* Index of last TX DMA descriptor that was inserted */
+ 	int txq_put_index;
+@@ -1791,14 +1805,9 @@ static void mvneta_txq_bufs_free(struct
+ 	int i;
+ 
+ 	for (i = 0; i < num; i++) {
++		struct mvneta_tx_buf *buf = &txq->buf[txq->txq_get_index];
+ 		struct mvneta_tx_desc *tx_desc = txq->descs +
+ 			txq->txq_get_index;
+-		struct sk_buff *skb = txq->tx_skb[txq->txq_get_index];
+-
+-		if (skb) {
+-			bytes_compl += skb->len;
+-			pkts_compl++;
+-		}
+ 
+ 		mvneta_txq_inc_get(txq);
+ 
+@@ -1806,9 +1815,12 @@ static void mvneta_txq_bufs_free(struct
+ 			dma_unmap_single(pp->dev->dev.parent,
+ 					 tx_desc->buf_phys_addr,
+ 					 tx_desc->data_size, DMA_TO_DEVICE);
+-		if (!skb)
++		if (!buf->skb)
+ 			continue;
+-		dev_kfree_skb_any(skb);
++
++		bytes_compl += buf->skb->len;
++		pkts_compl++;
++		dev_kfree_skb_any(buf->skb);
+ 	}
+ 
+ 	netdev_tx_completed_queue(nq, pkts_compl, bytes_compl);
+@@ -2333,16 +2345,19 @@ static inline void
+ mvneta_tso_put_hdr(struct sk_buff *skb,
+ 		   struct mvneta_port *pp, struct mvneta_tx_queue *txq)
+ {
+-	struct mvneta_tx_desc *tx_desc;
+ 	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
++	struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
++	struct mvneta_tx_desc *tx_desc;
+ 
+-	txq->tx_skb[txq->txq_put_index] = NULL;
+ 	tx_desc = mvneta_txq_next_desc_get(txq);
+ 	tx_desc->data_size = hdr_len;
+ 	tx_desc->command = mvneta_skb_tx_csum(pp, skb);
+ 	tx_desc->command |= MVNETA_TXD_F_DESC;
+ 	tx_desc->buf_phys_addr = txq->tso_hdrs_phys +
+ 				 txq->txq_put_index * TSO_HEADER_SIZE;
++	buf->type = MVNETA_TYPE_SKB;
++	buf->skb = NULL;
++
+ 	mvneta_txq_inc_put(txq);
+ }
+ 
+@@ -2351,6 +2366,7 @@ mvneta_tso_put_data(struct net_device *d
+ 		    struct sk_buff *skb, char *data, int size,
+ 		    bool last_tcp, bool is_last)
+ {
++	struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
+ 	struct mvneta_tx_desc *tx_desc;
+ 
+ 	tx_desc = mvneta_txq_next_desc_get(txq);
+@@ -2364,7 +2380,8 @@ mvneta_tso_put_data(struct net_device *d
+ 	}
+ 
+ 	tx_desc->command = 0;
+-	txq->tx_skb[txq->txq_put_index] = NULL;
++	buf->type = MVNETA_TYPE_SKB;
++	buf->skb = NULL;
+ 
+ 	if (last_tcp) {
+ 		/* last descriptor in the TCP packet */
+@@ -2372,7 +2389,7 @@ mvneta_tso_put_data(struct net_device *d
+ 
+ 		/* last descriptor in SKB */
+ 		if (is_last)
+-			txq->tx_skb[txq->txq_put_index] = skb;
++			buf->skb = skb;
+ 	}
+ 	mvneta_txq_inc_put(txq);
+ 	return 0;
+@@ -2457,6 +2474,7 @@ static int mvneta_tx_frag_process(struct
+ 	int i, nr_frags = skb_shinfo(skb)->nr_frags;
+ 
+ 	for (i = 0; i < nr_frags; i++) {
++		struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
+ 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ 		void *addr = skb_frag_address(frag);
+ 
+@@ -2476,12 +2494,13 @@ static int mvneta_tx_frag_process(struct
+ 		if (i == nr_frags - 1) {
+ 			/* Last descriptor */
+ 			tx_desc->command = MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD;
+-			txq->tx_skb[txq->txq_put_index] = skb;
++			buf->skb = skb;
+ 		} else {
+ 			/* Descriptor in the middle: Not First, Not Last */
+ 			tx_desc->command = 0;
+-			txq->tx_skb[txq->txq_put_index] = NULL;
++			buf->skb = NULL;
+ 		}
++		buf->type = MVNETA_TYPE_SKB;
+ 		mvneta_txq_inc_put(txq);
+ 	}
+ 
+@@ -2509,6 +2528,7 @@ static netdev_tx_t mvneta_tx(struct sk_b
+ 	struct mvneta_port *pp = netdev_priv(dev);
+ 	u16 txq_id = skb_get_queue_mapping(skb);
+ 	struct mvneta_tx_queue *txq = &pp->txqs[txq_id];
++	struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
+ 	struct mvneta_tx_desc *tx_desc;
+ 	int len = skb->len;
+ 	int frags = 0;
+@@ -2541,16 +2561,17 @@ static netdev_tx_t mvneta_tx(struct sk_b
+ 		goto out;
+ 	}
+ 
++	buf->type = MVNETA_TYPE_SKB;
+ 	if (frags == 1) {
+ 		/* First and Last descriptor */
+ 		tx_cmd |= MVNETA_TXD_FLZ_DESC;
+ 		tx_desc->command = tx_cmd;
+-		txq->tx_skb[txq->txq_put_index] = skb;
++		buf->skb = skb;
+ 		mvneta_txq_inc_put(txq);
+ 	} else {
+ 		/* First but not Last */
+ 		tx_cmd |= MVNETA_TXD_F_DESC;
+-		txq->tx_skb[txq->txq_put_index] = NULL;
++		buf->skb = NULL;
+ 		mvneta_txq_inc_put(txq);
+ 		tx_desc->command = tx_cmd;
+ 		/* Continue with other skb fragments */
+@@ -3136,9 +3157,8 @@ static int mvneta_txq_sw_init(struct mvn
+ 
+ 	txq->last_desc = txq->size - 1;
+ 
+-	txq->tx_skb = kmalloc_array(txq->size, sizeof(*txq->tx_skb),
+-				    GFP_KERNEL);
+-	if (!txq->tx_skb) {
++	txq->buf = kmalloc_array(txq->size, sizeof(*txq->buf), GFP_KERNEL);
++	if (!txq->buf) {
+ 		dma_free_coherent(pp->dev->dev.parent,
+ 				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
+ 				  txq->descs, txq->descs_phys);
+@@ -3150,7 +3170,7 @@ static int mvneta_txq_sw_init(struct mvn
+ 					   txq->size * TSO_HEADER_SIZE,
+ 					   &txq->tso_hdrs_phys, GFP_KERNEL);
+ 	if (!txq->tso_hdrs) {
+-		kfree(txq->tx_skb);
++		kfree(txq->buf);
+ 		dma_free_coherent(pp->dev->dev.parent,
+ 				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
+ 				  txq->descs, txq->descs_phys);
+@@ -3203,7 +3223,7 @@ static void mvneta_txq_sw_deinit(struct
+ {
+ 	struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id);
+ 
+-	kfree(txq->tx_skb);
++	kfree(txq->buf);
+ 
+ 	if (txq->tso_hdrs)
+ 		dma_free_coherent(pp->dev->dev.parent,
diff --git a/target/linux/mvebu/patches-5.4/016-net-mvneta-add-XDP_TX-support.patch b/target/linux/mvebu/patches-5.4/016-net-mvneta-add-XDP_TX-support.patch
new file mode 100644
index 0000000000..9edf32ff6d
--- /dev/null
+++ b/target/linux/mvebu/patches-5.4/016-net-mvneta-add-XDP_TX-support.patch
@@ -0,0 +1,175 @@
+From 2ca7bbf394e6d98741f19566b82b3d5a56e9cc2c Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Sat, 19 Oct 2019 10:13:27 +0200
+Subject: [PATCH 7/7] net: mvneta: add XDP_TX support
+
+Implement XDP_TX verdict and ndo_xdp_xmit net_device_ops function
+pointer
+
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/ethernet/marvell/mvneta.c | 128 ++++++++++++++++++++++++--
+ 1 file changed, 121 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -1811,16 +1811,19 @@ static void mvneta_txq_bufs_free(struct
+ 
+ 		mvneta_txq_inc_get(txq);
+ 
+-		if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr))
++		if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr) &&
++		    buf->type != MVNETA_TYPE_XDP_TX)
+ 			dma_unmap_single(pp->dev->dev.parent,
+ 					 tx_desc->buf_phys_addr,
+ 					 tx_desc->data_size, DMA_TO_DEVICE);
+-		if (!buf->skb)
+-			continue;
+-
+-		bytes_compl += buf->skb->len;
+-		pkts_compl++;
+-		dev_kfree_skb_any(buf->skb);
++		if (buf->type == MVNETA_TYPE_SKB && buf->skb) {
++			bytes_compl += buf->skb->len;
++			pkts_compl++;
++			dev_kfree_skb_any(buf->skb);
++		} else if (buf->type == MVNETA_TYPE_XDP_TX ||
++			   buf->type == MVNETA_TYPE_XDP_NDO) {
++			xdp_return_frame(buf->xdpf);
++		}
+ 	}
+ 
+ 	netdev_tx_completed_queue(nq, pkts_compl, bytes_compl);
+@@ -1985,6 +1988,111 @@ int mvneta_rx_refill_queue(struct mvneta
+ }
+ 
+ static int
++mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
++			struct xdp_frame *xdpf, bool dma_map)
++{
++	struct mvneta_tx_desc *tx_desc;
++	struct mvneta_tx_buf *buf;
++	dma_addr_t dma_addr;
++
++	if (txq->count >= txq->tx_stop_threshold)
++		return MVNETA_XDP_DROPPED; /* queue already at its stop threshold */
++
++	tx_desc = mvneta_txq_next_desc_get(txq);
++
++	buf = &txq->buf[txq->txq_put_index]; /* bookkeeping slot paired with tx_desc */
++	if (dma_map) {
++		/* ndo_xdp_xmit */
++		dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data,
++					  xdpf->len, DMA_TO_DEVICE);
++		if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) {
++			mvneta_txq_desc_put(txq); /* presumably gives back the descriptor taken above — verify */
++			return MVNETA_XDP_DROPPED;
++		}
++		buf->type = MVNETA_TYPE_XDP_NDO;
++	} else { /* XDP_TX: mvneta_xdp_xmit_back() passes dma_map == false */
++		struct page *page = virt_to_page(xdpf->data);
++
++		dma_addr = page_pool_get_dma_addr(page) +
++			   sizeof(*xdpf) + xdpf->headroom; /* assumes xdpf sits at the buffer start — TODO confirm */
++		dma_sync_single_for_device(pp->dev->dev.parent, dma_addr,
++					   xdpf->len, DMA_BIDIRECTIONAL);
++		buf->type = MVNETA_TYPE_XDP_TX;
++	}
++	buf->xdpf = xdpf;
++
++	tx_desc->command = MVNETA_TXD_FLZ_DESC; /* same flags as the single-descriptor skb path */
++	tx_desc->buf_phys_addr = dma_addr;
++	tx_desc->data_size = xdpf->len;
++
++	mvneta_update_stats(pp, 1, xdpf->len, true);
++	mvneta_txq_inc_put(txq);
++	txq->pending++;
++	txq->count++;
++
++	return MVNETA_XDP_TX; /* frame queued; callers treat any other value as a drop */
++}
++
++static int
++mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
++{
++	struct mvneta_tx_queue *txq;
++	struct netdev_queue *nq;
++	struct xdp_frame *xdpf;
++	int cpu;
++	u32 ret;
++
++	xdpf = convert_to_xdp_frame(xdp);
++	if (unlikely(!xdpf))
++		return MVNETA_XDP_DROPPED; /* no frame could be built from the buffer */
++
++	cpu = smp_processor_id();
++	txq = &pp->txqs[cpu % txq_number]; /* tx queue chosen from the current CPU id */
++	nq = netdev_get_tx_queue(pp->dev, txq->id);
++
++	__netif_tx_lock(nq, cpu);
++	ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false); /* false: skip dma_map_single() */
++	if (ret == MVNETA_XDP_TX)
++		mvneta_txq_pend_desc_add(pp, txq, 0); /* presumably hands pending descriptors to hw — verify */
++	__netif_tx_unlock(nq);
++
++	return ret; /* on failure the XDP_TX caller returns the buffer itself */
++}
++
++static int
++mvneta_xdp_xmit(struct net_device *dev, int num_frame,
++		struct xdp_frame **frames, u32 flags)
++{
++	struct mvneta_port *pp = netdev_priv(dev);
++	int cpu = smp_processor_id();
++	struct mvneta_tx_queue *txq;
++	struct netdev_queue *nq;
++	int i, drops = 0;
++	u32 ret;
++
++	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
++		return -EINVAL; /* flag bits outside XDP_XMIT_FLAGS_MASK */
++
++	txq = &pp->txqs[cpu % txq_number]; /* same per-CPU queue choice as mvneta_xdp_xmit_back() */
++	nq = netdev_get_tx_queue(pp->dev, txq->id);
++
++	__netif_tx_lock(nq, cpu);
++	for (i = 0; i < num_frame; i++) {
++		ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true); /* true: map with dma_map_single() */
++		if (ret != MVNETA_XDP_TX) {
++			xdp_return_frame_rx_napi(frames[i]); /* rejected frame is handed back here */
++			drops++;
++		}
++	}
++
++	if (unlikely(flags & XDP_XMIT_FLUSH))
++		mvneta_txq_pend_desc_add(pp, txq, 0); /* only when the caller set XDP_XMIT_FLUSH */
++	__netif_tx_unlock(nq);
++
++	return num_frame - drops; /* count of frames mvneta_xdp_submit_frame() accepted */
++}
++
++static int
+ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
+ 	       struct bpf_prog *prog, struct xdp_buff *xdp)
+ {
+@@ -2006,6 +2114,11 @@ mvneta_run_xdp(struct mvneta_port *pp, s
+ 		}
+ 		break;
+ 	}
++	case XDP_TX:
++		ret = mvneta_xdp_xmit_back(pp, xdp);
++		if (ret != MVNETA_XDP_TX)
++			xdp_return_buff(xdp);
++		break;
+ 	default:
+ 		bpf_warn_invalid_xdp_action(act);
+ 		/* fall through */
+@@ -4542,6 +4655,7 @@ static const struct net_device_ops mvnet
+ 	.ndo_get_stats64     = mvneta_get_stats64,
+ 	.ndo_do_ioctl        = mvneta_ioctl,
+ 	.ndo_bpf             = mvneta_xdp,
++	.ndo_xdp_xmit        = mvneta_xdp_xmit,
+ };
+ 
+ static const struct ethtool_ops mvneta_eth_tool_ops = {
diff --git a/target/linux/mvebu/patches-5.4/300-mvneta-tx-queue-workaround.patch b/target/linux/mvebu/patches-5.4/300-mvneta-tx-queue-workaround.patch
index c62839c82d..1056d801be 100644
--- a/target/linux/mvebu/patches-5.4/300-mvneta-tx-queue-workaround.patch
+++ b/target/linux/mvebu/patches-5.4/300-mvneta-tx-queue-workaround.patch
@@ -9,7 +9,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 ---
 --- a/drivers/net/ethernet/marvell/mvneta.c
 +++ b/drivers/net/ethernet/marvell/mvneta.c
-@@ -4332,6 +4332,14 @@ static int mvneta_ethtool_set_eee(struct
+@@ -4644,6 +4644,14 @@ static int mvneta_ethtool_set_eee(struct
  	return phylink_ethtool_set_eee(pp->phylink, eee);
  }
  
@@ -24,11 +24,11 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
  static const struct net_device_ops mvneta_netdev_ops = {
  	.ndo_open            = mvneta_open,
  	.ndo_stop            = mvneta_stop,
-@@ -4342,6 +4350,7 @@ static const struct net_device_ops mvnet
+@@ -4654,6 +4662,7 @@ static const struct net_device_ops mvnet
  	.ndo_fix_features    = mvneta_fix_features,
  	.ndo_get_stats64     = mvneta_get_stats64,
  	.ndo_do_ioctl        = mvneta_ioctl,
 +	.ndo_select_queue    = mvneta_select_queue,
+ 	.ndo_bpf             = mvneta_xdp,
+ 	.ndo_xdp_xmit        = mvneta_xdp_xmit,
  };
- 
- static const struct ethtool_ops mvneta_eth_tool_ops = {
-- 
2.30.2