Changing the API xdp_return_frame() to take a struct xdp_frame as its
argument seems like a natural choice. But there are some subtle
performance details here that need extra care, which is a deliberate choice.
De-referencing xdp_frame on a remote CPU during DMA-TX completion
results in the cache-line changing to "Shared" state. Later, when
the page is reused for RX, this xdp_frame cache-line is written,
which changes the state to "Modified".
This situation already happens (naturally) for virtio_net, tun and
cpumap, as the xdp_frame pointer is the queued object. In tun and
cpumap, the ptr_ring is used for efficiently transferring cache-lines
(with pointers) between CPUs. Thus, the only option is to
de-reference xdp_frame.
It is only the ixgbe driver that had an optimization, in which it could
avoid doing the de-reference of xdp_frame. The driver already has a
TX-ring queue, which (in case of remote DMA-TX completion) has to be
transferred between CPUs anyhow. In this data area, we stored a
struct xdp_mem_info and a data pointer, which allowed us to avoid
de-referencing xdp_frame.
To compensate for this, a prefetchw is used to tell the cache
coherency protocol about our access pattern. My benchmarks show that
this prefetchw is enough to compensate for the lost optimization in
the ixgbe driver.
V7: Adjust for commit
d9314c474d4f ("i40e: add support for XDP_REDIRECT")
V8: Adjust for commit
bd658dda4237 ("net/mlx5e: Separate dma base address
and offset in dma_sync call")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
kfree(tx_buffer->raw_buf);
else if (ring_is_xdp(ring))
- xdp_return_frame(tx_buffer->xdpf->data,
- &tx_buffer->xdpf->mem);
+ xdp_return_frame(tx_buffer->xdpf);
else
dev_kfree_skb_any(tx_buffer->skb);
if (dma_unmap_len(tx_buffer, len))
/* free the skb/XDP data */
if (ring_is_xdp(tx_ring))
- xdp_return_frame(tx_buf->xdpf->data, &tx_buf->xdpf->mem);
+ xdp_return_frame(tx_buf->xdpf);
else
napi_consume_skb(tx_buf->skb, napi_budget);
unsigned long time_stamp;
union {
struct sk_buff *skb;
- /* XDP uses address ptr on irq_clean */
- void *data;
+ struct xdp_frame *xdpf;
};
unsigned int bytecount;
unsigned short gso_segs;
DEFINE_DMA_UNMAP_ADDR(dma);
DEFINE_DMA_UNMAP_LEN(len);
u32 tx_flags;
- struct xdp_mem_info xdp_mem;
};
struct ixgbe_rx_buffer {
/* free the skb */
if (ring_is_xdp(tx_ring))
- xdp_return_frame(tx_buffer->data, &tx_buffer->xdp_mem);
+ xdp_return_frame(tx_buffer->xdpf);
else
napi_consume_skb(tx_buffer->skb, napi_budget);
xdp.data_hard_start = xdp.data -
ixgbe_rx_offset(rx_ring);
xdp.data_end = xdp.data + size;
+ prefetchw(xdp.data_hard_start); /* xdp_frame write */
skb = ixgbe_run_xdp(adapter, rx_ring, &xdp);
}
/* Free all the Tx ring sk_buffs */
if (ring_is_xdp(tx_ring))
- xdp_return_frame(tx_buffer->data, &tx_buffer->xdp_mem);
+ xdp_return_frame(tx_buffer->xdpf);
else
dev_kfree_skb_any(tx_buffer->skb);
struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
struct ixgbe_tx_buffer *tx_buffer;
union ixgbe_adv_tx_desc *tx_desc;
+ struct xdp_frame *xdpf;
u32 len, cmd_type;
dma_addr_t dma;
u16 i;
- len = xdp->data_end - xdp->data;
+ xdpf = convert_to_xdp_frame(xdp);
+ if (unlikely(!xdpf))
+ return -EOVERFLOW;
+
+ len = xdpf->len;
if (unlikely(!ixgbe_desc_unused(ring)))
return IXGBE_XDP_CONSUMED;
- dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE);
+ dma = dma_map_single(ring->dev, xdpf->data, len, DMA_TO_DEVICE);
if (dma_mapping_error(ring->dev, dma))
return IXGBE_XDP_CONSUMED;
dma_unmap_len_set(tx_buffer, len, len);
dma_unmap_addr_set(tx_buffer, dma, dma);
- tx_buffer->data = xdp->data;
- tx_buffer->xdp_mem = xdp->rxq->mem;
+ tx_buffer->xdpf = xdpf;
tx_desc->read.buffer_addr = cpu_to_le64(dma);
dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
frag_size, DMA_FROM_DEVICE);
+ prefetchw(va); /* xdp_frame data area */
prefetch(data);
wi->offset += frag_size;
if (tun_is_xdp_frame(ptr)) {
struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
- xdp_return_frame(xdpf->data, &xdpf->mem);
+ xdp_return_frame(xdpf);
} else {
__skb_array_destroy_skb(ptr);
}
struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
ret = tun_put_user_xdp(tun, tfile, xdpf, to);
- xdp_return_frame(xdpf->data, &xdpf->mem);
+ xdp_return_frame(xdpf);
} else {
struct sk_buff *skb = ptr;
/* Free up any pending old buffers before queueing new ones. */
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
- xdp_return_frame(xdpf_sent->data, &xdpf_sent->mem);
+ xdp_return_frame(xdpf_sent);
xdpf = convert_to_xdp_frame(xdp);
if (unlikely(!xdpf))
return xdp_frame;
}
-void xdp_return_frame(void *data, struct xdp_mem_info *mem);
+void xdp_return_frame(struct xdp_frame *xdpf);
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
struct net_device *dev, u32 queue_index);
while ((xdpf = ptr_ring_consume(ring)))
if (WARN_ON_ONCE(xdpf))
- xdp_return_frame(xdpf->data, &xdpf->mem);
+ xdp_return_frame(xdpf);
}
static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
skb = cpu_map_build_skb(rcpu, xdpf);
if (!skb) {
- xdp_return_frame(xdpf->data, &xdpf->mem);
+ xdp_return_frame(xdpf);
continue;
}
err = __ptr_ring_produce(q, xdpf);
if (err) {
drops++;
- xdp_return_frame(xdpf->data, &xdpf->mem);
+ xdp_return_frame(xdpf);
}
processed++;
}
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
-void xdp_return_frame(void *data, struct xdp_mem_info *mem)
+void xdp_return_frame(struct xdp_frame *xdpf)
{
+ struct xdp_mem_info *mem = &xdpf->mem;
struct xdp_mem_allocator *xa;
+ void *data = xdpf->data;
struct page *page;
switch (mem->type) {