#include <linux/workqueue.h>
#include <linux/if_xdp.h>
#include <linux/mutex.h>
+#include <linux/spinlock.h>
#include <linux/mm.h>
#include <net/sock.h>
struct net_device *dev;
u16 queue_id;
bool zc;
+ spinlock_t xsk_list_lock;
+ struct list_head xsk_list;
};
struct xdp_sock {
struct list_head flush_node;
u16 queue_id;
struct xsk_queue *tx ____cacheline_aligned_in_smp;
+ struct list_head list;
+ bool zc;
/* Protects multiple processes in the control path */
struct mutex mutex;
u64 rx_dropped;
int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
void xsk_flush(struct xdp_sock *xs);
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
+/* Used from netdev driver */
u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
void xsk_umem_discard_addr(struct xdp_umem *umem);
+void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
+bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len);
+void xsk_umem_consume_tx_done(struct xdp_umem *umem);
#else
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
#define XDP_UMEM_MIN_CHUNK_SIZE 2048
+void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&umem->xsk_list_lock, flags);
+ list_add_rcu(&xs->list, &umem->xsk_list);
+ spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
+}
+
+void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
+{
+ unsigned long flags;
+
+ if (xs->dev) {
+ spin_lock_irqsave(&umem->xsk_list_lock, flags);
+ list_del_rcu(&xs->list);
+ spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
+
+ if (umem->zc)
+ synchronize_net();
+ }
+}
+
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
u32 queue_id, u16 flags)
{
dev_hold(dev);
- if (dev->netdev_ops->ndo_bpf) {
+ if (dev->netdev_ops->ndo_bpf && dev->netdev_ops->ndo_xsk_async_xmit) {
bpf.command = XDP_QUERY_XSK_UMEM;
rtnl_lock();
return force_zc ? -ENOTSUPP : 0; /* fail or fallback */
}
-void xdp_umem_clear_dev(struct xdp_umem *umem)
+static void xdp_umem_clear_dev(struct xdp_umem *umem)
{
struct netdev_bpf bpf;
int err;
umem->npgs = size / PAGE_SIZE;
umem->pgs = NULL;
umem->user = NULL;
+ INIT_LIST_HEAD(&umem->xsk_list);
+ spin_lock_init(&umem->xsk_list_lock);
refcount_set(&umem->users, 1);
return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
}
+static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
+{
+ return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
+}
+
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
u32 queue_id, u16 flags);
-void xdp_umem_clear_dev(struct xdp_umem *umem);
bool xdp_umem_validate_queues(struct xdp_umem *umem);
void xdp_get_umem(struct xdp_umem *umem);
void xdp_put_umem(struct xdp_umem *umem);
+void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs);
+void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs);
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);
#endif /* XDP_UMEM_H_ */
#include <linux/uaccess.h>
#include <linux/net.h>
#include <linux/netdevice.h>
+#include <linux/rculist.h>
#include <net/xdp_sock.h>
#include <net/xdp.h>
return err;
}
+void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
+{
+ xskq_produce_flush_addr_n(umem->cq, nb_entries);
+}
+EXPORT_SYMBOL(xsk_umem_complete_tx);
+
+void xsk_umem_consume_tx_done(struct xdp_umem *umem)
+{
+ struct xdp_sock *xs;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ xs->sk.sk_write_space(&xs->sk);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(xsk_umem_consume_tx_done);
+
+bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len)
+{
+ struct xdp_desc desc;
+ struct xdp_sock *xs;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ if (!xskq_peek_desc(xs->tx, &desc))
+ continue;
+
+ if (xskq_produce_addr_lazy(umem->cq, desc.addr))
+ goto out;
+
+ *dma = xdp_umem_get_dma(umem, desc.addr);
+ *len = desc.len;
+
+ xskq_discard_desc(xs->tx);
+ rcu_read_unlock();
+ return true;
+ }
+
+out:
+ rcu_read_unlock();
+ return false;
+}
+EXPORT_SYMBOL(xsk_umem_consume_tx);
+
+static int xsk_zc_xmit(struct sock *sk)
+{
+ struct xdp_sock *xs = xdp_sk(sk);
+ struct net_device *dev = xs->dev;
+
+ return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id);
+}
+
static void xsk_destruct_skb(struct sk_buff *skb)
{
u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
size_t total_len)
{
- bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
u32 max_batch = TX_BATCH_SIZE;
struct xdp_sock *xs = xdp_sk(sk);
bool sent_frame = false;
if (unlikely(!xs->tx))
return -ENOBUFS;
- if (need_wait)
- return -EOPNOTSUPP;
mutex_lock(&xs->mutex);
goto out;
}
- skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
+ skb = sock_alloc_send_skb(sk, len, 1, &err);
if (unlikely(!skb)) {
err = -EAGAIN;
goto out;
static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
+ bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
return -ENXIO;
if (unlikely(!(xs->dev->flags & IFF_UP)))
return -ENETDOWN;
+ if (need_wait)
+ return -EOPNOTSUPP;
- return xsk_generic_xmit(sk, m, total_len);
+ return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
}
static unsigned int xsk_poll(struct file *file, struct socket *sock,
}
xs->dev = dev;
- xs->queue_id = sxdp->sxdp_queue_id;
-
+ xs->zc = xs->umem->zc;
+ xs->queue_id = qid;
xskq_set_umem(xs->rx, &xs->umem->props);
xskq_set_umem(xs->tx, &xs->umem->props);
+ xdp_add_sk_umem(xs->umem, xs);
out_unlock:
if (err)
xskq_destroy(xs->rx);
xskq_destroy(xs->tx);
+ xdp_del_sk_umem(xs->umem, xs);
xdp_put_umem(xs->umem);
sk_refcnt_debug_dec(sk);
#include <net/xdp_sock.h>
#define RX_BATCH_SIZE 16
+#define LAZY_UPDATE_THRESHOLD 128
struct xdp_ring {
u32 producer ____cacheline_aligned_in_smp;
return (entries > dcnt) ? dcnt : entries;
}
+static inline u32 xskq_nb_free_lazy(struct xsk_queue *q, u32 producer)
+{
+ return q->nentries - (producer - q->cons_tail);
+}
+
static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
{
- u32 free_entries = q->nentries - (producer - q->cons_tail);
+ u32 free_entries = xskq_nb_free_lazy(q, producer);
if (free_entries >= dcnt)
return free_entries;
{
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+ if (xskq_nb_free(q, q->prod_tail, LAZY_UPDATE_THRESHOLD) == 0)
+ return -ENOSPC;
+
ring->desc[q->prod_tail++ & q->ring_mask] = addr;
/* Order producer and data */
return 0;
}
+static inline int xskq_produce_addr_lazy(struct xsk_queue *q, u64 addr)
+{
+ struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+
+ if (xskq_nb_free(q, q->prod_head, LAZY_UPDATE_THRESHOLD) == 0)
+ return -ENOSPC;
+
+ ring->desc[q->prod_head++ & q->ring_mask] = addr;
+ return 0;
+}
+
+static inline void xskq_produce_flush_addr_n(struct xsk_queue *q,
+ u32 nb_entries)
+{
+ /* Order producer and data */
+ smp_wmb();
+
+ q->prod_tail += nb_entries;
+ WRITE_ONCE(q->ring->producer, q->prod_tail);
+}
+
static inline int xskq_reserve_addr(struct xsk_queue *q)
{
if (xskq_nb_free(q, q->prod_head, 1) == 0)