From: Daniel Borkmann Date: Wed, 19 Apr 2017 21:01:17 +0000 (+0200) Subject: bpf: add napi_id read access to __sk_buff X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=b1d9fc41aab11f9520b2e0d57ae872e2ec5d6f32;p=openwrt%2Fstaging%2Fblogic.git bpf: add napi_id read access to __sk_buff Add napi_id access to __sk_buff for socket filter program types, tc program types and other bpf_convert_ctx_access() users. Having access to skb->napi_id is useful for per RX queue listener siloing, f.e. in combination with SO_ATTACH_REUSEPORT_EBPF and when busy polling is used, meaning SO_REUSEPORT enabled listeners can then select the corresponding socket at SYN time already [1]. The skb is marked via skb_mark_napi_id() early in the receive path (e.g., napi_gro_receive()). Currently, sockets can only use SO_INCOMING_NAPI_ID from 6d4339028b35 ("net: Introduce SO_INCOMING_NAPI_ID") as a socket option to look up the NAPI ID associated with the queue for steering, which requires a prior sk_mark_napi_id() after the socket was looked up. Semantics for the __sk_buff napi_id access are similar, meaning if skb->napi_id is < MIN_NAPI_ID (e.g. outgoing packets using sender_cpu), then an invalid napi_id of 0 is returned to the program, otherwise a valid non-zero napi_id. [1] http://netdevconf.org/2.1/slides/apr6/dumazet-BUSY-POLLING-Netdev-2.1.pdf Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1e062bb54eec..e553529929f6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -603,6 +603,7 @@ struct __sk_buff { __u32 tc_classid; __u32 data; __u32 data_end; + __u32 napi_id; }; struct bpf_tunnel_key { diff --git a/net/core/filter.c b/net/core/filter.c index 085925834727..9a37860a80fc 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -53,6 +53,7 @@ #include #include #include +#include /** * sk_filter_trim_cap - run a packet through a socket filter @@ -3201,6 +3202,19 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg); else *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); +#endif + break; + + case offsetof(struct __sk_buff, napi_id): +#if defined(CONFIG_NET_RX_BUSY_POLL) + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sk_buff, napi_id)); + *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1); + *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); +#else + *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #endif break; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1e062bb54eec..e553529929f6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -603,6 +603,7 @@ struct __sk_buff { __u32 tc_classid; __u32 data; __u32 data_end; + __u32 napi_id; }; struct bpf_tunnel_key { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 6178b65fee59..95a8d5f3ab80 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -772,6 +772,9 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, vlan_tci)), BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, napi_id)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), BPF_EXIT_INSN(), }, .result = ACCEPT,