At the moment, all kernel bpf objects are listed under BPF_OBJ_FILES.
Listing them manually sometimes causes patch conflicts when people are
adding new testcases simultaneously.
It is better to gather all the related source files under a subdir
"progs", then auto-generate the object file list from that directory.
Suggested-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
test_netcnt test_tcpnotify_user test_sock_fields
-BPF_OBJ_FILES = \
- test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
- sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o \
- test_tcpnotify_kern.o sample_map_ret0.o test_tcpbpf_kern.o \
- sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o \
- test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o \
- test_tunnel_kern.o test_sockhash_kern.o test_lwt_seg6local.o \
- sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
- get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
- test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_xdp_vlan.o \
- xdp_dummy.o test_map_in_map.o test_spin_lock.o test_map_lock.o \
- test_pkt_access.o test_xdp.o test_adjust_tail.o test_l4lb.o \
- test_l4lb_noinline.o test_xdp_noinline.o test_tcp_estats.o \
- test_obj_id.o test_pkt_md_access.o test_tracepoint.o \
- test_stacktrace_map.o test_stacktrace_build_id.o \
- test_get_stack_rawtp.o test_sk_lookup_kern.o test_queue_map.o \
- test_stack_map.o test_sock_fields_kern.o
-
+BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
TEST_GEN_FILES = $(BPF_OBJ_FILES)
# Also test sub-register code-gen if LLVM + kernel both has eBPF v3 processor
$(CC) $(CFLAGS) -o $(ALU32_BUILD_DIR)/test_progs_32 $< \
trace_helpers.c $(OUTPUT)/libbpf.a $(LDLIBS)
-$(ALU32_BUILD_DIR)/%.o: %.c $(ALU32_BUILD_DIR) $(ALU32_BUILD_DIR)/test_progs_32
+$(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR) \
+ $(ALU32_BUILD_DIR)/test_progs_32
$(CLANG) $(CLANG_FLAGS) \
-O2 -target bpf -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mattr=+alu32 -mcpu=$(CPU) $(LLC_FLAGS) \
# Have one program compiled without "-target bpf" to test whether libbpf loads
# it successfully
-$(OUTPUT)/test_xdp.o: test_xdp.c
+$(OUTPUT)/test_xdp.o: progs/test_xdp.c
$(CLANG) $(CLANG_FLAGS) \
-O2 -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
$(BTF_PAHOLE) -J $@
endif
-$(OUTPUT)/%.o: %.c
+$(OUTPUT)/%.o: progs/%.c
$(CLANG) $(CLANG_FLAGS) \
-O2 -target bpf -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-#include <limits.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <string.h>
-#include <linux/pkt_cls.h>
-#include <linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/icmp.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/if_packet.h>
-#include <sys/socket.h>
-#include <linux/if_tunnel.h>
-#include <linux/mpls.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
-
-int _version SEC("version") = 1;
-#define PROG(F) SEC(#F) int bpf_func_##F
-
-/* These are the identifiers of the BPF programs that will be used in tail
- * calls. Name is limited to 16 characters, with the terminating character and
- * bpf_func_ above, we have only 6 to work with, anything after will be cropped.
- */
-enum {
- IP,
- IPV6,
- IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */
- IPV6FR, /* Fragmentation IPv6 Extension Header */
- MPLS,
- VLAN,
-};
-
-#define IP_MF 0x2000
-#define IP_OFFSET 0x1FFF
-#define IP6_MF 0x0001
-#define IP6_OFFSET 0xFFF8
-
-struct vlan_hdr {
- __be16 h_vlan_TCI;
- __be16 h_vlan_encapsulated_proto;
-};
-
-struct gre_hdr {
- __be16 flags;
- __be16 proto;
-};
-
-struct frag_hdr {
- __u8 nexthdr;
- __u8 reserved;
- __be16 frag_off;
- __be32 identification;
-};
-
-struct bpf_map_def SEC("maps") jmp_table = {
- .type = BPF_MAP_TYPE_PROG_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = 8
-};
-
-static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
- __u16 hdr_size,
- void *buffer)
-{
- void *data_end = (void *)(long)skb->data_end;
- void *data = (void *)(long)skb->data;
- __u16 thoff = skb->flow_keys->thoff;
- __u8 *hdr;
-
- /* Verifies this variable offset does not overflow */
- if (thoff > (USHRT_MAX - hdr_size))
- return NULL;
-
- hdr = data + thoff;
- if (hdr + hdr_size <= data_end)
- return hdr;
-
- if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size))
- return NULL;
-
- return buffer;
-}
-
-/* Dispatches on ETHERTYPE */
-static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
-{
- struct bpf_flow_keys *keys = skb->flow_keys;
-
- keys->n_proto = proto;
- switch (proto) {
- case bpf_htons(ETH_P_IP):
- bpf_tail_call(skb, &jmp_table, IP);
- break;
- case bpf_htons(ETH_P_IPV6):
- bpf_tail_call(skb, &jmp_table, IPV6);
- break;
- case bpf_htons(ETH_P_MPLS_MC):
- case bpf_htons(ETH_P_MPLS_UC):
- bpf_tail_call(skb, &jmp_table, MPLS);
- break;
- case bpf_htons(ETH_P_8021Q):
- case bpf_htons(ETH_P_8021AD):
- bpf_tail_call(skb, &jmp_table, VLAN);
- break;
- default:
- /* Protocol not supported */
- return BPF_DROP;
- }
-
- return BPF_DROP;
-}
-
-SEC("flow_dissector")
-int _dissect(struct __sk_buff *skb)
-{
- if (!skb->vlan_present)
- return parse_eth_proto(skb, skb->protocol);
- else
- return parse_eth_proto(skb, skb->vlan_proto);
-}
-
-/* Parses on IPPROTO_* */
-static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
-{
- struct bpf_flow_keys *keys = skb->flow_keys;
- void *data_end = (void *)(long)skb->data_end;
- struct icmphdr *icmp, _icmp;
- struct gre_hdr *gre, _gre;
- struct ethhdr *eth, _eth;
- struct tcphdr *tcp, _tcp;
- struct udphdr *udp, _udp;
-
- keys->ip_proto = proto;
- switch (proto) {
- case IPPROTO_ICMP:
- icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
- if (!icmp)
- return BPF_DROP;
- return BPF_OK;
- case IPPROTO_IPIP:
- keys->is_encap = true;
- return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
- case IPPROTO_IPV6:
- keys->is_encap = true;
- return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
- case IPPROTO_GRE:
- gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
- if (!gre)
- return BPF_DROP;
-
- if (bpf_htons(gre->flags & GRE_VERSION))
- /* Only inspect standard GRE packets with version 0 */
- return BPF_OK;
-
- keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
- if (GRE_IS_CSUM(gre->flags))
- keys->thoff += 4; /* Step over chksum and Padding */
- if (GRE_IS_KEY(gre->flags))
- keys->thoff += 4; /* Step over key */
- if (GRE_IS_SEQ(gre->flags))
- keys->thoff += 4; /* Step over sequence number */
-
- keys->is_encap = true;
-
- if (gre->proto == bpf_htons(ETH_P_TEB)) {
- eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
- &_eth);
- if (!eth)
- return BPF_DROP;
-
- keys->thoff += sizeof(*eth);
-
- return parse_eth_proto(skb, eth->h_proto);
- } else {
- return parse_eth_proto(skb, gre->proto);
- }
- case IPPROTO_TCP:
- tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
- if (!tcp)
- return BPF_DROP;
-
- if (tcp->doff < 5)
- return BPF_DROP;
-
- if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
- return BPF_DROP;
-
- keys->sport = tcp->source;
- keys->dport = tcp->dest;
- return BPF_OK;
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
- if (!udp)
- return BPF_DROP;
-
- keys->sport = udp->source;
- keys->dport = udp->dest;
- return BPF_OK;
- default:
- return BPF_DROP;
- }
-
- return BPF_DROP;
-}
-
-static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
-{
- struct bpf_flow_keys *keys = skb->flow_keys;
-
- keys->ip_proto = nexthdr;
- switch (nexthdr) {
- case IPPROTO_HOPOPTS:
- case IPPROTO_DSTOPTS:
- bpf_tail_call(skb, &jmp_table, IPV6OP);
- break;
- case IPPROTO_FRAGMENT:
- bpf_tail_call(skb, &jmp_table, IPV6FR);
- break;
- default:
- return parse_ip_proto(skb, nexthdr);
- }
-
- return BPF_DROP;
-}
-
-PROG(IP)(struct __sk_buff *skb)
-{
- void *data_end = (void *)(long)skb->data_end;
- struct bpf_flow_keys *keys = skb->flow_keys;
- void *data = (void *)(long)skb->data;
- struct iphdr *iph, _iph;
- bool done = false;
-
- iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
- if (!iph)
- return BPF_DROP;
-
- /* IP header cannot be smaller than 20 bytes */
- if (iph->ihl < 5)
- return BPF_DROP;
-
- keys->addr_proto = ETH_P_IP;
- keys->ipv4_src = iph->saddr;
- keys->ipv4_dst = iph->daddr;
-
- keys->thoff += iph->ihl << 2;
- if (data + keys->thoff > data_end)
- return BPF_DROP;
-
- if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
- keys->is_frag = true;
- if (iph->frag_off & bpf_htons(IP_OFFSET))
- /* From second fragment on, packets do not have headers
- * we can parse.
- */
- done = true;
- else
- keys->is_first_frag = true;
- }
-
- if (done)
- return BPF_OK;
-
- return parse_ip_proto(skb, iph->protocol);
-}
-
-PROG(IPV6)(struct __sk_buff *skb)
-{
- struct bpf_flow_keys *keys = skb->flow_keys;
- struct ipv6hdr *ip6h, _ip6h;
-
- ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
- if (!ip6h)
- return BPF_DROP;
-
- keys->addr_proto = ETH_P_IPV6;
- memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
-
- keys->thoff += sizeof(struct ipv6hdr);
-
- return parse_ipv6_proto(skb, ip6h->nexthdr);
-}
-
-PROG(IPV6OP)(struct __sk_buff *skb)
-{
- struct ipv6_opt_hdr *ip6h, _ip6h;
-
- ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
- if (!ip6h)
- return BPF_DROP;
-
- /* hlen is in 8-octets and does not include the first 8 bytes
- * of the header
- */
- skb->flow_keys->thoff += (1 + ip6h->hdrlen) << 3;
-
- return parse_ipv6_proto(skb, ip6h->nexthdr);
-}
-
-PROG(IPV6FR)(struct __sk_buff *skb)
-{
- struct bpf_flow_keys *keys = skb->flow_keys;
- struct frag_hdr *fragh, _fragh;
-
- fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
- if (!fragh)
- return BPF_DROP;
-
- keys->thoff += sizeof(*fragh);
- keys->is_frag = true;
- if (!(fragh->frag_off & bpf_htons(IP6_OFFSET)))
- keys->is_first_frag = true;
-
- return parse_ipv6_proto(skb, fragh->nexthdr);
-}
-
-PROG(MPLS)(struct __sk_buff *skb)
-{
- struct mpls_label *mpls, _mpls;
-
- mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
- if (!mpls)
- return BPF_DROP;
-
- return BPF_OK;
-}
-
-PROG(VLAN)(struct __sk_buff *skb)
-{
- struct bpf_flow_keys *keys = skb->flow_keys;
- struct vlan_hdr *vlan, _vlan;
- __be16 proto;
-
- /* Peek back to see if single or double-tagging */
- if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto,
- sizeof(proto)))
- return BPF_DROP;
-
- /* Account for double-tagging */
- if (proto == bpf_htons(ETH_P_8021AD)) {
- vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
- if (!vlan)
- return BPF_DROP;
-
- if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
- return BPF_DROP;
-
- keys->thoff += sizeof(*vlan);
- }
-
- vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
- if (!vlan)
- return BPF_DROP;
-
- keys->thoff += sizeof(*vlan);
- /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
- if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
- vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
- return BPF_DROP;
-
- return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
-}
-
-char __license[] SEC("license") = "GPL";
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#include <string.h>
-
-#include <linux/stddef.h>
-#include <linux/bpf.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <sys/socket.h>
-
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
-
-#define SRC_REWRITE_IP4 0x7f000004U
-#define DST_REWRITE_IP4 0x7f000001U
-#define DST_REWRITE_PORT4 4444
-
-int _version SEC("version") = 1;
-
-SEC("cgroup/connect4")
-int connect_v4_prog(struct bpf_sock_addr *ctx)
-{
- struct bpf_sock_tuple tuple = {};
- struct sockaddr_in sa;
- struct bpf_sock *sk;
-
- /* Verify that new destination is available. */
- memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr));
- memset(&tuple.ipv4.sport, 0, sizeof(tuple.ipv4.sport));
-
- tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
- tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
-
- if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
- return 0;
- else if (ctx->type == SOCK_STREAM)
- sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4),
- BPF_F_CURRENT_NETNS, 0);
- else
- sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4),
- BPF_F_CURRENT_NETNS, 0);
-
- if (!sk)
- return 0;
-
- if (sk->src_ip4 != tuple.ipv4.daddr ||
- sk->src_port != DST_REWRITE_PORT4) {
- bpf_sk_release(sk);
- return 0;
- }
-
- bpf_sk_release(sk);
-
- /* Rewrite destination. */
- ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
- ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
-
- /* Rewrite source. */
- memset(&sa, 0, sizeof(sa));
-
- sa.sin_family = AF_INET;
- sa.sin_port = bpf_htons(0);
- sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
-
- if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
- return 0;
-
- return 1;
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#include <string.h>
-
-#include <linux/stddef.h>
-#include <linux/bpf.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <sys/socket.h>
-
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
-
-#define SRC_REWRITE_IP6_0 0
-#define SRC_REWRITE_IP6_1 0
-#define SRC_REWRITE_IP6_2 0
-#define SRC_REWRITE_IP6_3 6
-
-#define DST_REWRITE_IP6_0 0
-#define DST_REWRITE_IP6_1 0
-#define DST_REWRITE_IP6_2 0
-#define DST_REWRITE_IP6_3 1
-
-#define DST_REWRITE_PORT6 6666
-
-int _version SEC("version") = 1;
-
-SEC("cgroup/connect6")
-int connect_v6_prog(struct bpf_sock_addr *ctx)
-{
- struct bpf_sock_tuple tuple = {};
- struct sockaddr_in6 sa;
- struct bpf_sock *sk;
-
- /* Verify that new destination is available. */
- memset(&tuple.ipv6.saddr, 0, sizeof(tuple.ipv6.saddr));
- memset(&tuple.ipv6.sport, 0, sizeof(tuple.ipv6.sport));
-
- tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0);
- tuple.ipv6.daddr[1] = bpf_htonl(DST_REWRITE_IP6_1);
- tuple.ipv6.daddr[2] = bpf_htonl(DST_REWRITE_IP6_2);
- tuple.ipv6.daddr[3] = bpf_htonl(DST_REWRITE_IP6_3);
-
- tuple.ipv6.dport = bpf_htons(DST_REWRITE_PORT6);
-
- if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
- return 0;
- else if (ctx->type == SOCK_STREAM)
- sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv6),
- BPF_F_CURRENT_NETNS, 0);
- else
- sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv6),
- BPF_F_CURRENT_NETNS, 0);
-
- if (!sk)
- return 0;
-
- if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
- sk->src_ip6[1] != tuple.ipv6.daddr[1] ||
- sk->src_ip6[2] != tuple.ipv6.daddr[2] ||
- sk->src_ip6[3] != tuple.ipv6.daddr[3] ||
- sk->src_port != DST_REWRITE_PORT6) {
- bpf_sk_release(sk);
- return 0;
- }
-
- bpf_sk_release(sk);
-
- /* Rewrite destination. */
- ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
- ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
- ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
- ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
-
- ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
-
- /* Rewrite source. */
- memset(&sa, 0, sizeof(sa));
-
- sa.sin6_family = AF_INET6;
- sa.sin6_port = bpf_htons(0);
-
- sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0);
- sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1);
- sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2);
- sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3);
-
- if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
- return 0;
-
- return 1;
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-/* Copyright (c) 2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-
-#include <linux/bpf.h>
-#include <linux/version.h>
-#include "bpf_helpers.h"
-
-SEC("cgroup/dev")
-int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx)
-{
- short type = ctx->access_type & 0xFFFF;
-#ifdef DEBUG
- short access = ctx->access_type >> 16;
- char fmt[] = " %d:%d \n";
-
- switch (type) {
- case BPF_DEVCG_DEV_BLOCK:
- fmt[0] = 'b';
- break;
- case BPF_DEVCG_DEV_CHAR:
- fmt[0] = 'c';
- break;
- default:
- fmt[0] = '?';
- break;
- }
-
- if (access & BPF_DEVCG_ACC_READ)
- fmt[8] = 'r';
-
- if (access & BPF_DEVCG_ACC_WRITE)
- fmt[9] = 'w';
-
- if (access & BPF_DEVCG_ACC_MKNOD)
- fmt[10] = 'm';
-
- bpf_trace_printk(fmt, sizeof(fmt), ctx->major, ctx->minor);
-#endif
-
- /* Allow access to /dev/zero and /dev/random.
- * Forbid everything else.
- */
- if (ctx->major != 1 || type != BPF_DEVCG_DEV_CHAR)
- return 0;
-
- switch (ctx->minor) {
- case 5: /* 1:5 /dev/zero */
- case 9: /* 1:9 /dev/urandom */
- return 1;
- }
-
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = LINUX_VERSION_CODE;
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#include <linux/bpf.h>
-#include "bpf_helpers.h"
-
-struct bpf_map_def SEC("maps") cg_ids = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u64),
- .max_entries = 1,
-};
-
-struct bpf_map_def SEC("maps") pidmap = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = 1,
-};
-
-SEC("tracepoint/syscalls/sys_enter_nanosleep")
-int trace(void *ctx)
-{
- __u32 pid = bpf_get_current_pid_tgid();
- __u32 key = 0, *expected_pid;
- __u64 *val;
-
- expected_pid = bpf_map_lookup_elem(&pidmap, &key);
- if (!expected_pid || *expected_pid != pid)
- return 0;
-
- val = bpf_map_lookup_elem(&cg_ids, &key);
- if (val)
- *val = bpf_get_current_cgroup_id();
-
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
-#include <linux/version.h>
-
-#include "bpf_helpers.h"
-#include "netcnt_common.h"
-
-#define MAX_BPS (3 * 1024 * 1024)
-
-#define REFRESH_TIME_NS 100000000
-#define NS_PER_SEC 1000000000
-
-struct bpf_map_def SEC("maps") percpu_netcnt = {
- .type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
- .key_size = sizeof(struct bpf_cgroup_storage_key),
- .value_size = sizeof(struct percpu_net_cnt),
-};
-
-BPF_ANNOTATE_KV_PAIR(percpu_netcnt, struct bpf_cgroup_storage_key,
- struct percpu_net_cnt);
-
-struct bpf_map_def SEC("maps") netcnt = {
- .type = BPF_MAP_TYPE_CGROUP_STORAGE,
- .key_size = sizeof(struct bpf_cgroup_storage_key),
- .value_size = sizeof(struct net_cnt),
-};
-
-BPF_ANNOTATE_KV_PAIR(netcnt, struct bpf_cgroup_storage_key,
- struct net_cnt);
-
-SEC("cgroup/skb")
-int bpf_nextcnt(struct __sk_buff *skb)
-{
- struct percpu_net_cnt *percpu_cnt;
- char fmt[] = "%d %llu %llu\n";
- struct net_cnt *cnt;
- __u64 ts, dt;
- int ret;
-
- cnt = bpf_get_local_storage(&netcnt, 0);
- percpu_cnt = bpf_get_local_storage(&percpu_netcnt, 0);
-
- percpu_cnt->packets++;
- percpu_cnt->bytes += skb->len;
-
- if (percpu_cnt->packets > MAX_PERCPU_PACKETS) {
- __sync_fetch_and_add(&cnt->packets,
- percpu_cnt->packets);
- percpu_cnt->packets = 0;
-
- __sync_fetch_and_add(&cnt->bytes,
- percpu_cnt->bytes);
- percpu_cnt->bytes = 0;
- }
-
- ts = bpf_ktime_get_ns();
- dt = ts - percpu_cnt->prev_ts;
-
- dt *= MAX_BPS;
- dt /= NS_PER_SEC;
-
- if (cnt->bytes + percpu_cnt->bytes - percpu_cnt->prev_bytes < dt)
- ret = 1;
- else
- ret = 0;
-
- if (dt > REFRESH_TIME_NS) {
- percpu_cnt->prev_ts = ts;
- percpu_cnt->prev_packets = cnt->packets;
- percpu_cnt->prev_bytes = cnt->bytes;
- }
-
- return !!ret;
-}
-
-char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = LINUX_VERSION_CODE;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <limits.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_packet.h>
+#include <sys/socket.h>
+#include <linux/if_tunnel.h>
+#include <linux/mpls.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+#define PROG(F) SEC(#F) int bpf_func_##F
+
+/* These are the identifiers of the BPF programs that will be used in tail
+ * calls. Name is limited to 16 characters, with the terminating character and
+ * bpf_func_ above, we have only 6 to work with, anything after will be cropped.
+ */
+enum {
+ IP,
+ IPV6,
+ IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */
+ IPV6FR, /* Fragmentation IPv6 Extension Header */
+ MPLS,
+ VLAN,
+};
+
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1FFF
+#define IP6_MF 0x0001
+#define IP6_OFFSET 0xFFF8
+
+struct vlan_hdr {
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+
+struct gre_hdr {
+ __be16 flags;
+ __be16 proto;
+};
+
+struct frag_hdr {
+ __u8 nexthdr;
+ __u8 reserved;
+ __be16 frag_off;
+ __be32 identification;
+};
+
+struct bpf_map_def SEC("maps") jmp_table = {
+ .type = BPF_MAP_TYPE_PROG_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 8
+};
+/* Return a pointer to the hdr_size bytes that start at offset
+ * skb->flow_keys->thoff.
+ *
+ * When those bytes end at or before skb->data_end, the returned pointer refers
+ * to the packet data directly. Otherwise the bytes are copied into the
+ * caller-supplied buffer with bpf_skb_load_bytes() and buffer is returned.
+ * Returns NULL if thoff + hdr_size would exceed USHRT_MAX or if
+ * bpf_skb_load_bytes() returns non-zero.
+ */
+static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
+ __u16 hdr_size,
+ void *buffer)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ __u16 thoff = skb->flow_keys->thoff;
+ __u8 *hdr;
+
+ /* Verifies this variable offset does not overflow: thoff + hdr_size
+  * must not exceed USHRT_MAX.
+  */
+ if (thoff > (USHRT_MAX - hdr_size))
+ return NULL;
+
+ hdr = data + thoff;
+ if (hdr + hdr_size <= data_end) /* whole header lies before data_end */
+ return hdr;
+
+ if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size)) /* copy instead */
+ return NULL;
+
+ return buffer;
+}
+
+/* Dispatches on ETHERTYPE.
+ *
+ * Stores proto in skb->flow_keys->n_proto, then issues a bpf_tail_call() into
+ * jmp_table at the index matching the EtherType (IP, IPV6, MPLS or VLAN).
+ * proto is compared against bpf_htons() constants, so callers pass it in the
+ * same byte order those produce.
+ *
+ * Returns BPF_DROP for an unsupported EtherType, and also whenever execution
+ * continues past bpf_tail_call().
+ * NOTE(review): bpf_tail_call() presumably does not return when the jump
+ * succeeds, making the trailing return the failure path - confirm against the
+ * BPF helper documentation.
+ */
+static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+
+ keys->n_proto = proto;
+ switch (proto) {
+ case bpf_htons(ETH_P_IP):
+ bpf_tail_call(skb, &jmp_table, IP);
+ break;
+ case bpf_htons(ETH_P_IPV6):
+ bpf_tail_call(skb, &jmp_table, IPV6);
+ break;
+ case bpf_htons(ETH_P_MPLS_MC):
+ case bpf_htons(ETH_P_MPLS_UC):
+ bpf_tail_call(skb, &jmp_table, MPLS);
+ break;
+ case bpf_htons(ETH_P_8021Q):
+ case bpf_htons(ETH_P_8021AD):
+ bpf_tail_call(skb, &jmp_table, VLAN);
+ break;
+ default:
+ /* Protocol not supported */
+ return BPF_DROP;
+ }
+
+ return BPF_DROP;
+}
+/* Program placed in the "flow_dissector" section.
+ *
+ * Starts dissection by dispatching on skb->vlan_proto when skb->vlan_present
+ * is set, and on skb->protocol otherwise. The return value is whatever
+ * parse_eth_proto() returns.
+ */
+SEC("flow_dissector")
+int _dissect(struct __sk_buff *skb)
+{
+ if (!skb->vlan_present)
+ return parse_eth_proto(skb, skb->protocol);
+ else
+ return parse_eth_proto(skb, skb->vlan_proto);
+}
+
+/* Parses on IPPROTO_*.
+ *
+ * Stores proto in skb->flow_keys->ip_proto and then handles the header found
+ * at flow_keys->thoff:
+ *  - ICMP: only checks that an icmphdr can be read; returns BPF_OK.
+ *  - IPIP / IPV6: sets is_encap and re-dispatches through parse_eth_proto()
+ *    as ETH_P_IP / ETH_P_IPV6.
+ *  - GRE: returns BPF_OK untouched if the version bits are non-zero; otherwise
+ *    advances thoff past the GRE header and its optional fields, sets
+ *    is_encap, and dispatches on the inner protocol (stepping over an
+ *    Ethernet header first when the GRE proto is ETH_P_TEB).
+ *  - TCP: requires doff >= 5, then records source/dest ports.
+ *  - UDP / UDPLITE: records source/dest ports.
+ *  - anything else: BPF_DROP.
+ *
+ * Returns BPF_OK, BPF_DROP, or the result of parse_eth_proto().
+ */
+static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+ void *data_end = (void *)(long)skb->data_end;
+ struct icmphdr *icmp, _icmp;
+ struct gre_hdr *gre, _gre;
+ struct ethhdr *eth, _eth;
+ struct tcphdr *tcp, _tcp;
+ struct udphdr *udp, _udp;
+
+ keys->ip_proto = proto;
+ switch (proto) {
+ case IPPROTO_ICMP:
+ icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
+ if (!icmp)
+ return BPF_DROP;
+ return BPF_OK;
+ case IPPROTO_IPIP:
+ keys->is_encap = true;
+ return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
+ case IPPROTO_IPV6:
+ keys->is_encap = true;
+ return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
+ case IPPROTO_GRE:
+ gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
+ if (!gre)
+ return BPF_DROP;
+
+ if (bpf_htons(gre->flags & GRE_VERSION))
+ /* Only inspect standard GRE packets with version 0 */
+ return BPF_OK;
+
+ keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
+ if (GRE_IS_CSUM(gre->flags))
+ keys->thoff += 4; /* Step over chksum and Padding */
+ if (GRE_IS_KEY(gre->flags))
+ keys->thoff += 4; /* Step over key */
+ if (GRE_IS_SEQ(gre->flags))
+ keys->thoff += 4; /* Step over sequence number */
+
+ keys->is_encap = true;
+
+ if (gre->proto == bpf_htons(ETH_P_TEB)) {
+ eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
+ &_eth);
+ if (!eth)
+ return BPF_DROP;
+
+ keys->thoff += sizeof(*eth);
+
+ return parse_eth_proto(skb, eth->h_proto);
+ } else {
+ return parse_eth_proto(skb, gre->proto);
+ }
+ case IPPROTO_TCP:
+ tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
+ if (!tcp)
+ return BPF_DROP;
+
+ if (tcp->doff < 5) /* doff << 2 is used as a byte length below */
+ return BPF_DROP;
+ /* NOTE(review): bpf_flow_dissect_get_header() may have returned
+  * &_tcp (the local copy) rather than a pointer into the packet;
+  * confirm the comparison against data_end below is meaningful
+  * in that case.
+  */
+ if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
+ return BPF_DROP;
+
+ keys->sport = tcp->source;
+ keys->dport = tcp->dest;
+ return BPF_OK;
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
+ if (!udp)
+ return BPF_DROP;
+
+ keys->sport = udp->source;
+ keys->dport = udp->dest;
+ return BPF_OK;
+ default:
+ return BPF_DROP;
+ }
+
+ return BPF_DROP;
+}
+/* Dispatches on an IPv6 next-header value.
+ *
+ * Stores nexthdr in skb->flow_keys->ip_proto. Hop-by-Hop and Destination
+ * Options headers are handed to the IPV6OP entry of jmp_table, Fragment
+ * headers to the IPV6FR entry; every other value is treated as an upper-layer
+ * protocol and passed to parse_ip_proto().
+ *
+ * Returns the result of parse_ip_proto(), or BPF_DROP when execution
+ * continues past a bpf_tail_call().
+ */
+static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+
+ keys->ip_proto = nexthdr;
+ switch (nexthdr) {
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_DSTOPTS:
+ bpf_tail_call(skb, &jmp_table, IPV6OP);
+ break;
+ case IPPROTO_FRAGMENT:
+ bpf_tail_call(skb, &jmp_table, IPV6FR);
+ break;
+ default:
+ return parse_ip_proto(skb, nexthdr);
+ }
+
+ return BPF_DROP;
+}
+/* IPv4 handler: PROG(IP) expands to bpf_func_IP in section "IP", the target
+ * of the jmp_table IP tail call issued by parse_eth_proto().
+ *
+ * Reads the iphdr at flow_keys->thoff, rejects ihl < 5, records addr_proto
+ * (the plain ETH_P_IP constant, without bpf_htons) and the source/destination
+ * addresses, and advances thoff by ihl * 4 bytes. Fragment state is recorded
+ * in is_frag / is_first_frag; for a non-first fragment the function returns
+ * BPF_OK without looking at the transport header. Otherwise it continues
+ * with parse_ip_proto() on iph->protocol.
+ */
+PROG(IP)(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ struct bpf_flow_keys *keys = skb->flow_keys;
+ void *data = (void *)(long)skb->data;
+ struct iphdr *iph, _iph;
+ bool done = false;
+
+ iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
+ if (!iph)
+ return BPF_DROP;
+
+ /* IP header cannot be smaller than 20 bytes */
+ if (iph->ihl < 5)
+ return BPF_DROP;
+
+ keys->addr_proto = ETH_P_IP;
+ keys->ipv4_src = iph->saddr;
+ keys->ipv4_dst = iph->daddr;
+
+ keys->thoff += iph->ihl << 2; /* ihl counts 4-byte words */
+ if (data + keys->thoff > data_end)
+ return BPF_DROP;
+
+ if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
+ keys->is_frag = true;
+ if (iph->frag_off & bpf_htons(IP_OFFSET))
+ /* From second fragment on, packets do not have headers
+ * we can parse.
+ */
+ done = true;
+ else
+ keys->is_first_frag = true;
+ }
+
+ if (done)
+ return BPF_OK;
+
+ return parse_ip_proto(skb, iph->protocol);
+}
+/* IPv6 handler: PROG(IPV6) expands to bpf_func_IPV6 in section "IPV6", the
+ * target of the jmp_table IPV6 tail call issued by parse_eth_proto().
+ *
+ * Reads the ipv6hdr at flow_keys->thoff, records addr_proto (the plain
+ * ETH_P_IPV6 constant), copies the addresses into the flow keys, advances
+ * thoff by sizeof(struct ipv6hdr) and continues with parse_ipv6_proto() on
+ * the header's nexthdr field.
+ */
+PROG(IPV6)(struct __sk_buff *skb)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+ struct ipv6hdr *ip6h, _ip6h;
+
+ ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
+ if (!ip6h)
+ return BPF_DROP;
+
+ keys->addr_proto = ETH_P_IPV6;
+ /* One copy of 2 * sizeof(saddr) bytes starting at saddr.
+  * NOTE(review): presumably relies on daddr directly following saddr in
+  * struct ipv6hdr and ipv6_dst directly following ipv6_src in the flow
+  * keys - confirm against both struct definitions.
+  */
+ memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
+
+ keys->thoff += sizeof(struct ipv6hdr);
+
+ return parse_ipv6_proto(skb, ip6h->nexthdr);
+}
+/* IPv6 options handler: PROG(IPV6OP) expands to bpf_func_IPV6OP in section
+ * "IPV6OP", the target of the jmp_table IPV6OP tail call issued by
+ * parse_ipv6_proto() for Hop-by-Hop and Destination Options headers.
+ *
+ * Reads the ipv6_opt_hdr at flow_keys->thoff, advances thoff past the whole
+ * extension header and continues with parse_ipv6_proto() on its nexthdr.
+ */
+PROG(IPV6OP)(struct __sk_buff *skb)
+{
+ struct ipv6_opt_hdr *ip6h, _ip6h;
+
+ ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
+ if (!ip6h)
+ return BPF_DROP;
+
+ /* hlen is in 8-octets and does not include the first 8 bytes
+ * of the header, so the total length is (1 + hdrlen) * 8 bytes.
+ */
+ skb->flow_keys->thoff += (1 + ip6h->hdrlen) << 3;
+
+ return parse_ipv6_proto(skb, ip6h->nexthdr);
+}
+/* IPv6 fragment handler: PROG(IPV6FR) expands to bpf_func_IPV6FR in section
+ * "IPV6FR", the target of the jmp_table IPV6FR tail call issued by
+ * parse_ipv6_proto() for Fragment headers.
+ *
+ * Reads the frag_hdr at flow_keys->thoff, advances thoff past it and sets
+ * is_frag. is_first_frag is set when no IP6_OFFSET bit is set in frag_off.
+ * Continues with parse_ipv6_proto() on the fragment header's nexthdr
+ * regardless of the fragment offset.
+ */
+PROG(IPV6FR)(struct __sk_buff *skb)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+ struct frag_hdr *fragh, _fragh;
+
+ fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
+ if (!fragh)
+ return BPF_DROP;
+
+ keys->thoff += sizeof(*fragh);
+ keys->is_frag = true;
+ if (!(fragh->frag_off & bpf_htons(IP6_OFFSET)))
+ keys->is_first_frag = true;
+
+ return parse_ipv6_proto(skb, fragh->nexthdr);
+}
+/* MPLS handler: PROG(MPLS) expands to bpf_func_MPLS in section "MPLS", the
+ * target of the jmp_table MPLS tail call issued by parse_eth_proto().
+ *
+ * Only checks that an mpls_label can be read at flow_keys->thoff. Returns
+ * BPF_OK if so and BPF_DROP otherwise; neither thoff nor any other flow key
+ * is updated here.
+ */
+PROG(MPLS)(struct __sk_buff *skb)
+{
+ struct mpls_label *mpls, _mpls;
+
+ mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
+ if (!mpls)
+ return BPF_DROP;
+
+ return BPF_OK;
+}
+/* VLAN handler: PROG(VLAN) expands to bpf_func_VLAN in section "VLAN", the
+ * target of the jmp_table VLAN tail call issued by parse_eth_proto().
+ *
+ * Reads the __be16 immediately before flow_keys->thoff. If it is
+ * ETH_P_8021AD, one vlan_hdr is consumed first and its encapsulated protocol
+ * must be ETH_P_8021Q. A further vlan_hdr is then consumed in every case; if
+ * its encapsulated protocol is again 8021AD or 8021Q the packet is dropped,
+ * otherwise dissection continues with parse_eth_proto() on that protocol.
+ */
+PROG(VLAN)(struct __sk_buff *skb)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+ struct vlan_hdr *vlan, _vlan;
+ __be16 proto;
+
+ /* Peek back to see if single or double-tagging: load the two bytes
+  * that end at thoff.
+  */
+ if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto,
+ sizeof(proto)))
+ return BPF_DROP;
+
+ /* Account for double-tagging */
+ if (proto == bpf_htons(ETH_P_8021AD)) {
+ vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
+ if (!vlan)
+ return BPF_DROP;
+
+ if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
+ return BPF_DROP;
+
+ keys->thoff += sizeof(*vlan);
+ }
+
+ vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
+ if (!vlan)
+ return BPF_DROP;
+
+ keys->thoff += sizeof(*vlan);
+ /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
+ if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
+ vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
+ return BPF_DROP;
+
+ return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
+}
+
+char __license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP4 0x7f000004U
+#define DST_REWRITE_IP4 0x7f000001U
+#define DST_REWRITE_PORT4 4444
+
+int _version SEC("version") = 1;
+/* Program placed in the "cgroup/connect4" section.
+ *
+ * Steps:
+ *  1. Look up a TCP or UDP socket (chosen by ctx->type) using a tuple whose
+ *     destination is DST_REWRITE_IP4:DST_REWRITE_PORT4 and whose source
+ *     fields are zero.
+ *  2. If one is found and its src_ip4/src_port match that destination,
+ *     rewrite ctx->user_ip4/user_port to it.
+ *  3. Call bpf_bind() with SRC_REWRITE_IP4 and port 0.
+ *
+ * Returns 1 when all steps succeed and 0 on every bail-out (other socket
+ * type, failed lookup, mismatch, bpf_bind() failure).
+ * NOTE(review): presumably 1 lets the operation proceed and 0 rejects it -
+ * confirm against the cgroup sock_addr hook documentation.
+ */
+SEC("cgroup/connect4")
+int connect_v4_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct sockaddr_in sa;
+ struct bpf_sock *sk;
+
+ /* Verify that new destination is available. tuple is already
+  * zero-initialized by "= {}"; the memsets zero the source fields again
+  * explicitly.
+  */
+ memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr));
+ memset(&tuple.ipv4.sport, 0, sizeof(tuple.ipv4.sport));
+
+ tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
+ tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
+
+ if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
+ return 0;
+ else if (ctx->type == SOCK_STREAM)
+ sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4),
+ BPF_F_CURRENT_NETNS, 0);
+ else
+ sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4),
+ BPF_F_CURRENT_NETNS, 0);
+
+ if (!sk)
+ return 0;
+
+ /* src_port is compared with the plain constant (no bpf_htons), unlike
+  * tuple.ipv4.dport above. NOTE(review): presumably sk->src_port is kept
+  * in host byte order - confirm. sk is released on both paths.
+  */
+ if (sk->src_ip4 != tuple.ipv4.daddr ||
+ sk->src_port != DST_REWRITE_PORT4) {
+ bpf_sk_release(sk);
+ return 0;
+ }
+
+ bpf_sk_release(sk);
+
+ /* Rewrite destination. */
+ ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
+
+ /* Rewrite source. */
+ memset(&sa, 0, sizeof(sa));
+
+ sa.sin_family = AF_INET;
+ sa.sin_port = bpf_htons(0);
+ sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
+
+ if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+ return 0;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP6_0 0
+#define SRC_REWRITE_IP6_1 0
+#define SRC_REWRITE_IP6_2 0
+#define SRC_REWRITE_IP6_3 6
+
+#define DST_REWRITE_IP6_0 0
+#define DST_REWRITE_IP6_1 0
+#define DST_REWRITE_IP6_2 0
+#define DST_REWRITE_IP6_3 1
+
+#define DST_REWRITE_PORT6 6666
+
+int _version SEC("version") = 1;
+/* Program placed in the "cgroup/connect6" section.
+ *
+ * Steps:
+ *  1. Look up a TCP or UDP socket (chosen by ctx->type) using a tuple whose
+ *     destination is DST_REWRITE_IP6_0..3 / DST_REWRITE_PORT6 and whose
+ *     source fields are zero.
+ *  2. If one is found and its src_ip6[]/src_port match that destination,
+ *     rewrite ctx->user_ip6[]/user_port to it.
+ *  3. Call bpf_bind() with SRC_REWRITE_IP6_0..3 and port 0.
+ *
+ * Returns 1 when all steps succeed and 0 on every bail-out (other socket
+ * type, failed lookup, mismatch, bpf_bind() failure).
+ * NOTE(review): presumably 1 lets the operation proceed and 0 rejects it -
+ * confirm against the cgroup sock_addr hook documentation.
+ */
+SEC("cgroup/connect6")
+int connect_v6_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct sockaddr_in6 sa;
+ struct bpf_sock *sk;
+
+ /* Verify that new destination is available. tuple is already
+  * zero-initialized by "= {}"; the memsets zero the source fields again
+  * explicitly.
+  */
+ memset(&tuple.ipv6.saddr, 0, sizeof(tuple.ipv6.saddr));
+ memset(&tuple.ipv6.sport, 0, sizeof(tuple.ipv6.sport));
+
+ tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0);
+ tuple.ipv6.daddr[1] = bpf_htonl(DST_REWRITE_IP6_1);
+ tuple.ipv6.daddr[2] = bpf_htonl(DST_REWRITE_IP6_2);
+ tuple.ipv6.daddr[3] = bpf_htonl(DST_REWRITE_IP6_3);
+
+ tuple.ipv6.dport = bpf_htons(DST_REWRITE_PORT6);
+
+ if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
+ return 0;
+ else if (ctx->type == SOCK_STREAM)
+ sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv6),
+ BPF_F_CURRENT_NETNS, 0);
+ else
+ sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv6),
+ BPF_F_CURRENT_NETNS, 0);
+
+ if (!sk)
+ return 0;
+
+ /* src_port is compared with the plain constant (no bpf_htons), unlike
+  * tuple.ipv6.dport above. NOTE(review): presumably sk->src_port is kept
+  * in host byte order - confirm. sk is released on both paths.
+  */
+ if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
+ sk->src_ip6[1] != tuple.ipv6.daddr[1] ||
+ sk->src_ip6[2] != tuple.ipv6.daddr[2] ||
+ sk->src_ip6[3] != tuple.ipv6.daddr[3] ||
+ sk->src_port != DST_REWRITE_PORT6) {
+ bpf_sk_release(sk);
+ return 0;
+ }
+
+ bpf_sk_release(sk);
+
+ /* Rewrite destination. */
+ ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
+ ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
+ ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
+ ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
+
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+
+ /* Rewrite source. */
+ memset(&sa, 0, sizeof(sa));
+
+ sa.sin6_family = AF_INET6;
+ sa.sin6_port = bpf_htons(0);
+
+ sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+ sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+ sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+ sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+
+ if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+ return 0;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+/* cgroup/dev hook: allow access only to character devices 1:5 and 1:9.
+ *
+ * The low 16 bits of ctx->access_type carry the device type and the high
+ * 16 bits the requested access. Returns 1 to allow, 0 to deny.
+ * With DEBUG defined, every request is also logged via bpf_trace_printk().
+ */
+SEC("cgroup/dev")
+int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx)
+{
+	short type = ctx->access_type & 0xFFFF;
+#ifdef DEBUG
+	short access = ctx->access_type >> 16;
+	/* Layout matters: slot 0 receives the device-class letter and
+	 * slots 8..10 receive the r/w/m flags, so the string needs two
+	 * leading and four trailing blanks before the newline. A shorter
+	 * string makes the stores below run past the end of the array.
+	 */
+	char fmt[] = "  %d:%d    \n";
+
+	switch (type) {
+	case BPF_DEVCG_DEV_BLOCK:
+		fmt[0] = 'b';
+		break;
+	case BPF_DEVCG_DEV_CHAR:
+		fmt[0] = 'c';
+		break;
+	default:
+		fmt[0] = '?';
+		break;
+	}
+
+	if (access & BPF_DEVCG_ACC_READ)
+		fmt[8] = 'r';
+
+	if (access & BPF_DEVCG_ACC_WRITE)
+		fmt[9] = 'w';
+
+	if (access & BPF_DEVCG_ACC_MKNOD)
+		fmt[10] = 'm';
+
+	bpf_trace_printk(fmt, sizeof(fmt), ctx->major, ctx->minor);
+#endif
+
+	/* Allow access to /dev/zero and /dev/urandom.
+	 * Forbid everything else.
+	 */
+	if (ctx->major != 1 || type != BPF_DEVCG_DEV_CHAR)
+		return 0;
+
+	switch (ctx->minor) {
+	case 5: /* 1:5 /dev/zero */
+	case 9: /* 1:9 /dev/urandom */
+		return 1;
+	}
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = LINUX_VERSION_CODE;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* One-slot array that receives the cgroup id stored by trace(). */
+struct bpf_map_def SEC("maps") cg_ids = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = 1,
+};
+
+/* One-slot array holding the pid that trace() compares the current task
+ * against; nothing in this file writes it.
+ */
+struct bpf_map_def SEC("maps") pidmap = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+/* Tracepoint handler for sys_enter_nanosleep.
+ *
+ * When the low 32 bits of bpf_get_current_pid_tgid() equal the value
+ * stored in pidmap[0], store the current cgroup id in cg_ids[0].
+ * Always returns 0.
+ */
+SEC("tracepoint/syscalls/sys_enter_nanosleep")
+int trace(void *ctx)
+{
+	__u32 caller = bpf_get_current_pid_tgid();
+	__u32 slot = 0;
+	__u32 *wanted;
+	__u64 *out;
+
+	wanted = bpf_map_lookup_elem(&pidmap, &slot);
+	if (wanted && *wanted == caller) {
+		out = bpf_map_lookup_elem(&cg_ids, &slot);
+		if (out)
+			*out = bpf_get_current_cgroup_id();
+	}
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/version.h>
+
+#include "bpf_helpers.h"
+#include "netcnt_common.h"
+
+/* Multiplier applied to the elapsed nanoseconds in bpf_nextcnt(); together
+ * with the division by NS_PER_SEC it turns elapsed time into a byte budget.
+ */
+#define MAX_BPS (3 * 1024 * 1024)
+
+/* Threshold bpf_nextcnt() compares against before refreshing the per-CPU
+ * snapshot (prev_ts / prev_packets / prev_bytes).
+ */
+#define REFRESH_TIME_NS 100000000
+#define NS_PER_SEC 1000000000
+
+/* Per-CPU cgroup storage holding a struct percpu_net_cnt. */
+struct bpf_map_def SEC("maps") percpu_netcnt = {
+ .type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ .key_size = sizeof(struct bpf_cgroup_storage_key),
+ .value_size = sizeof(struct percpu_net_cnt),
+};
+
+BPF_ANNOTATE_KV_PAIR(percpu_netcnt, struct bpf_cgroup_storage_key,
+ struct percpu_net_cnt);
+
+/* Cgroup storage holding a struct net_cnt; bpf_nextcnt() adds the per-CPU
+ * counters into it with atomic adds.
+ */
+struct bpf_map_def SEC("maps") netcnt = {
+ .type = BPF_MAP_TYPE_CGROUP_STORAGE,
+ .key_size = sizeof(struct bpf_cgroup_storage_key),
+ .value_size = sizeof(struct net_cnt),
+};
+
+BPF_ANNOTATE_KV_PAIR(netcnt, struct bpf_cgroup_storage_key,
+ struct net_cnt);
+
+/* cgroup/skb hook that counts traffic and applies a byte budget.
+ *
+ * Every packet bumps the per-CPU packet/byte counters; once the per-CPU
+ * packet count exceeds MAX_PERCPU_PACKETS both counters are added to the
+ * shared storage and reset. Returns 1 while the bytes accounted since the
+ * last snapshot stay below the budget derived from the elapsed time, and
+ * 0 otherwise.
+ */
+SEC("cgroup/skb")
+int bpf_nextcnt(struct __sk_buff *skb)
+{
+	struct percpu_net_cnt *percpu_cnt;
+	struct net_cnt *cnt;
+	__u64 ts, dt;
+	int ret;
+
+	cnt = bpf_get_local_storage(&netcnt, 0);
+	percpu_cnt = bpf_get_local_storage(&percpu_netcnt, 0);
+
+	percpu_cnt->packets++;
+	percpu_cnt->bytes += skb->len;
+
+	/* Flush the per-CPU counters into the shared storage. */
+	if (percpu_cnt->packets > MAX_PERCPU_PACKETS) {
+		__sync_fetch_and_add(&cnt->packets,
+				     percpu_cnt->packets);
+		percpu_cnt->packets = 0;
+
+		__sync_fetch_and_add(&cnt->bytes,
+				     percpu_cnt->bytes);
+		percpu_cnt->bytes = 0;
+	}
+
+	ts = bpf_ktime_get_ns();
+	dt = ts - percpu_cnt->prev_ts;
+
+	/* Elapsed nanoseconds -> bytes allowed in that time. */
+	dt *= MAX_BPS;
+	dt /= NS_PER_SEC;
+
+	ret = cnt->bytes + percpu_cnt->bytes - percpu_cnt->prev_bytes < dt;
+
+	/* NOTE(review): dt has already been scaled to bytes here, yet it is
+	 * compared against a nanosecond constant. Kept as is because the
+	 * user-space side of the test may depend on it - confirm.
+	 */
+	if (dt > REFRESH_TIME_NS) {
+		percpu_cnt->prev_ts = ts;
+		percpu_cnt->prev_packets = cnt->packets;
+		percpu_cnt->prev_bytes = cnt->bytes;
+	}
+
+	return ret;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = LINUX_VERSION_CODE;
--- /dev/null
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* Two-entry hash map (__u32 key, long value) looked up by func(). */
+struct bpf_map_def SEC("maps") htab = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(long),
+ .max_entries = 2,
+};
+
+/* Two-entry array map (__u32 key, long value) looked up by func(). */
+struct bpf_map_def SEC("maps") array = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(long),
+ .max_entries = 2,
+};
+
+/* Sample program which should always load; used for testing control paths.
+ * Returns 1 as soon as either map lookup yields NULL, otherwise 0.
+ */
+SEC(".text") int func()
+{
+	__u32 key = 0;
+	__u64 key64 = 0;
+
+	if (!bpf_map_lookup_elem(&htab, &key))
+		return 1;
+	/* The array is probed through the 64-bit key object. */
+	if (!bpf_map_lookup_elem(&array, &key64))
+		return 1;
+
+	return 0;
+}
--- /dev/null
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+
+/* Sample program which should always load for testing control paths.
+ * It touches no maps or helpers and unconditionally returns 0.
+ */
+int func()
+{
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* Addresses are host-order constants; sendmsg_v4_prog() converts them with
+ * bpf_htonl()/bpf_htons() before comparing or storing.
+ */
+#define SRC1_IP4 0xAC100001U /* 172.16.0.1 */
+#define SRC2_IP4 0x00000000U /* 0.0.0.0 */
+#define SRC_REWRITE_IP4 0x7f000004U /* 127.0.0.4 */
+#define DST_IP4 0xC0A801FEU /* 192.168.1.254 */
+#define DST_REWRITE_IP4 0x7f000001U /* 127.0.0.1 */
+#define DST_PORT 4040
+#define DST_REWRITE_PORT4 4444
+
+int _version SEC("version") = 1;
+
+/* cgroup/sendmsg4 hook.
+ *
+ * Accepts only datagram sockets whose source is SRC1_IP4 or SRC2_IP4 and
+ * whose destination matches DST_IP4 (top byte of the stored word only)
+ * and DST_PORT. Source and destination are then rewritten. Returns 1 on
+ * success and 0 to reject the sendmsg.
+ */
+SEC("cgroup/sendmsg4")
+int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
+{
+	if (ctx->type != SOCK_DGRAM)
+		return 0;
+
+	/* Unexpected source. Reject sendmsg. */
+	if (ctx->msg_src_ip4 != bpf_htonl(SRC1_IP4) &&
+	    ctx->msg_src_ip4 != bpf_htonl(SRC2_IP4))
+		return 0;
+
+	/* Rewrite source. */
+	ctx->msg_src_ip4 = bpf_htonl(SRC_REWRITE_IP4);
+
+	/* Unexpected destination. Reject sendmsg. */
+	if ((ctx->user_ip4 >> 24) != (bpf_htonl(DST_IP4) >> 24) ||
+	    ctx->user_port != bpf_htons(DST_PORT))
+		return 0;
+
+	/* Rewrite destination. */
+	ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
+	ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* Source address written into ctx->msg_src_ip6: four host-order 32-bit
+ * words, each converted with bpf_htonl() where it is stored.
+ */
+#define SRC_REWRITE_IP6_0 0
+#define SRC_REWRITE_IP6_1 0
+#define SRC_REWRITE_IP6_2 0
+#define SRC_REWRITE_IP6_3 6
+
+/* Destination address written into ctx->user_ip6, same layout. */
+#define DST_REWRITE_IP6_0 0
+#define DST_REWRITE_IP6_1 0
+#define DST_REWRITE_IP6_2 0
+#define DST_REWRITE_IP6_3 1
+
+/* Destination port written into ctx->user_port (host order here). */
+#define DST_REWRITE_PORT6 6666
+
+int _version SEC("version") = 1;
+
+/* cgroup/sendmsg6 hook.
+ *
+ * Accepts only datagram sockets whose last source word is 1 or 0 (network
+ * order) and whose first destination word carries 0xFACE in its low half
+ * and 0xB00C in its high half (each compared against bpf_htons()).
+ * Source and destination are then rewritten. Returns 1 on success and 0
+ * to reject the sendmsg.
+ */
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_prog(struct bpf_sock_addr *ctx)
+{
+	if (ctx->type != SOCK_DGRAM)
+		return 0;
+
+	/* Unexpected source. Reject sendmsg. */
+	if (ctx->msg_src_ip6[3] != bpf_htonl(1) &&
+	    ctx->msg_src_ip6[3] != bpf_htonl(0))
+		return 0;
+
+	/* Rewrite source. */
+	ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+	ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+	ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+	ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+
+	/* Unexpected destination. Reject sendmsg. */
+	if ((ctx->user_ip6[0] & 0xFFFF) != bpf_htons(0xFACE) ||
+	    ctx->user_ip6[0] >> 16 != bpf_htons(0xB00C))
+		return 0;
+
+	/* Rewrite destination. */
+	ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
+	ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
+	ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
+	ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
+
+	ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* Hash map keyed by the 64-bit socket cookie; the 32-bit value is seeded
+ * by set_cookie() and extended by update_cookie(). Holds up to 256 entries.
+ */
+struct bpf_map_def SEC("maps") socket_cookies = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u64),
+ .value_size = sizeof(__u32),
+ .max_entries = 1 << 8,
+};
+
+/* cgroup/connect6 hook: for AF_INET6 sockets connecting to an AF_INET6
+ * address, store 0xFF in socket_cookies under the socket's cookie.
+ * Returns 0 only when that map update fails; 1 in every other case.
+ */
+SEC("cgroup/connect6")
+int set_cookie(struct bpf_sock_addr *ctx)
+{
+	__u32 marker = 0xFF;
+	__u64 id;
+
+	if (!(ctx->family == AF_INET6 && ctx->user_family == AF_INET6))
+		return 1;
+
+	id = bpf_get_socket_cookie(ctx);
+
+	return bpf_map_update_elem(&socket_cookies, &id, &marker, 0) ? 0 : 1;
+}
+
+/* sockops hook: on BPF_SOCK_OPS_TCP_CONNECT_CB for an AF_INET6 socket
+ * that already has an entry in socket_cookies, replace the stored value
+ * with (local_port << 8) | old value. Always returns 1.
+ */
+SEC("sockops")
+int update_cookie(struct bpf_sock_ops *ctx)
+{
+	__u32 combined;
+	__u32 *stored;
+	__u64 id;
+
+	if (ctx->family != AF_INET6 ||
+	    ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
+		return 1;
+
+	id = bpf_get_socket_cookie(ctx);
+
+	stored = bpf_map_lookup_elem(&socket_cookies, &id);
+	if (stored) {
+		combined = (ctx->local_port << 8) | *stored;
+		bpf_map_update_elem(&socket_cookies, &id, &combined, 0);
+	}
+
+	return 1;
+}
+
+int _version SEC("version") = 1;
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+/* printf-style wrapper: copies the format literal into a local char array
+ * and hands it to bpf_trace_printk() together with its size and the
+ * remaining arguments. fmt must be a string literal.
+ */
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+/* sk_skb1 program: make sure the first 10 bytes of the skb are directly
+ * accessible (pulling them in if necessary), write one byte into the
+ * packet and return skb->len. Returns SK_DROP when the data cannot be
+ * made accessible.
+ */
+SEC("sk_skb1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long) skb->data_end;
+ void *data = (void *)(long) skb->data;
+ /* lport, rport and len are never used afterwards; the context reads
+ * are part of what the comment further down says is being tested.
+ */
+ __u32 lport = skb->local_port;
+ __u32 rport = skb->remote_port;
+ __u8 *d = data;
+ __u32 len = (__u32) data_end - (__u32) data;
+ int err;
+
+ if (data + 10 > data_end) {
+ err = bpf_skb_pull_data(skb, 10);
+ if (err)
+ return SK_DROP;
+
+ /* Reload both pointers after the pull and re-check the bound. */
+ data_end = (void *)(long)skb->data_end;
+ data = (void *)(long)skb->data;
+ if (data + 10 > data_end)
+ return SK_DROP;
+ }
+
+ /* This write/read is a bit pointless but tests the verifier and
+ * strparser handler for read/write pkt data and access into sk
+ * fields.
+ */
+ d = data;
+ d[7] = 1;
+ return skb->len;
+}
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+/* printf-style wrapper: copies the format literal into a local char array
+ * and hands it to bpf_trace_printk() together with its size and the
+ * remaining arguments. fmt must be a string literal.
+ */
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+/* sk_msg1 program: drop messages with fewer than 8 accessible bytes,
+ * otherwise log the data length and the first two bytes and pass the
+ * message on.
+ */
+SEC("sk_msg1")
+int bpf_prog1(struct sk_msg_md *msg)
+{
+	void *end = (void *)(long) msg->data_end;
+	void *start = (void *)(long) msg->data;
+	char *bytes;
+
+	if (start + 8 > end)
+		return SK_DROP;
+
+	bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data);
+	bytes = (char *)start;
+	bpf_printk("hello sendmsg hook %i %i\n", bytes[0], bytes[1]);
+
+	return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+/* printf-style wrapper: copies the format literal into a local char array
+ * and hands it to bpf_trace_printk() together with its size and the
+ * remaining arguments. fmt must be a string literal.
+ */
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+/* Sockmap used as redirect target by bpf_prog2() when the first payload
+ * byte is zero.
+ */
+struct bpf_map_def SEC("maps") sock_map_rx = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+/* Sockmap used as redirect target by bpf_prog2() when the first payload
+ * byte is non-zero.
+ */
+struct bpf_map_def SEC("maps") sock_map_tx = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+/* Not referenced by the program in this file. */
+struct bpf_map_def SEC("maps") sock_map_msg = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+/* Plain array map; not referenced by the program in this file. */
+struct bpf_map_def SEC("maps") sock_map_break = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+/* sk_skb2 program: pick a redirect target from the first two payload bytes.
+ *
+ * Byte 0 selects the map (0 -> sock_map_rx, anything else -> sock_map_tx)
+ * and byte 1 is the key within it. The first 8 payload bytes are then
+ * overwritten with a fixed pattern before redirecting. Returns SK_DROP
+ * when fewer than 8 bytes are accessible, otherwise the result of
+ * bpf_sk_redirect_map().
+ */
+SEC("sk_skb2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long) skb->data_end;
+ void *data = (void *)(long) skb->data;
+ /* lport and rport are read from the context but not used below. */
+ __u32 lport = skb->local_port;
+ __u32 rport = skb->remote_port;
+ __u8 *d = data;
+ __u8 sk, map;
+
+ if (data + 8 > data_end)
+ return SK_DROP;
+
+ /* Save the selector bytes before they are overwritten. */
+ map = d[0];
+ sk = d[1];
+
+ d[0] = 0xd;
+ d[1] = 0xe;
+ d[2] = 0xa;
+ d[3] = 0xd;
+ d[4] = 0xb;
+ d[5] = 0xe;
+ d[6] = 0xe;
+ d[7] = 0xf;
+
+ if (!map)
+ return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0);
+ return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0);
+}
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+/* XDP program: shrink the packet tail by 256 bytes when the packet is
+ * exactly 54 bytes long and by 20 bytes otherwise. Returns XDP_DROP when
+ * bpf_xdp_adjust_tail() reports an error and XDP_TX when it succeeds.
+ */
+SEC("xdp_adjust_tail")
+int _xdp_adjust_tail(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	int offset = (data_end - data == 54) ? 256 : 20;
+
+	return bpf_xdp_adjust_tail(xdp, 0 - offset) ? XDP_DROP : XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+/* Value type of btf_map; only v6 is modified in this file. */
+struct ipv_counts {
+ unsigned int v4;
+ unsigned int v6;
+};
+
+/* Four-entry array of struct ipv_counts indexed by int. */
+struct bpf_map_def SEC("maps") btf_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct ipv_counts),
+ .max_entries = 4,
+};
+
+/* Attach key/value type information to btf_map. */
+BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
+
+/* Argument layout of the dummy tracepoint program below. */
+struct dummy_tracepoint_args {
+ unsigned long long pad;
+ struct sock *sock;
+};
+
+/* Increment btf_map[0].v6 when arg->sock is set and the map slot exists.
+ * Always returns 0. Kept out of line on purpose (noinline).
+ */
+__attribute__((noinline))
+static int test_long_fname_2(struct dummy_tracepoint_args *arg)
+{
+	struct ipv_counts *entry;
+	int slot = 0;
+
+	if (arg->sock) {
+		entry = bpf_map_lookup_elem(&btf_map, &slot);
+		if (entry)
+			entry->v6++;
+	}
+
+	return 0;
+}
+
+/* Out-of-line pass-through to test_long_fname_2(); returns its result. */
+__attribute__((noinline))
+static int test_long_fname_1(struct dummy_tracepoint_args *arg)
+{
+ return test_long_fname_2(arg);
+}
+
+/* Program entry point: forwards to test_long_fname_1() and returns its
+ * result.
+ */
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+ return test_long_fname_1(arg);
+}
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+/* Value type of btf_map; only v6 is modified in this file. */
+struct ipv_counts {
+ unsigned int v4;
+ unsigned int v6;
+};
+
+/* Four-entry array of struct ipv_counts indexed by int. Unlike the
+ * "haskv" variant, no BPF_ANNOTATE_KV_PAIR() accompanies this map.
+ */
+struct bpf_map_def SEC("maps") btf_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct ipv_counts),
+ .max_entries = 4,
+};
+
+/* Argument layout of the dummy tracepoint program below. */
+struct dummy_tracepoint_args {
+ unsigned long long pad;
+ struct sock *sock;
+};
+
+/* Increment btf_map[0].v6 when arg->sock is set and the map slot exists.
+ * Always returns 0. Kept out of line on purpose (noinline).
+ */
+__attribute__((noinline))
+static int test_long_fname_2(struct dummy_tracepoint_args *arg)
+{
+	struct ipv_counts *entry;
+	int slot = 0;
+
+	if (arg->sock) {
+		entry = bpf_map_lookup_elem(&btf_map, &slot);
+		if (entry)
+			entry->v6++;
+	}
+
+	return 0;
+}
+
+/* Out-of-line pass-through to test_long_fname_2(); returns its result. */
+__attribute__((noinline))
+static int test_long_fname_1(struct dummy_tracepoint_args *arg)
+{
+ return test_long_fname_2(arg);
+}
+
+/* Program entry point: forwards to test_long_fname_1() and returns its
+ * result.
+ */
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+ return test_long_fname_1(arg);
+}
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* Permit pretty deep stack traces */
+#define MAX_STACK_RAWTP 100
+/* Record emitted through perfmap by bpf_prog1(). The three *_size fields
+ * hold the return values of the corresponding bpf_get_stack() calls.
+ */
+struct stack_trace_t {
+ int pid;
+ int kern_stack_size;
+ int user_stack_size;
+ int user_stack_buildid_size;
+ __u64 kern_stack[MAX_STACK_RAWTP];
+ __u64 user_stack[MAX_STACK_RAWTP];
+ struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
+};
+
+/* Perf event array that bpf_prog1() writes its output to. */
+struct bpf_map_def SEC("maps") perfmap = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(__u32),
+ .max_entries = 2,
+};
+
+/* One-slot per-cpu scratch area holding a struct stack_trace_t. */
+struct bpf_map_def SEC("maps") stackdata_map = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct stack_trace_t),
+ .max_entries = 1,
+};
+
+/* Allocate twice the per-cpu space actually needed. For the code below
+ * usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
+ * if (usize < 0)
+ * return 0;
+ * ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
+ *
+ * If we had value_size = MAX_STACK_RAWTP * sizeof(__u64), the
+ * verifier would complain that the access "raw_data + usize"
+ * with size "max_len - usize" may be out of bounds.
+ * The maximum "raw_data + usize" is "raw_data + max_len"
+ * and the maximum "max_len - usize" is "max_len", so the verifier
+ * concludes that the maximum buffer access range is
+ * "raw_data[0...max_len * 2 - 1]" and hence rejects the program.
+ *
+ * Doubling the to-be-used max buffer size fixes this verifier
+ * issue and avoids complicated massaging of the C code.
+ * This is an acceptable workaround since there is only one entry here.
+ */
+struct bpf_map_def SEC("maps") rawdata_map = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2,
+ .max_entries = 1,
+};
+
+/* Tracepoint handler for raw_syscalls/sys_enter.
+ *
+ * First fills a struct stack_trace_t (kernel stack, user stack, user stack
+ * with build ids) and emits it through perfmap. Then collects user and
+ * kernel stacks back to back into one raw buffer and emits that too when
+ * the combined size is positive and fits in max_len. Always returns 0.
+ */
+SEC("tracepoint/raw_syscalls/sys_enter")
+int bpf_prog1(void *ctx)
+{
+ int max_len, max_buildid_len, usize, ksize, total_size;
+ struct stack_trace_t *data;
+ void *raw_data;
+ __u32 key = 0;
+
+ data = bpf_map_lookup_elem(&stackdata_map, &key);
+ if (!data)
+ return 0;
+
+ max_len = MAX_STACK_RAWTP * sizeof(__u64);
+ max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id);
+ data->pid = bpf_get_current_pid_tgid();
+ /* The sizes are stored unchecked, so they may be negative. */
+ data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack,
+ max_len, 0);
+ data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len,
+ BPF_F_USER_STACK);
+ data->user_stack_buildid_size = bpf_get_stack(
+ ctx, data->user_stack_buildid, max_buildid_len,
+ BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+ bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data));
+
+ /* write both kernel and user stacks to the same buffer */
+ raw_data = bpf_map_lookup_elem(&rawdata_map, &key);
+ if (!raw_data)
+ return 0;
+
+ usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
+ if (usize < 0)
+ return 0;
+
+ /* Kernel stack goes right behind the user stack; rawdata_map is
+ * sized at 2 * max_len so that this access passes the verifier.
+ */
+ ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
+ if (ksize < 0)
+ return 0;
+
+ total_size = usize + ksize;
+ if (total_size > 0 && total_size <= max_len)
+ bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
--- /dev/null
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+#include "test_iptunnel_common.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+/* Rotate a 32-bit word left by shift bits. */
+static inline __u32 rol32(__u32 word, unsigned int shift)
+{
+	unsigned int back = (-shift) & 31;
+
+	return (word << shift) | (word >> back);
+}
+
+/* Copy of jhash from the kernel sources, kept here to make sure llvm
+ * can compile it into a valid sequence of bpf instructions.
+ */
+
+/* Mix three 32-bit values in place. Expands to a bare brace block that
+ * assigns to a, b and c directly, so the arguments must be plain lvalues.
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+/* Final scrambling step of a, b and c; the callers in this file use c as
+ * the hash afterwards. Same in-place, lvalue-only expansion as above.
+ */
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+/* Constant folded into the initial state of every hash. */
+#define JHASH_INITVAL 0xdeadbeef
+
+typedef unsigned int u32;
+
+/* Hash length bytes starting at key, seeded with initval.
+ *
+ * The input is consumed 12 bytes at a time as three 32-bit loads that are
+ * mixed with __jhash_mix(). The remaining 1..12 bytes are added byte by
+ * byte and finished with __jhash_final(). A remainder of 0 (only possible
+ * when length is 0) skips the final step. Returns c.
+ */
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ while (length > 12) {
+ a += *(u32 *)(k);
+ b += *(u32 *)(k + 4);
+ c += *(u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ /* Every case deliberately falls through to the next one. */
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+/* Add initval to each of a, b and c, run __jhash_final() on them and
+ * return the resulting c.
+ */
+static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += initval;
+ b += initval;
+ c += initval;
+ __jhash_final(a, b, c);
+ return c;
+}
+
+/* Hash two 32-bit words: the third word is 0 and the seed is
+ * initval + JHASH_INITVAL + (2 << 2).
+ */
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+/* Mask tested against the raw iph->frag_off field; a non-zero result
+ * makes process_packet() drop the packet.
+ */
+#define PCKT_FRAGMENTED 65343
+/* Offset advanced past an IPv4 header (ihl is required to be 5). */
+#define IPV4_HDR_LEN_NO_OPT 20
+/* Added to the offset after parse_icmp() / parse_icmpv6() returned a
+ * negative action, i.e. when parsing continues with the embedded packet.
+ */
+#define IPV4_PLUS_ICMP_HDR 28
+#define IPV6_PLUS_ICMP_HDR 48
+/* Number of ch_rings slots per vip. */
+#define RING_SIZE 2
+#define MAX_VIPS 12
+#define MAX_REALS 5
+#define CTL_MAP_SIZE 16
+#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
+/* The flag bits below live in different fields, which is why several of
+ * them share bit 0: F_IPV6 is tested on real_definition.flags,
+ * F_HASH_NO_SRC_PORT on vip_meta.flags, and F_ICMP / F_SYN_SET on
+ * packet_description.flags.
+ */
+#define F_IPV6 (1 << 0)
+#define F_HASH_NO_SRC_PORT (1 << 0)
+#define F_ICMP (1 << 0)
+#define F_SYN_SET (1 << 1)
+
+/* Flow description filled in while parsing a packet. Addresses are either
+ * IPv4 or IPv6 (unions); ports can be accessed as one 32-bit value or as
+ * two 16-bit values. For non-ICMP packets port16[0] is the source port
+ * and port16[1] the destination port; with F_ICMP set they are swapped.
+ */
+struct packet_description {
+ union {
+ __be32 src;
+ __be32 srcv6[4];
+ };
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ union {
+ __u32 ports;
+ __u16 port16[2];
+ };
+ __u8 proto;
+ __u8 flags;
+};
+
+/* Value type of ctl_array; process_packet() reads the ifindex member. */
+struct ctl_value {
+ union {
+ __u64 value;
+ __u32 ifindex;
+ __u8 mac[6];
+ };
+};
+
+/* Value type of vip_map: per-vip flags and the vip's index, which is used
+ * to address ch_rings and stats.
+ */
+struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+};
+
+/* Value type of reals: backend address (IPv4 or IPv6) plus flags. */
+struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+};
+
+/* Value type of stats: byte and packet counters per vip. */
+struct vip_stats {
+ __u64 bytes;
+ __u64 pkts;
+};
+
+/* Local layout of an Ethernet header. */
+struct eth_hdr {
+ unsigned char eth_dest[ETH_ALEN];
+ unsigned char eth_source[ETH_ALEN];
+ unsigned short eth_proto;
+};
+
+/* struct vip -> struct vip_meta. struct vip is not defined in this file
+ * (presumably in test_iptunnel_common.h - confirm).
+ */
+struct bpf_map_def SEC("maps") vip_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip),
+ .value_size = sizeof(struct vip_meta),
+ .max_entries = MAX_VIPS,
+};
+
+/* Indexed by RING_SIZE * vip_num + (hash % RING_SIZE); each value is an
+ * index into reals.
+ */
+struct bpf_map_def SEC("maps") ch_rings = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = CH_RINGS_SIZE,
+};
+
+/* Backend definitions, addressed by the indices stored in ch_rings. */
+struct bpf_map_def SEC("maps") reals = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct real_definition),
+ .max_entries = MAX_REALS,
+};
+
+/* Per-cpu packet/byte counters, indexed by vip_num. */
+struct bpf_map_def SEC("maps") stats = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct vip_stats),
+ .max_entries = MAX_VIPS,
+};
+
+/* Control values; process_packet() reads slot 1 (IPv4 backends) or slot 2
+ * (IPv6 backends) to obtain the ifindex to redirect to.
+ */
+struct bpf_map_def SEC("maps") ctl_array = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct ctl_value),
+ .max_entries = CTL_MAP_SIZE,
+};
+
+/* Hash a flow from its source address and ports. For IPv6 the 16-byte
+ * source address is first reduced with jhash(); for IPv4 the address is
+ * used directly.
+ */
+static __always_inline __u32 get_packet_hash(struct packet_description *pckt,
+					     bool ipv6)
+{
+	__u32 addr_word;
+
+	if (ipv6)
+		addr_word = jhash(pckt->srcv6, 16, MAX_VIPS);
+	else
+		addr_word = pckt->src;
+
+	return jhash_2words(addr_word, pckt->ports, CH_RINGS_SIZE);
+}
+
+/* Select the backend for a flow.
+ *
+ * The flow hash picks a slot in the vip's section of ch_rings; the value
+ * found there indexes reals. On success *real points at the backend and
+ * true is returned; false means one of the two lookups failed.
+ */
+static __always_inline bool get_packet_dst(struct real_definition **real,
+					   struct packet_description *pckt,
+					   struct vip_meta *vip_info,
+					   bool is_ipv6)
+{
+	__u32 slot = get_packet_hash(pckt, is_ipv6) % RING_SIZE;
+	__u32 idx = RING_SIZE * vip_info->vip_num + slot;
+	__u32 *real_idx = bpf_map_lookup_elem(&ch_rings, &idx);
+
+	if (!real_idx)
+		return false;
+
+	idx = *real_idx;
+	*real = bpf_map_lookup_elem(&reals, &idx);
+
+	return *real ? true : false;
+}
+
+/* Parse an ICMPv6 header at data + off.
+ *
+ * Returns TC_ACT_SHOT when the ICMPv6 header or the IPv6 header embedded
+ * behind it does not fit in the packet, TC_ACT_OK for any type other than
+ * ICMPV6_PKT_TOOBIG, and TC_ACT_UNSPEC after filling pckt from the
+ * embedded header with source and destination swapped and F_ICMP set.
+ */
+static __always_inline int parse_icmpv6(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
+ return TC_ACT_OK;
+ off += sizeof(struct icmp6hdr);
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+ pckt->proto = ip6h->nexthdr;
+ pckt->flags |= F_ICMP;
+ /* Swapped on purpose: the embedded header's destination becomes the
+ * flow's source and vice versa.
+ */
+ memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
+ memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
+ return TC_ACT_UNSPEC;
+}
+
+/* Parse an ICMP header at data + off.
+ *
+ * Returns TC_ACT_SHOT when the ICMP header or the embedded IPv4 header
+ * does not fit or the embedded header carries options (ihl != 5),
+ * TC_ACT_OK for anything other than ICMP_DEST_UNREACH/ICMP_FRAG_NEEDED,
+ * and TC_ACT_UNSPEC after filling pckt from the embedded header with
+ * source and destination swapped and F_ICMP set.
+ */
+static __always_inline int parse_icmp(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmphdr *icmp_hdr;
+ struct iphdr *iph;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->type != ICMP_DEST_UNREACH ||
+ icmp_hdr->code != ICMP_FRAG_NEEDED)
+ return TC_ACT_OK;
+ off += sizeof(struct icmphdr);
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (iph->ihl != 5)
+ return TC_ACT_SHOT;
+ pckt->proto = iph->protocol;
+ pckt->flags |= F_ICMP;
+ /* Swapped on purpose: the embedded header's destination becomes the
+ * flow's source and vice versa.
+ */
+ pckt->src = iph->daddr;
+ pckt->dst = iph->saddr;
+ return TC_ACT_UNSPEC;
+}
+
+/* Read the UDP ports at data + off into pckt->port16.
+ *
+ * With F_ICMP set in pckt->flags the ports are stored swapped (dest
+ * first), otherwise source first. Returns false when the UDP header does
+ * not fit in the packet.
+ */
+static __always_inline bool parse_udp(void *data, __u64 off, void *data_end,
+				      struct packet_description *pckt)
+{
+	struct udphdr *udp = data + off;
+
+	if (udp + 1 > data_end)
+		return false;
+
+	if (pckt->flags & F_ICMP) {
+		pckt->port16[0] = udp->dest;
+		pckt->port16[1] = udp->source;
+	} else {
+		pckt->port16[0] = udp->source;
+		pckt->port16[1] = udp->dest;
+	}
+
+	return true;
+}
+
+/* Read the TCP ports at data + off into pckt->port16 and record a SYN.
+ *
+ * F_SYN_SET is added to pckt->flags when the syn bit is set. With F_ICMP
+ * set the ports are stored swapped (dest first), otherwise source first.
+ * Returns false when the TCP header does not fit in the packet.
+ */
+static __always_inline bool parse_tcp(void *data, __u64 off, void *data_end,
+				      struct packet_description *pckt)
+{
+	struct tcphdr *tcp = data + off;
+
+	if (tcp + 1 > data_end)
+		return false;
+
+	if (tcp->syn)
+		pckt->flags |= F_SYN_SET;
+
+	if (pckt->flags & F_ICMP) {
+		pckt->port16[0] = tcp->dest;
+		pckt->port16[1] = tcp->source;
+	} else {
+		pckt->port16[0] = tcp->source;
+		pckt->port16[1] = tcp->dest;
+	}
+
+	return true;
+}
+
+/* Parse one IPv4/IPv6 packet, pick a backend for it and redirect it.
+ *
+ * data/data_end bound the packet, off is the offset of the L3 header,
+ * is_ipv6 selects the parser and skb is the context used for the tunnel
+ * key. Returns TC_ACT_SHOT on any parse or lookup failure, the action
+ * reported by the ICMP parsers when it is non-negative, and otherwise
+ * the result of bpf_redirect().
+ */
+static __always_inline int process_packet(void *data, __u64 off, void *data_end,
+ bool is_ipv6, struct __sk_buff *skb)
+{
+ void *pkt_start = (void *)(long)skb->data;
+ struct packet_description pckt = {};
+ struct eth_hdr *eth = pkt_start;
+ struct bpf_tunnel_key tkey = {};
+ struct vip_stats *data_stats;
+ struct real_definition *dst;
+ struct vip_meta *vip_info;
+ struct ctl_value *cval;
+ __u32 v4_intf_pos = 1;
+ __u32 v6_intf_pos = 2;
+ struct ipv6hdr *ip6h;
+ struct vip vip = {};
+ struct iphdr *iph;
+ int tun_flag = 0;
+ __u16 pkt_bytes;
+ __u64 iph_len;
+ __u32 ifindex;
+ __u8 protocol;
+ __u32 vip_num;
+ int action;
+
+ tkey.tunnel_ttl = 64;
+ /* Step 1: L3 header. Fragments are dropped; ICMP errors are parsed
+ * for the embedded header, everything else fills pckt directly.
+ */
+ if (is_ipv6) {
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ iph_len = sizeof(struct ipv6hdr);
+ protocol = ip6h->nexthdr;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(ip6h->payload_len);
+ off += iph_len;
+ if (protocol == IPPROTO_FRAGMENT) {
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_ICMPV6) {
+ action = parse_icmpv6(data, data_end, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV6_PLUS_ICMP_HDR;
+ } else {
+ memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
+ memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
+ }
+ } else {
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (iph->ihl != 5)
+ return TC_ACT_SHOT;
+
+ protocol = iph->protocol;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(iph->tot_len);
+ off += IPV4_HDR_LEN_NO_OPT;
+
+ if (iph->frag_off & PCKT_FRAGMENTED)
+ return TC_ACT_SHOT;
+ if (protocol == IPPROTO_ICMP) {
+ action = parse_icmp(data, data_end, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV4_PLUS_ICMP_HDR;
+ } else {
+ pckt.src = iph->saddr;
+ pckt.dst = iph->daddr;
+ }
+ }
+ /* The ICMP parsers may have replaced the protocol with the one of
+ * the embedded packet.
+ */
+ protocol = pckt.proto;
+
+ /* Step 2: L4 ports; only TCP and UDP are accepted. */
+ if (protocol == IPPROTO_TCP) {
+ if (!parse_tcp(data, off, data_end, &pckt))
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_UDP) {
+ if (!parse_udp(data, off, data_end, &pckt))
+ return TC_ACT_SHOT;
+ } else {
+ return TC_ACT_SHOT;
+ }
+
+ /* Step 3: vip lookup, first with the destination port and then,
+ * if that fails, with port 0.
+ */
+ if (is_ipv6)
+ memcpy(vip.daddr.v6, pckt.dstv6, 16);
+ else
+ vip.daddr.v4 = pckt.dst;
+
+ vip.dport = pckt.port16[1];
+ vip.protocol = pckt.proto;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info) {
+ vip.dport = 0;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info)
+ return TC_ACT_SHOT;
+ pckt.port16[1] = 0;
+ }
+
+ if (vip_info->flags & F_HASH_NO_SRC_PORT)
+ pckt.port16[0] = 0;
+
+ /* Step 4: backend selection and tunnel key setup. */
+ if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
+ return TC_ACT_SHOT;
+
+ if (dst->flags & F_IPV6) {
+ cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ memcpy(tkey.remote_ipv6, dst->dstv6, 16);
+ tun_flag = BPF_F_TUNINFO_IPV6;
+ } else {
+ cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ tkey.remote_ipv4 = dst->dst;
+ }
+ /* Step 5: per-vip statistics, then redirect. */
+ vip_num = vip_info->vip_num;
+ data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+ if (!data_stats)
+ return TC_ACT_SHOT;
+ data_stats->pkts++;
+ data_stats->bytes += pkt_bytes;
+ bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
+ /* Overwrite the first 4 bytes of the destination MAC with the
+ * tunnel key's remote_ipv4 field.
+ */
+ *(u32 *)eth->eth_dest = tkey.remote_ipv4;
+ return bpf_redirect(ifindex, 0);
+}
+
+/* Program entry point: dispatch on the Ethernet protocol field.
+ *
+ * IPv4 and IPv6 frames are handed to process_packet(); frames that are
+ * shorter than an Ethernet header or carry any other protocol are dropped
+ * with TC_ACT_SHOT.
+ */
+SEC("l4lb-demo")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct eth_hdr *eth = data;
+	__u32 nh_off = sizeof(struct eth_hdr);
+	__u32 eth_proto;
+
+	if (data + nh_off > data_end)
+		return TC_ACT_SHOT;
+
+	eth_proto = eth->eth_proto;
+	if (eth_proto == bpf_htons(ETH_P_IP))
+		return process_packet(data, nh_off, data_end, false, ctx);
+	if (eth_proto == bpf_htons(ETH_P_IPV6))
+		return process_packet(data, nh_off, data_end, true, ctx);
+
+	return TC_ACT_SHOT;
+}
+char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+#include "test_iptunnel_common.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+/* Rotate a 32-bit word left by shift bits. */
+static __u32 rol32(__u32 word, unsigned int shift)
+{
+	unsigned int back = (-shift) & 31;
+
+	return (word << shift) | (word >> back);
+}
+
+/* Copy of jhash from the kernel sources, kept here to make sure LLVM
+ * can compile it into a valid sequence of BPF instructions.
+ *
+ * __jhash_mix(a, b, c) mixes three 32-bit lvalues in place through six
+ * subtract / xor-with-rotate / add rounds. Each argument is expanded and
+ * assigned several times, so only plain variables should be passed.
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+/* __jhash_final(a, b, c): final scrambling of three 32-bit lvalues in
+ * place (seven xor / subtract-rotate rounds). Callers in this file take
+ * `c` as the resulting hash. Arguments are expanded multiple times.
+ */
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+#define JHASH_INITVAL 0xdeadbeef /* constant folded into the initial state by jhash() and jhash_2words() */
+
+typedef unsigned int u32; /* short alias used by the jhash helpers in this file */
+/* Hash @length bytes starting at @key, seeded with @initval.
+ * All three state words start as JHASH_INITVAL + length + initval. Input
+ * is consumed 12 bytes at a time (three u32 loads followed by
+ * __jhash_mix); the remaining 1..12 bytes are added byte by byte and
+ * finished with __jhash_final. When no bytes remain the final mix is
+ * skipped. Returns the state word `c`.
+ */
+static u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ while (length > 12) { /* strictly greater: a trailing 12-byte block goes through the switch */
+ a += *(u32 *)(k);
+ b += *(u32 *)(k + 4);
+ c += *(u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ switch (length) { /* every case deliberately falls through to the next one */
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+/* Hash up to three 32-bit words: add @initval to each of @a, @b and @c,
+ * run __jhash_final on them and return the resulting `c`.
+ */
+static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += initval;
+ b += initval;
+ c += initval;
+ __jhash_final(a, b, c);
+ return c;
+}
+/* Hash two 32-bit words. The third word is 0 and the seed passed down is
+ * initval + JHASH_INITVAL + (2 << 2), i.e. plus 8.
+ */
+static u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+/* Parsing constants, table sizes and flag bits used by this program. */
+#define PCKT_FRAGMENTED 65343 /* 0xFF3F, masked against iph->frag_off; presumably the MF + offset bits as loaded on little-endian - TODO confirm */
+#define IPV4_HDR_LEN_NO_OPT 20 /* added to the offset after an IPv4 header with ihl == 5 */
+#define IPV4_PLUS_ICMP_HDR 28 /* skipped after parse_icmp(): ICMP header plus the embedded IPv4 header */
+#define IPV6_PLUS_ICMP_HDR 48 /* skipped after parse_icmpv6(): ICMPv6 header plus the embedded IPv6 header */
+#define RING_SIZE 2 /* ch_rings slots per VIP */
+#define MAX_VIPS 12
+#define MAX_REALS 5
+#define CTL_MAP_SIZE 16
+#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
+#define F_IPV6 (1 << 0) /* tested on real_definition.flags */
+#define F_HASH_NO_SRC_PORT (1 << 0) /* tested on vip_meta.flags */
+#define F_ICMP (1 << 0) /* set/tested on packet_description.flags */
+#define F_SYN_SET (1 << 1) /* set on packet_description.flags by parse_tcp() */
+/* Flow description filled in by process_packet() and the parse_* helpers.
+ * src/dst hold either an IPv4 address or a 16-byte IPv6 address (unions).
+ * port16[0]/port16[1] receive the source/destination port (swapped when
+ * F_ICMP is set); `ports` aliases both for hashing. `flags` carries F_ICMP
+ * and F_SYN_SET.
+ */
+struct packet_description {
+ union {
+ __be32 src;
+ __be32 srcv6[4];
+ };
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ union {
+ __u32 ports;
+ __u16 port16[2];
+ };
+ __u8 proto;
+ __u8 flags;
+};
+/* Value type of ctl_array. The members overlay each other; this file only
+ * reads `ifindex` (in process_packet()).
+ */
+struct ctl_value {
+ union {
+ __u64 value;
+ __u32 ifindex;
+ __u8 mac[6];
+ };
+};
+/* Value type of vip_map. `flags` is tested against F_HASH_NO_SRC_PORT;
+ * `vip_num` selects the VIP's slots in ch_rings and its entry in stats.
+ */
+struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+};
+/* Value type of the reals map: the address copied into the tunnel key
+ * (dstv6 when `flags` has F_IPV6, otherwise dst).
+ */
+struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+};
+/* Value type of the stats map; process_packet() increments `pkts` and adds
+ * the L3 length field of the packet to `bytes`.
+ */
+struct vip_stats {
+ __u64 bytes;
+ __u64 pkts;
+};
+/* Local Ethernet header layout: destination MAC, source MAC, then the
+ * EtherType, which callers compare against bpf_htons(ETH_P_*).
+ */
+struct eth_hdr {
+ unsigned char eth_dest[ETH_ALEN];
+ unsigned char eth_source[ETH_ALEN];
+ unsigned short eth_proto;
+};
+/* VIP table: hash map keyed by struct vip (declared outside this file)
+ * with struct vip_meta values; looked up in process_packet().
+ */
+struct bpf_map_def SEC("maps") vip_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip),
+ .value_size = sizeof(struct vip_meta),
+ .max_entries = MAX_VIPS,
+};
+/* Ring table: array of __u32. get_packet_dst() indexes it with
+ * RING_SIZE * vip_num + hash % RING_SIZE and uses the value as a key
+ * into `reals`.
+ */
+struct bpf_map_def SEC("maps") ch_rings = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = CH_RINGS_SIZE,
+};
+/* Backend table: array of struct real_definition. */
+struct bpf_map_def SEC("maps") reals = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct real_definition),
+ .max_entries = MAX_REALS,
+};
+/* Per-CPU counters: struct vip_stats indexed by vip_num. */
+struct bpf_map_def SEC("maps") stats = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct vip_stats),
+ .max_entries = MAX_VIPS,
+};
+/* Control array of struct ctl_value; process_packet() reads index 1 for
+ * IPv4 backends and index 2 for IPv6 backends to obtain the ifindex.
+ */
+struct bpf_map_def SEC("maps") ctl_array = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct ctl_value),
+ .max_entries = CTL_MAP_SIZE,
+};
+/* Compute the flow hash for @pckt.
+ * IPv6: the 16-byte source address is first reduced with jhash() (seed
+ * MAX_VIPS) and then combined with `ports`. IPv4: the source address is
+ * combined with `ports` directly. Both use CH_RINGS_SIZE as the
+ * jhash_2words() seed.
+ */
+static __u32 get_packet_hash(struct packet_description *pckt,
+ bool ipv6)
+{
+ if (ipv6)
+ return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
+ pckt->ports, CH_RINGS_SIZE);
+ else
+ return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
+}
+/* Select the backend for @pckt and store it in *@real.
+ * Returns true on success. Returns false (written as 0 for the hash
+ * check) when the flow hash is not one of the two hard-coded expected
+ * values, or when either the ch_rings or the reals lookup fails.
+ */
+static bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6)
+{
+ __u32 hash = get_packet_hash(pckt, is_ipv6);
+ __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
+ __u32 *real_pos;
+
+ /* only the two known hashes are accepted - presumably those of the test's fixed packets */
+ if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
+ hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+ return 0;
+
+ real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+ if (!real_pos)
+ return false;
+ key = *real_pos; /* ring slot holds the index into reals */
+ *real = bpf_map_lookup_elem(&reals, &key);
+ if (!(*real))
+ return false;
+ return true;
+}
+/* Parse an ICMPv6 header at @data + @off.
+ * Returns TC_ACT_SHOT when the ICMPv6 header or the embedded IPv6 header
+ * does not fit before @data_end, and TC_ACT_OK for any type other than
+ * ICMPV6_PKT_TOOBIG. Otherwise fills @pckt from the embedded IPv6 header
+ * with source and destination swapped, sets F_ICMP and returns
+ * TC_ACT_UNSPEC; the caller only returns early on results >= 0.
+ */
+static int parse_icmpv6(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
+ return TC_ACT_OK;
+ off += sizeof(struct icmp6hdr);
+ ip6h = data + off; /* IPv6 header carried inside the ICMPv6 message */
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+ pckt->proto = ip6h->nexthdr;
+ pckt->flags |= F_ICMP;
+ memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
+ memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
+ return TC_ACT_UNSPEC;
+}
+/* Parse an ICMP header at @data + @off.
+ * Returns TC_ACT_SHOT when a header does not fit before @data_end or the
+ * embedded IPv4 header has ihl != 5, and TC_ACT_OK unless the message is
+ * ICMP_DEST_UNREACH with code ICMP_FRAG_NEEDED. Otherwise fills @pckt from
+ * the embedded IPv4 header with source and destination swapped, sets
+ * F_ICMP and returns TC_ACT_UNSPEC; the caller only returns early on
+ * results >= 0.
+ */
+static int parse_icmp(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmphdr *icmp_hdr;
+ struct iphdr *iph;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->type != ICMP_DEST_UNREACH ||
+ icmp_hdr->code != ICMP_FRAG_NEEDED)
+ return TC_ACT_OK;
+ off += sizeof(struct icmphdr);
+ iph = data + off; /* IPv4 header carried inside the ICMP message */
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (iph->ihl != 5)
+ return TC_ACT_SHOT;
+ pckt->proto = iph->protocol;
+ pckt->flags |= F_ICMP;
+ pckt->src = iph->daddr;
+ pckt->dst = iph->saddr;
+ return TC_ACT_UNSPEC;
+}
+/* Read the UDP ports at @data + @off into @pckt.
+ * Returns false when the UDP header does not fit before @data_end.
+ * port16[0]/port16[1] receive source/dest, or dest/source when F_ICMP is
+ * already set on @pckt.
+ */
+static bool parse_udp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
+{
+ struct udphdr *udp;
+ udp = data + off;
+
+ if (udp + 1 > data_end)
+ return false;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = udp->source;
+ pckt->port16[1] = udp->dest;
+ } else {
+ pckt->port16[0] = udp->dest;
+ pckt->port16[1] = udp->source;
+ }
+ return true;
+}
+/* Read the TCP ports at @data + @off into @pckt.
+ * Returns false when the TCP header does not fit before @data_end. Sets
+ * F_SYN_SET when the SYN bit is present. port16[0]/port16[1] receive
+ * source/dest, or dest/source when F_ICMP is already set on @pckt.
+ */
+static bool parse_tcp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
+{
+ struct tcphdr *tcp;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return false;
+
+ if (tcp->syn)
+ pckt->flags |= F_SYN_SET;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = tcp->source;
+ pckt->port16[1] = tcp->dest;
+ } else {
+ pckt->port16[0] = tcp->dest;
+ pckt->port16[1] = tcp->source;
+ }
+ return true;
+}
+/* Core of the load-balancer test program.
+ * @data/@data_end: packet bounds; @off: offset of the L3 header;
+ * @is_ipv6: selects IPv6 or IPv4 parsing; @skb: program context, used to
+ * re-read the packet start and for the tunnel-key helper.
+ *
+ * Steps: parse L3 (IPv6 fragment headers, IPv4 fragments and IPv4 headers
+ * with options are dropped), unwrap an ICMP error through parse_icmp() /
+ * parse_icmpv6(), parse the TCP or UDP ports, look the destination up in
+ * vip_map (retrying with dport 0), pick a backend via get_packet_dst(),
+ * read the egress ifindex from ctl_array, update the per-VIP counters,
+ * set the tunnel key and return bpf_redirect(). Failures return
+ * TC_ACT_SHOT; a non-negative result from the ICMP helpers is returned
+ * unchanged.
+ */
+static int process_packet(void *data, __u64 off, void *data_end,
+ bool is_ipv6, struct __sk_buff *skb)
+{
+ void *pkt_start = (void *)(long)skb->data;
+ struct packet_description pckt = {};
+ struct eth_hdr *eth = pkt_start;
+ struct bpf_tunnel_key tkey = {};
+ struct vip_stats *data_stats;
+ struct real_definition *dst;
+ struct vip_meta *vip_info;
+ struct ctl_value *cval;
+ __u32 v4_intf_pos = 1; /* ctl_array index read for IPv4 backends */
+ __u32 v6_intf_pos = 2; /* ctl_array index read for IPv6 backends */
+ struct ipv6hdr *ip6h;
+ struct vip vip = {};
+ struct iphdr *iph;
+ int tun_flag = 0;
+ __u16 pkt_bytes;
+ __u64 iph_len;
+ __u32 ifindex;
+ __u8 protocol;
+ __u32 vip_num;
+ int action;
+
+ tkey.tunnel_ttl = 64;
+ if (is_ipv6) {
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ iph_len = sizeof(struct ipv6hdr);
+ protocol = ip6h->nexthdr;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(ip6h->payload_len);
+ off += iph_len;
+ if (protocol == IPPROTO_FRAGMENT) {
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_ICMPV6) {
+ action = parse_icmpv6(data, data_end, off, &pckt);
+ if (action >= 0) /* helper reached a final verdict */
+ return action;
+ off += IPV6_PLUS_ICMP_HDR; /* move to the embedded L4 header */
+ } else {
+ memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
+ memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
+ }
+ } else {
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (iph->ihl != 5) /* IPv4 options are not handled */
+ return TC_ACT_SHOT;
+
+ protocol = iph->protocol;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(iph->tot_len);
+ off += IPV4_HDR_LEN_NO_OPT;
+
+ if (iph->frag_off & PCKT_FRAGMENTED)
+ return TC_ACT_SHOT;
+ if (protocol == IPPROTO_ICMP) {
+ action = parse_icmp(data, data_end, off, &pckt);
+ if (action >= 0) /* helper reached a final verdict */
+ return action;
+ off += IPV4_PLUS_ICMP_HDR; /* move to the embedded L4 header */
+ } else {
+ pckt.src = iph->saddr;
+ pckt.dst = iph->daddr;
+ }
+ }
+ protocol = pckt.proto; /* may have been replaced by the ICMP helpers */
+
+ if (protocol == IPPROTO_TCP) {
+ if (!parse_tcp(data, off, data_end, &pckt))
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_UDP) {
+ if (!parse_udp(data, off, data_end, &pckt))
+ return TC_ACT_SHOT;
+ } else {
+ return TC_ACT_SHOT;
+ }
+
+ if (is_ipv6)
+ memcpy(vip.daddr.v6, pckt.dstv6, 16);
+ else
+ vip.daddr.v4 = pckt.dst;
+
+ vip.dport = pckt.port16[1];
+ vip.protocol = pckt.proto;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info) {
+ vip.dport = 0; /* second attempt: VIP entry stored with port 0 */
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info)
+ return TC_ACT_SHOT;
+ pckt.port16[1] = 0;
+ }
+
+ if (vip_info->flags & F_HASH_NO_SRC_PORT)
+ pckt.port16[0] = 0;
+
+ if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
+ return TC_ACT_SHOT;
+
+ if (dst->flags & F_IPV6) {
+ cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ memcpy(tkey.remote_ipv6, dst->dstv6, 16);
+ tun_flag = BPF_F_TUNINFO_IPV6;
+ } else {
+ cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ tkey.remote_ipv4 = dst->dst;
+ }
+ vip_num = vip_info->vip_num;
+ data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+ if (!data_stats)
+ return TC_ACT_SHOT;
+ data_stats->pkts++;
+ data_stats->bytes += pkt_bytes;
+ bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag); /* return value is not checked */
+ *(u32 *)eth->eth_dest = tkey.remote_ipv4; /* overwrites the first 4 bytes of the destination MAC */
+ return bpf_redirect(ifindex, 0);
+}
+/* Program entry in section "l4lb-demo".
+ * Drops (TC_ACT_SHOT) frames that are too short for struct eth_hdr or
+ * whose EtherType is neither ETH_P_IP nor ETH_P_IPV6; everything else is
+ * forwarded to process_packet() with the matching is_ipv6 flag and its
+ * result is returned.
+ */
+SEC("l4lb-demo")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct eth_hdr *eth = data;
+ __u32 eth_proto;
+ __u32 nh_off;
+
+ nh_off = sizeof(struct eth_hdr); /* L3 header starts right after the Ethernet header */
+ if (data + nh_off > data_end)
+ return TC_ACT_SHOT;
+ eth_proto = eth->eth_proto;
+ if (eth_proto == bpf_htons(ETH_P_IP))
+ return process_packet(data, nh_off, data_end, false, ctx);
+ else if (eth_proto == bpf_htons(ETH_P_IPV6))
+ return process_packet(data, nh_off, data_end, true, ctx);
+ else
+ return TC_ACT_SHOT;
+}
+char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// test ir decoder
+//
+// Copyright (C) 2018 Sean Young <sean@mess.org>
+
+#include <linux/bpf.h>
+#include <linux/lirc.h>
+#include "bpf_helpers.h"
+/* Test IR decoder in section "lirc_mode2".
+ * Only pulse samples are examined. With duration = LIRC_VALUE(*sample):
+ * bit 0x10000 triggers bpf_rc_keydown() with protocol argument 0x40 and
+ * the low 16 bits as scancode; bit 0x20000 triggers bpf_rc_pointer_rel()
+ * with bits 8..15 and bits 0..7 as the two movement arguments.
+ * Always returns 0.
+ */
+SEC("lirc_mode2")
+int bpf_decoder(unsigned int *sample)
+{
+ if (LIRC_IS_PULSE(*sample)) {
+ unsigned int duration = LIRC_VALUE(*sample);
+
+ if (duration & 0x10000)
+ bpf_rc_keydown(sample, 0x40, duration & 0xffff, 0);
+ if (duration & 0x20000)
+ bpf_rc_pointer_rel(sample, (duration >> 8) & 0xff,
+ duration & 0xff);
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+#include <stddef.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <linux/seg6_local.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+/* printk-style wrapper: copies the format string literal into a local
+ * char array and passes that array and its size to bpf_trace_printk()
+ * together with the variadic arguments.
+ */
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+/* Packet parsing state machine helpers.
+ * cursor_advance() evaluates to the current value of _cursor and then
+ * moves _cursor forward by _len bytes.
+ */
+#define cursor_advance(_cursor, _len) \
+ ({ void *_tmp = _cursor; _cursor += _len; _tmp; })
+/* Value written to / expected in the SRH `flags` byte by the programs below. */
+#define SR6_FLAG_ALERT (1 << 4)
+/* 64-bit byte-order conversion built from the 32-bit helpers: when
+ * bpf_htonl(1) == 1 the value is returned unchanged, otherwise the two
+ * 32-bit halves are converted individually and swapped.
+ */
+#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
+ 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
+#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
+ 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
+#define BPF_PACKET_HEADER __attribute__((packed)) /* applied to the on-wire header structs below */
+/* Packed IPv6 header layout used by get_srh(); the source and destination
+ * addresses are split into two 64-bit halves each.
+ */
+struct ip6_t {
+ unsigned int ver:4;
+ unsigned int priority:8;
+ unsigned int flow_label:20;
+ unsigned short payload_len;
+ unsigned char next_header;
+ unsigned char hop_limit;
+ unsigned long long src_hi;
+ unsigned long long src_lo;
+ unsigned long long dst_hi;
+ unsigned long long dst_lo;
+} BPF_PACKET_HEADER;
+/* 16-byte IPv6 address stored as two 64-bit halves (packed). */
+struct ip6_addr_t {
+ unsigned long long hi;
+ unsigned long long lo;
+} BPF_PACKET_HEADER;
+/* Packed Segment Routing Header: eight bytes of fixed fields followed by
+ * the segment list. Code in this file treats the header as
+ * (hdrlen + 1) * 8 bytes long and the list as first_segment + 1 entries.
+ */
+struct ip6_srh_t {
+ unsigned char nexthdr;
+ unsigned char hdrlen;
+ unsigned char type;
+ unsigned char segments_left;
+ unsigned char first_segment;
+ unsigned char flags;
+ unsigned short tag;
+
+ struct ip6_addr_t segments[0];
+} BPF_PACKET_HEADER;
+/* Packed TLV header: a type byte and a length byte followed by the value;
+ * the code below treats a whole TLV as sizeof(struct sr6_tlv_t) + len bytes.
+ */
+struct sr6_tlv_t {
+ unsigned char type;
+ unsigned char len;
+ unsigned char value[0];
+} BPF_PACKET_HEADER;
+/* Locate the Segment Routing Header that directly follows the IPv6 header
+ * at the start of @skb's data.
+ * Returns NULL when the data is too short, the version nibble is not 6,
+ * the IPv6 next_header is not 43 or the routing type is not 4; otherwise
+ * returns a pointer to the SRH inside the packet. Only the fixed part of
+ * the SRH is bounds-checked here.
+ */
+__attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
+{
+ void *cursor, *data_end;
+ struct ip6_srh_t *srh;
+ struct ip6_t *ip;
+ uint8_t *ipver;
+
+ data_end = (void *)(long)skb->data_end;
+ cursor = (void *)(long)skb->data;
+ ipver = (uint8_t *)cursor;
+
+ if ((void *)ipver + sizeof(*ipver) > data_end)
+ return NULL;
+
+ if ((*ipver >> 4) != 6) /* high nibble of the first byte is the IP version */
+ return NULL;
+
+ ip = cursor_advance(cursor, sizeof(*ip));
+ if ((void *)ip + sizeof(*ip) > data_end)
+ return NULL;
+
+ if (ip->next_header != 43) /* presumably the IPv6 Routing extension header number - confirm */
+ return NULL;
+
+ srh = cursor_advance(cursor, sizeof(*srh));
+ if ((void *)srh + sizeof(*srh) > data_end)
+ return NULL;
+
+ if (srh->type != 4) /* presumably the Segment Routing routing type - confirm */
+ return NULL;
+
+ return srh;
+}
+/* Resize and rewrite the padding TLV located at @pad_off.
+ * If @new_pad differs from @old_pad the SRH is grown or shrunk by the
+ * difference at @pad_off. If @new_pad is non-zero, a padding TLV of type
+ * SR6_TLV_PADDING with length byte new_pad - 2 and a zero-filled value is
+ * stored there. Returns the first helper error, or 0.
+ */
+__attribute__((always_inline))
+int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
+ uint32_t old_pad, uint32_t pad_off)
+{
+ int err;
+
+ if (new_pad != old_pad) {
+ err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
+ (int) new_pad - (int) old_pad);
+ if (err)
+ return err;
+ }
+
+ if (new_pad > 0) {
+ char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0};
+ struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
+
+ pad_tlv->type = SR6_TLV_PADDING;
+ pad_tlv->len = new_pad - 2; /* length byte excludes the 2-byte TLV header */
+
+ err = bpf_lwt_seg6_store_bytes(skb, pad_off,
+ (void *)pad_tlv_buf, new_pad);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+/* Walk the TLVs of @srh and check that *@tlv_off falls on a TLV boundary.
+ * @tlv_off: in/out offset from skb->data. The value -1 means "end of the
+ * TLV area" and is replaced by that offset. A value equal to the SRH
+ * offset is replaced by the padding TLV's offset if one is found.
+ * @pad_size: written only when a padding TLV is found (header included).
+ * @pad_off: set to the padding TLV's offset, or to the offset where the
+ * walk stopped.
+ * Returns 0 on success, a bpf_skb_load_bytes() error, or -EINVAL when
+ * *@tlv_off did not match any boundary visited.
+ */
+__attribute__((always_inline))
+int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
+ uint32_t *tlv_off, uint32_t *pad_size,
+ uint32_t *pad_off)
+{
+ uint32_t srh_off, cur_off;
+ int offset_valid = 0;
+ int err;
+
+ srh_off = (char *)srh - (char *)(long)skb->data;
+ // cur_off = end of segments, start of possible TLVs
+ cur_off = srh_off + sizeof(*srh) +
+ sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
+
+ *pad_off = 0;
+
+ // we can only go as far as ~10 TLVs due to the BPF max stack size
+ #pragma clang loop unroll(full)
+ for (int i = 0; i < 10; i++) {
+ struct sr6_tlv_t tlv;
+
+ if (cur_off == *tlv_off)
+ offset_valid = 1;
+
+ if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3)) // reached the end of the SRH
+ break;
+
+ err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
+ if (err)
+ return err;
+
+ if (tlv.type == SR6_TLV_PADDING) {
+ *pad_size = tlv.len + sizeof(tlv);
+ *pad_off = cur_off;
+
+ if (*tlv_off == srh_off) {
+ *tlv_off = cur_off;
+ offset_valid = 1;
+ }
+ break;
+
+ } else if (tlv.type == SR6_TLV_HMAC) {
+ break;
+ }
+
+ cur_off += sizeof(tlv) + tlv.len;
+ } // we reached the padding or HMAC TLVs, or the end of the SRH
+
+ if (*pad_off == 0) // no padding TLV seen: use the offset where the walk stopped
+ *pad_off = cur_off;
+
+ if (*tlv_off == -1)
+ *tlv_off = cur_off;
+ else if (!offset_valid)
+ return -EINVAL;
+
+ return 0;
+}
+/* Insert the TLV @itlv (@tlv_size bytes to store) into @srh.
+ * @tlv_off is relative to the start of the SRH; -1 is passed through to
+ * is_valid_tlv_boundary() unchanged, which resolves it to the end of the
+ * TLV area. Padding and HMAC TLV types are rejected with -EINVAL.
+ * After growing the SRH and storing the TLV, the trailing padding is
+ * recomputed so the part of the SRH up to the padding ends on an 8-byte
+ * boundary, then applied through update_tlv_pad().
+ * Returns 0 or the first error encountered.
+ */
+__attribute__((always_inline))
+int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
+ struct sr6_tlv_t *itlv, uint8_t tlv_size)
+{
+ uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+ uint8_t len_remaining, new_pad;
+ uint32_t pad_off = 0;
+ uint32_t pad_size = 0;
+ uint32_t partial_srh_len;
+ int err;
+
+ if (tlv_off != -1)
+ tlv_off += srh_off; // convert to an offset from skb->data
+
+ if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
+ return -EINVAL;
+
+ err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+ if (err)
+ return err;
+
+ err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
+ if (err)
+ return err;
+
+ err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
+ if (err)
+ return err;
+
+ // the following can't be moved inside update_tlv_pad because the
+ // bpf verifier has some issues with it
+ pad_off += sizeof(*itlv) + itlv->len;
+ partial_srh_len = pad_off - srh_off;
+ len_remaining = partial_srh_len % 8;
+ new_pad = 8 - len_remaining;
+
+ if (new_pad == 1) // cannot pad for 1 byte only
+ new_pad = 9;
+ else if (new_pad == 8)
+ new_pad = 0;
+
+ return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+/* Remove the TLV located @tlv_off bytes after the start of @srh.
+ * The offset is validated with is_valid_tlv_boundary(), the TLV header is
+ * loaded to learn its length, the SRH is shrunk by that many bytes and the
+ * trailing padding is recomputed (same rule as add_tlv()) and applied
+ * through update_tlv_pad(). Returns 0 or the first error encountered.
+ */
+__attribute__((always_inline))
+int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
+ uint32_t tlv_off)
+{
+ uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+ uint8_t len_remaining, new_pad;
+ uint32_t partial_srh_len;
+ uint32_t pad_off = 0;
+ uint32_t pad_size = 0;
+ struct sr6_tlv_t tlv;
+ int err;
+
+ tlv_off += srh_off; // convert to an offset from skb->data
+
+ err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+ if (err)
+ return err;
+
+ err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv));
+ if (err)
+ return err;
+
+ err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len));
+ if (err)
+ return err;
+
+ pad_off -= sizeof(tlv) + tlv.len; // padding moved up by the removed length
+ partial_srh_len = pad_off - srh_off;
+ len_remaining = partial_srh_len % 8;
+ new_pad = 8 - len_remaining;
+ if (new_pad == 1) // cannot pad for 1 byte only
+ new_pad = 9;
+ else if (new_pad == 8)
+ new_pad = 0;
+
+ return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+/* Return 1 when the bytes right after the segment list hold an Egress TLV
+ * (type SR6_TLV_EGRESS, length 18) whose 16-byte address, read 4 bytes
+ * into the TLV, equals fd00::4; return 0 otherwise or when a load fails.
+ * The offset is computed from the start of the packet as
+ * sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) + 16 * (first_segment + 1).
+ */
+__attribute__((always_inline))
+int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
+{
+ int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
+ ((srh->first_segment + 1) << 4);
+ struct sr6_tlv_t tlv;
+
+ if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t)))
+ return 0;
+
+ if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) {
+ struct ip6_addr_t egr_addr;
+
+ if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16))
+ return 0;
+
+ // check if egress TLV value is correct
+ if (ntohll(egr_addr.hi) == 0xfd00000000000000 &&
+ ntohll(egr_addr.lo) == 0x4)
+ return 1;
+ }
+
+ return 0;
+}
+
+// This function will push a SRH with segments fd00::1, fd00::2, fd00::3,
+// fd00::4 (written to the segment list in the order fd00::4 ... fd00::1).
+// Returns BPF_DROP if bpf_lwt_push_encap() fails, BPF_REDIRECT otherwise.
+SEC("encap_srh")
+int __encap_srh(struct __sk_buff *skb)
+{
+ unsigned long long hi = 0xfd00000000000000;
+ struct ip6_addr_t *seg;
+ struct ip6_srh_t *srh;
+ char srh_buf[72]; // room for 4 segments
+ int err;
+
+ srh = (struct ip6_srh_t *)srh_buf;
+ srh->nexthdr = 0;
+ srh->hdrlen = 8; // (8 + 1) * 8 = 72 bytes, matching sizeof(srh_buf)
+ srh->type = 4;
+ srh->segments_left = 3;
+ srh->first_segment = 3;
+ srh->flags = 0;
+ srh->tag = 0;
+
+ seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
+
+ #pragma clang loop unroll(full)
+ for (unsigned long long lo = 0; lo < 4; lo++) {
+ seg->lo = htonll(4 - lo);
+ seg->hi = htonll(hi);
+ seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
+ }
+
+ err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf));
+ if (err)
+ return BPF_DROP;
+
+ return BPF_REDIRECT;
+}
+
+// Add an Egress TLV fd00::4 (the value has_egr_tlv() checks for), add the
+// flag A, and apply End.X action to fc42::1.
+// Returns BPF_DROP on any failure, BPF_REDIRECT on success.
+SEC("add_egr_x")
+int __add_egr_x(struct __sk_buff *skb)
+{
+ unsigned long long hi = 0xfc42000000000000;
+ unsigned long long lo = 0x1;
+ struct ip6_srh_t *srh = get_srh(skb);
+ uint8_t new_flags = SR6_FLAG_ALERT;
+ struct ip6_addr_t addr;
+ int err, offset;
+
+ if (srh == NULL)
+ return BPF_DROP;
+
+ // TLV bytes: type 2, length 18, two zero bytes, then the address fd00::4
+ uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
+
+ // offset (hdrlen + 1) * 8 is the current end of the SRH
+ err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
+ (struct sr6_tlv_t *)&tlv, 20);
+ if (err)
+ return BPF_DROP;
+
+ offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+ err = bpf_lwt_seg6_store_bytes(skb, offset,
+ (void *)&new_flags, sizeof(new_flags));
+ if (err)
+ return BPF_DROP;
+
+ addr.lo = htonll(lo);
+ addr.hi = htonll(hi);
+ err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
+ (void *)&addr, sizeof(addr));
+ if (err)
+ return BPF_DROP;
+ return BPF_REDIRECT;
+}
+
+// Pop the Egress TLV, reset the flags, change the tag to 2442 and finally
+// do a simple End action.
+// Drops the packet unless the SRH has the alert flag set, hdrlen 11 and the
+// expected Egress TLV; returns BPF_OK once all three edits have succeeded.
+SEC("pop_egr")
+int __pop_egr(struct __sk_buff *skb)
+{
+ struct ip6_srh_t *srh = get_srh(skb);
+ uint16_t new_tag = bpf_htons(2442);
+ uint8_t new_flags = 0;
+ int err, offset;
+
+ if (srh == NULL)
+ return BPF_DROP;
+
+ if (srh->flags != SR6_FLAG_ALERT)
+ return BPF_DROP;
+
+ if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV
+ return BPF_DROP;
+
+ if (!has_egr_tlv(skb, srh))
+ return BPF_DROP;
+
+ // TLV offset within the SRH: 8 fixed bytes plus 16 per segment
+ err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16);
+ if (err)
+ return BPF_DROP;
+
+ offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+ if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags,
+ sizeof(new_flags)))
+ return BPF_DROP;
+
+ offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag);
+ if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag,
+ sizeof(new_tag)))
+ return BPF_DROP;
+
+ return BPF_OK;
+}
+
+// Inspect if the Egress TLV and flag have been removed, if the tag is correct,
+// then apply a End.T action to reach the last segment.
+// Drops unless flags == 0, tag == htons(2442) and hdrlen == 8; otherwise
+// applies SEG6_LOCAL_ACTION_END_T with table 117 and returns BPF_REDIRECT.
+SEC("inspect_t")
+int __inspect_t(struct __sk_buff *skb)
+{
+ struct ip6_srh_t *srh = get_srh(skb);
+ int table = 117; // table id passed as the End.T action parameter
+ int err;
+
+ if (srh == NULL)
+ return BPF_DROP;
+
+ if (srh->flags != 0)
+ return BPF_DROP;
+
+ if (srh->tag != bpf_htons(2442))
+ return BPF_DROP;
+
+ if (srh->hdrlen != 8) // 4 segments
+ return BPF_DROP;
+
+ err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
+ (void *)&table, sizeof(table));
+
+ if (err)
+ return BPF_DROP;
+
+ return BPF_REDIRECT;
+}
+
+char __license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include "bpf_helpers.h"
+/* Outer array-of-maps with a single slot; xdp_mimtest0() looks up key 0. */
+struct bpf_map_def SEC("maps") mim_array = {
+ .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
+ .key_size = sizeof(int),
+ /* must be sizeof(__u32) for map in map */
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+ .map_flags = 0,
+};
+/* Outer hash-of-maps with a single entry; xdp_mimtest0() looks up key 0. */
+struct bpf_map_def SEC("maps") mim_hash = {
+ .type = BPF_MAP_TYPE_HASH_OF_MAPS,
+ .key_size = sizeof(int),
+ /* must be sizeof(__u32) for map in map */
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+ .map_flags = 0,
+};
+/* XDP program exercising map-in-map access.
+ * Looks up the inner map stored under key 0 in mim_array and then in
+ * mim_hash, and writes value 123 at key 0 of each inner map. Returns
+ * XDP_DROP if either outer lookup fails, XDP_PASS otherwise. The results
+ * of bpf_map_update_elem() are not checked.
+ */
+SEC("xdp_mimtest")
+int xdp_mimtest0(struct xdp_md *ctx)
+{
+ int value = 123;
+ int key = 0;
+ void *map;
+
+ map = bpf_map_lookup_elem(&mim_array, &key);
+ if (!map)
+ return XDP_DROP;
+
+ bpf_map_update_elem(map, &key, &value, 0);
+
+ map = bpf_map_lookup_elem(&mim_hash, &key);
+ if (!map)
+ return XDP_DROP;
+
+ bpf_map_update_elem(map, &key, &value, 0);
+
+ return XDP_PASS;
+}
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+/* Number of ints stored next to the spin lock in each map value. */
+#define VAR_NUM 16
+/* Value type of hash_map: a bpf_spin_lock followed by VAR_NUM ints. */
+struct hmap_elem {
+ struct bpf_spin_lock lock;
+ int var[VAR_NUM];
+};
+/* Single-entry hash map keyed by int, holding one struct hmap_elem. */
+struct bpf_map_def SEC("maps") hash_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct hmap_elem),
+ .max_entries = 1,
+};
+/* Key/value type annotation for hash_map (macro from bpf_helpers.h). */
+BPF_ANNOTATE_KV_PAIR(hash_map, int, struct hmap_elem);
+/* Value type of array_map: same layout as struct hmap_elem. */
+struct array_elem {
+ struct bpf_spin_lock lock;
+ int var[VAR_NUM];
+};
+/* Single-entry array map holding one struct array_elem. */
+struct bpf_map_def SEC("maps") array_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct array_elem),
+ .max_entries = 1,
+};
+/* Key/value type annotation for array_map (macro from bpf_helpers.h). */
+BPF_ANNOTATE_KV_PAIR(array_map, int, struct array_elem);
+
+/* Exercise bpf_spin_lock on values stored in a hash map and in an array map.
+ * For key 0 of each map, takes the element's lock, writes the same random
+ * value into all VAR_NUM slots and releases the lock.
+ * Returns 0 when both lookups succeed, 1 as soon as one of them fails.
+ */
+SEC("map_lock_demo")
+int bpf_map_lock_test(struct __sk_buff *skb)
+{
+	int rnd = bpf_get_prandom_u32();
+	int key = 0, err = 1, i;
+	struct hmap_elem *val;
+	struct array_elem *q;
+
+	val = bpf_map_lookup_elem(&hash_map, &key);
+	if (!val)
+		goto err;
+	/* spin_lock in hash map */
+	bpf_spin_lock(&val->lock);
+	for (i = 0; i < VAR_NUM; i++)
+		val->var[i] = rnd;
+	bpf_spin_unlock(&val->lock);
+
+	/* spin_lock in array */
+	q = bpf_map_lookup_elem(&array_map, &key);
+	if (!q)
+		goto err;
+	bpf_spin_lock(&q->lock);
+	for (i = 0; i < VAR_NUM; i++)
+		q->var[i] = rnd;
+	bpf_spin_unlock(&q->lock);
+	err = 0;
+err:
+	return err;
+}
+char _license[] SEC("license") = "GPL";
--- /dev/null
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+/* It is a dumb bpf program such that it must have no
+ * issue to be loaded since testing the verifier is
+ * not the focus here.
+ */
+
+int _version SEC("version") = 1;
+/* Single-slot array map (__u32 key, __u64 value) looked up by test_obj_id(). */
+struct bpf_map_def SEC("maps") test_map_id = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = 1,
+};
+/* Minimal program: performs one lookup of key 0 in test_map_id, ignores
+ * the result and always returns TC_ACT_OK.
+ */
+SEC("test_obj_id_dummy")
+int test_obj_id(struct __sk_buff *skb)
+{
+ __u32 key = 0;
+ __u64 *value;
+
+ value = bpf_map_lookup_elem(&test_map_id, &key); /* result intentionally unused */
+
+ return TC_ACT_OK;
+}
--- /dev/null
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define barrier() __asm__ __volatile__("": : :"memory") /* empty asm with a "memory" clobber: compiler-level barrier */
+int _version SEC("version") = 1;
+/* Direct packet access test program.
+ * Parses Ethernet, then IPv4 (using ihl * 4 as the header length) or IPv6,
+ * and treats what follows as a TCP header. Returns TC_ACT_SHOT on a failed
+ * bounds check or when the L4 protocol is not 6, TC_ACT_OK when
+ * tcp->urg_ptr == 123, and TC_ACT_UNSPEC otherwise (including frames that
+ * are neither IPv4 nor IPv6).
+ */
+SEC("test1")
+int process(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct ethhdr *eth = (struct ethhdr *)(data);
+ struct tcphdr *tcp = NULL;
+ __u8 proto = 255;
+ __u64 ihl_len;
+
+ if (eth + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = (struct iphdr *)(eth + 1);
+
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ ihl_len = iph->ihl * 4;
+ proto = iph->protocol;
+ tcp = (struct tcphdr *)((void *)(iph) + ihl_len);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(eth + 1);
+
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+ ihl_len = sizeof(*ip6h);
+ proto = ip6h->nexthdr;
+ tcp = (struct tcphdr *)((void *)(ip6h) + ihl_len);
+ }
+
+ if (tcp) {
+ if (((void *)(tcp) + 20) > data_end || proto != 6)
+ return TC_ACT_SHOT;
+ barrier(); /* to force ordering of checks */
+ if (((void *)(tcp) + 18) > data_end) /* narrower than the check above; presumably kept on purpose for the verifier - confirm */
+ return TC_ACT_SHOT;
+ if (tcp->urg_ptr == 123)
+ return TC_ACT_OK;
+ }
+
+ return TC_ACT_UNSPEC;
+}
--- /dev/null
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+/* TEST_FIELD(TYPE, FIELD, MASK): read skb->FIELD through a volatile access
+ * of the narrower TYPE and compare it with the full __u32 read masked by
+ * MASK; on mismatch the enclosing function returns TC_ACT_SHOT.
+ * On little-endian the narrow read starts at the field's address. In the
+ * other branch it is shifted by TEST_FIELD_OFFSET(), the index of the last
+ * TYPE-sized element inside the field.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define TEST_FIELD(TYPE, FIELD, MASK) \
+ { \
+ TYPE tmp = *(volatile TYPE *)&skb->FIELD; \
+ if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK)) \
+ return TC_ACT_SHOT; \
+ }
+#else
+#define TEST_FIELD_OFFSET(a, b) ((sizeof(a) - sizeof(b)) / sizeof(b))
+#define TEST_FIELD(TYPE, FIELD, MASK) \
+ { \
+ TYPE tmp = *((volatile TYPE *)&skb->FIELD + \
+ TEST_FIELD_OFFSET(skb->FIELD, TYPE)); \
+ if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK)) \
+ return TC_ACT_SHOT; \
+ }
+#endif
+/* Narrow context-access test: checks 1-, 2- and 4-byte reads of skb->len
+ * and skb->hash and 2- and 4-byte reads of skb->protocol against the full
+ * 32-bit value. Returns TC_ACT_SHOT from the first failing TEST_FIELD,
+ * TC_ACT_OK when all comparisons match.
+ */
+SEC("test1")
+int process(struct __sk_buff *skb)
+{
+ TEST_FIELD(__u8, len, 0xFF);
+ TEST_FIELD(__u16, len, 0xFFFF);
+ TEST_FIELD(__u32, len, 0xFFFFFFFF);
+ TEST_FIELD(__u16, protocol, 0xFFFF);
+ TEST_FIELD(__u32, protocol, 0xFFFFFFFF);
+ TEST_FIELD(__u8, hash, 0xFF);
+ TEST_FIELD(__u16, hash, 0xFFFF);
+ TEST_FIELD(__u32, hash, 0xFFFFFFFF);
+
+ return TC_ACT_OK;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Politecnico di Torino
+#define MAP_TYPE BPF_MAP_TYPE_QUEUE /* defined before the include; presumably selects the map type used by the shared header - confirm */
+#include "test_queue_stack_map.h"
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "bpf_endian.h"
+#include "bpf_helpers.h"
+#include "test_select_reuseport_common.h"
+
+int _version SEC("version") = 1;
+/* Fallback offsetof() for builds where no included header provides one. */
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+/* Array-of-maps with one slot; slot 0 supplies the map that is passed to
+ * bpf_sk_select_reuseport().
+ */
+struct bpf_map_def SEC("maps") outer_map = {
+ .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+/* One __u32 counter per enum result value (NR_RESULTS entries). */
+struct bpf_map_def SEC("maps") result_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = NR_RESULTS,
+};
+/* Optional index override: any value other than -1 replaces the index
+ * taken from the packet and is reset to -1 once consumed.
+ */
+struct bpf_map_def SEC("maps") tmp_index_ovr_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(int),
+ .max_entries = 1,
+};
+/* Receives the source line number recorded by the last GOTO_DONE(). */
+struct bpf_map_def SEC("maps") linum_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+/* Receives the struct data_check collected for the last processed packet. */
+struct bpf_map_def SEC("maps") data_check_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct data_check),
+ .max_entries = 1,
+};
+/* Record the outcome and the current source line in the caller's local
+ * variables `result` and `linum`, then jump to the caller's `done` label.
+ */
+#define GOTO_DONE(_result) ({ \
+ result = (_result); \
+ linum = __LINE__; \
+ goto done; \
+})
+/* Reuseport selection program driven by a `struct cmd` in the payload.
+ * Copies the metadata fields of @reuse_md into data_check, loads the IP
+ * addresses relative to the network header (8 bytes for IPv4, 32 for
+ * IPv6), reads the TCP or UDP ports, locates the struct cmd that follows
+ * the transport header and selects socket cmd->reuseport_index (or the
+ * override from tmp_index_ovr_map) from the map stored in outer_map[0].
+ * Every exit goes through `done`, which bumps result_map[result] and
+ * stores linum and data_check in their maps. Returns SK_PASS when
+ * result >= PASS, SK_DROP otherwise or when the result counter lookup
+ * fails.
+ */
+SEC("select_by_skb_data")
+int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
+{
+ __u32 linum, index = 0, flags = 0, index_zero = 0;
+ __u32 *result_cnt, *linum_value;
+ struct data_check data_check = {};
+ struct cmd *cmd, cmd_copy;
+ void *data, *data_end;
+ void *reuseport_array;
+ enum result result;
+ int *index_ovr;
+ int err;
+
+ data = reuse_md->data;
+ data_end = reuse_md->data_end;
+ data_check.len = reuse_md->len;
+ data_check.eth_protocol = reuse_md->eth_protocol;
+ data_check.ip_protocol = reuse_md->ip_protocol;
+ data_check.hash = reuse_md->hash;
+ data_check.bind_inany = reuse_md->bind_inany;
+ if (data_check.eth_protocol == bpf_htons(ETH_P_IP)) {
+ if (bpf_skb_load_bytes_relative(reuse_md,
+ offsetof(struct iphdr, saddr),
+ data_check.skb_addrs, 8,
+ BPF_HDR_START_NET))
+ GOTO_DONE(DROP_MISC);
+ } else {
+ if (bpf_skb_load_bytes_relative(reuse_md,
+ offsetof(struct ipv6hdr, saddr),
+ data_check.skb_addrs, 32,
+ BPF_HDR_START_NET))
+ GOTO_DONE(DROP_MISC);
+ }
+
+ /*
+ * The ip_protocol could be a compile time decision
+ * if the bpf_prog.o is dedicated to either TCP or
+ * UDP.
+ *
+ * Otherwise, reuse_md->ip_protocol or
+ * the protocol field in the iphdr can be used.
+ */
+ if (data_check.ip_protocol == IPPROTO_TCP) {
+ struct tcphdr *th = data;
+
+ if (th + 1 > data_end)
+ GOTO_DONE(DROP_MISC);
+
+ data_check.skb_ports[0] = th->source;
+ data_check.skb_ports[1] = th->dest;
+
+ if ((th->doff << 2) + sizeof(*cmd) > data_check.len)
+ GOTO_DONE(DROP_ERR_SKB_DATA);
+ /* TCP: the cmd is always copied out with the helper */
+ if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy,
+ sizeof(cmd_copy)))
+ GOTO_DONE(DROP_MISC);
+ cmd = &cmd_copy;
+ } else if (data_check.ip_protocol == IPPROTO_UDP) {
+ struct udphdr *uh = data;
+
+ if (uh + 1 > data_end)
+ GOTO_DONE(DROP_MISC);
+
+ data_check.skb_ports[0] = uh->source;
+ data_check.skb_ports[1] = uh->dest;
+
+ if (sizeof(struct udphdr) + sizeof(*cmd) > data_check.len)
+ GOTO_DONE(DROP_ERR_SKB_DATA);
+ /* UDP: copy only when the cmd is not within [data, data_end) */
+ if (data + sizeof(struct udphdr) + sizeof(*cmd) > data_end) {
+ if (bpf_skb_load_bytes(reuse_md, sizeof(struct udphdr),
+ &cmd_copy, sizeof(cmd_copy)))
+ GOTO_DONE(DROP_MISC);
+ cmd = &cmd_copy;
+ } else {
+ cmd = data + sizeof(struct udphdr);
+ }
+ } else {
+ GOTO_DONE(DROP_MISC);
+ }
+
+ reuseport_array = bpf_map_lookup_elem(&outer_map, &index_zero);
+ if (!reuseport_array)
+ GOTO_DONE(DROP_ERR_INNER_MAP);
+
+ index = cmd->reuseport_index;
+ index_ovr = bpf_map_lookup_elem(&tmp_index_ovr_map, &index_zero);
+ if (!index_ovr)
+ GOTO_DONE(DROP_MISC);
+
+ if (*index_ovr != -1) { /* one-shot override of the index */
+ index = *index_ovr;
+ *index_ovr = -1;
+ }
+ err = bpf_sk_select_reuseport(reuse_md, reuseport_array, &index,
+ flags);
+ if (!err)
+ GOTO_DONE(PASS);
+
+ if (cmd->pass_on_failure)
+ GOTO_DONE(PASS_ERR_SK_SELECT_REUSEPORT);
+ else
+ GOTO_DONE(DROP_ERR_SK_SELECT_REUSEPORT);
+
+done:
+ result_cnt = bpf_map_lookup_elem(&result_map, &result);
+ if (!result_cnt) /* linum and data_check are not recorded on this path */
+ return SK_DROP;
+
+ bpf_map_update_elem(&linum_map, &index_zero, &linum, BPF_ANY);
+ bpf_map_update_elem(&data_check_map, &index_zero, &data_check, BPF_ANY);
+
+ (*result_cnt)++;
+ return result < PASS ? SK_DROP : SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL.
+ *
+ * @data, @data_end: start and end of the packet data
+ * @nh_off:          offset of the L3 header inside @data
+ * @eth_proto:       ethertype, compared against bpf_htons() constants
+ * @ipv4:            set to true for IPv4 and false for IPv6; only meaningful
+ *                   when a non-NULL pointer is returned
+ *
+ * Returns the address of the IP header's saddr field viewed as a
+ * struct bpf_sock_tuple, or NULL when the L3 header does not fit in the
+ * packet, the ethertype is neither IPv4 nor IPv6, or the L4 protocol is
+ * not TCP.
+ */
+static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
+					void *data_end, __u16 eth_proto,
+					bool *ipv4)
+{
+	struct bpf_sock_tuple *result;
+	__u8 proto = 0;
+	__u64 ihl_len;
+
+	if (eth_proto == bpf_htons(ETH_P_IP)) {
+		struct iphdr *iph = (struct iphdr *)(data + nh_off);
+
+		if (iph + 1 > data_end)
+			return NULL;
+		ihl_len = iph->ihl * 4;
+		proto = iph->protocol;
+		*ipv4 = true;
+		result = (struct bpf_sock_tuple *)&iph->saddr;
+	} else if (eth_proto == bpf_htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + nh_off);
+
+		if (ip6h + 1 > data_end)
+			return NULL;
+		ihl_len = sizeof(*ip6h);
+		proto = ip6h->nexthdr;
+		/* The caller derives the tuple length from this flag, so an
+		 * IPv6 packet must not be reported as IPv4.
+		 */
+		*ipv4 = false;
+		result = (struct bpf_sock_tuple *)&ip6h->saddr;
+	} else {
+		/* Unknown ethertype: leave before ihl_len and result, which
+		 * neither branch above has set, are read.
+		 */
+		return NULL;
+	}
+
+	if (data + nh_off + ihl_len > data_end || proto != IPPROTO_TCP)
+		return NULL;
+
+	return result;
+}
+
+/* Look up the TCP socket that matches the packet's own address tuple.
+ *
+ * Returns TC_ACT_SHOT when the headers cannot be parsed or do not fit in
+ * the packet, TC_ACT_OK when a socket was found (and released again), and
+ * TC_ACT_UNSPEC when the lookup found nothing.
+ */
+SEC("sk_lookup_success")
+int bpf_sk_lookup_test0(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *data = (void *)(long)skb->data;
+	struct ethhdr *eth = (struct ethhdr *)(data);
+	struct bpf_sock_tuple *tuple;
+	struct bpf_sock *sk;
+	size_t tuple_len;
+	bool ipv4;
+
+	if (eth + 1 > data_end)
+		return TC_ACT_SHOT;
+
+	tuple = get_tuple(data, sizeof(*eth), data_end, eth->h_proto, &ipv4);
+	/* Pointer arithmetic on 'tuple' is in units of sizeof(*tuple), so
+	 * 'tuple + 1' is exactly one full tuple past its start. This is a
+	 * conservative bound: it also covers the shorter IPv4 tuple.
+	 */
+	if (!tuple || tuple + 1 > data_end)
+		return TC_ACT_SHOT;
+
+	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
+	sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
+	if (sk)
+		bpf_sk_release(sk);
+	/* Only the NULL-ness of sk is tested here; it is not dereferenced. */
+	return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
+}
+
+/* Smallest acquire/release pairing: look up a socket with an all-zero
+ * tuple and release it if the lookup returned one. Always returns 0.
+ */
+SEC("sk_lookup_success_simple")
+int bpf_sk_lookup_test1(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple zero_tuple = {};
+	struct bpf_sock *found;
+
+	found = bpf_sk_lookup_tcp(skb, &zero_tuple, sizeof(zero_tuple),
+				  BPF_F_CURRENT_NETNS, 0);
+	if (!found)
+		return 0;
+
+	bpf_sk_release(found);
+	return 0;
+}
+
+SEC("fail_use_after_free") /*
+ * Releases the looked-up socket and only afterwards reads sk->family, so
+ * the read goes through a reference that has already been released.
+ * NOTE(review): the "fail_" section prefix presumably marks a program the
+ * verifier is expected to reject - confirm against the user-space loader.
+ */
+int bpf_sk_lookup_uaf(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+ __u32 family = 0;
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
+ if (sk) {
+ bpf_sk_release(sk);
+ family = sk->family; /* deliberate: access after bpf_sk_release() */
+ }
+ return family;
+}
+
+SEC("fail_modify_sk_pointer") /*
+ * Advances the socket pointer by one element after the NULL check, so the
+ * value handed to bpf_sk_release() is not the one the lookup returned.
+ * NOTE(review): the "fail_" section prefix presumably marks a program the
+ * verifier is expected to reject - confirm against the user-space loader.
+ */
+int bpf_sk_lookup_modptr(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+ __u32 family; /* never read or written in this function */
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
+ if (sk) {
+ sk += 1; /* deliberate: modify the acquired pointer */
+ bpf_sk_release(sk);
+ }
+ return 0;
+}
+
+SEC("fail_modify_sk_or_null_pointer") /*
+ * Advances the lookup result by one element before it has been checked
+ * against NULL, then NULL-checks and releases the adjusted pointer.
+ * NOTE(review): the "fail_" section prefix presumably marks a program the
+ * verifier is expected to reject - confirm against the user-space loader.
+ */
+int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+ __u32 family; /* never read or written in this function */
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
+ sk += 1; /* deliberate: arithmetic on a possibly-NULL result */
+ if (sk)
+ bpf_sk_release(sk);
+ return 0;
+}
+
+SEC("fail_no_release") /*
+ * Performs a socket lookup and drops the result without ever calling
+ * bpf_sk_release().
+ * NOTE(review): the "fail_" section prefix presumably marks a program the
+ * verifier is expected to reject - confirm against the user-space loader.
+ */
+int bpf_sk_lookup_test2(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+
+ bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); /* result discarded on purpose */
+ return 0;
+}
+
+SEC("fail_release_twice") /*
+ * Calls bpf_sk_release() twice on the same lookup result, without a NULL
+ * check in between.
+ * NOTE(review): the "fail_" section prefix presumably marks a program the
+ * verifier is expected to reject - confirm against the user-space loader.
+ */
+int bpf_sk_lookup_test3(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
+ bpf_sk_release(sk);
+ bpf_sk_release(sk); /* deliberate: second release of the same pointer */
+ return 0;
+}
+
+SEC("fail_release_unchecked") /*
+ * Passes the lookup result straight to bpf_sk_release() without checking
+ * it against NULL first.
+ * NOTE(review): the "fail_" section prefix presumably marks a program the
+ * verifier is expected to reject - confirm against the user-space loader.
+ */
+int bpf_sk_lookup_test4(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple tuple = {};
+ struct bpf_sock *sk;
+
+ sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
+ bpf_sk_release(sk); /* deliberate: no NULL check before release */
+ return 0;
+}
+
+void lookup_no_release(struct __sk_buff *skb) /* helper for bpf_sk_lookup_test5(): looks up a socket and returns without releasing it */
+{
+ struct bpf_sock_tuple tuple = {};
+ bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); /* result discarded on purpose */
+}
+
+SEC("fail_no_release_subcall") /*
+ * Same missing-release case as "fail_no_release", but the lookup happens
+ * inside the non-inlined helper lookup_no_release().
+ * NOTE(review): the "fail_" section prefix presumably marks a program the
+ * verifier is expected to reject - confirm against the user-space loader.
+ */
+int bpf_sk_lookup_test5(struct __sk_buff *skb)
+{
+ lookup_no_release(skb);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+
+#include <string.h>
+
+#include "bpf_helpers.h"
+
+#define NUM_CGROUP_LEVELS 4
+
+struct bpf_map_def SEC("maps") cgroup_ids = { /* one __u64 slot per level; log_nth_level() stores the ancestor cgroup id at key == level */
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = NUM_CGROUP_LEVELS,
+};
+
+/* Store the skb's ancestor cgroup id for @level in cgroup_ids[@level]. */
+static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
+{
+	/* [1] &level passed to external function that may change it, it's
+	 * incompatible with loop unroll.
+	 */
+	__u64 ancestor_id = bpf_skb_ancestor_cgroup_id(skb, level);
+
+	bpf_map_update_elem(&cgroup_ids, &level, &ancestor_id, BPF_ANY);
+}
+
+SEC("cgroup_id_logger") /* records the ancestor cgroup id of the skb for levels 0..3 in cgroup_ids and always returns TC_ACT_OK */
+int log_cgroup_id(struct __sk_buff *skb)
+{
+ /* Loop unroll can't be used here due to [1]. Unrolling manually.
+ * Number of calls should be in sync with NUM_CGROUP_LEVELS.
+ */
+ log_nth_level(skb, 0);
+ log_nth_level(skb, 1);
+ log_nth_level(skb, 2);
+ log_nth_level(skb, 3);
+
+ return TC_ACT_OK;
+}
+
+int _version SEC("version") = 1;
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <linux/bpf.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+enum bpf_array_idx { /* keys used for addr_map, sock_result_map and tcp_sock_result_map */
+ SRV_IDX, /* entry for the socket whose source port matches the server address */
+ CLI_IDX, /* entry for the socket whose source port matches the client address */
+ __NR_BPF_ARRAY_IDX, /* number of entries; used as max_entries of those maps */
+};
+
+struct bpf_map_def SEC("maps") addr_map = { /* server/client sockaddr_in6; this program only reads sin6_port from it (presumably filled by user space - confirm) */
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct sockaddr_in6),
+ .max_entries = __NR_BPF_ARRAY_IDX,
+};
+
+struct bpf_map_def SEC("maps") sock_result_map = { /* per-side copy of struct bpf_sock, written by skcpy() */
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct bpf_sock),
+ .max_entries = __NR_BPF_ARRAY_IDX,
+};
+
+struct bpf_map_def SEC("maps") tcp_sock_result_map = { /* per-side copy of struct bpf_tcp_sock, written by tpcpy() */
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct bpf_tcp_sock),
+ .max_entries = __NR_BPF_ARRAY_IDX,
+};
+
+struct bpf_map_def SEC("maps") linum_map = { /* single slot holding the source line stored by the RETURN macro */
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+static bool is_loopback6(__u32 *a6) /* true when the four words are 0, 0, 0, bpf_htonl(1), i.e. ::1 */
+{
+	return !(a6[0] || a6[1] || a6[2] || a6[3] != bpf_htonl(1));
+}
+
+static void skcpy(struct bpf_sock *dst, /* copies the listed struct bpf_sock fields from src to dst one member at a time */
+ const struct bpf_sock *src)
+{
+ dst->bound_dev_if = src->bound_dev_if;
+ dst->family = src->family;
+ dst->type = src->type;
+ dst->protocol = src->protocol;
+ dst->mark = src->mark;
+ dst->priority = src->priority;
+ dst->src_ip4 = src->src_ip4;
+ dst->src_ip6[0] = src->src_ip6[0]; /* array members are copied word by word, not with a loop or memcpy */
+ dst->src_ip6[1] = src->src_ip6[1];
+ dst->src_ip6[2] = src->src_ip6[2];
+ dst->src_ip6[3] = src->src_ip6[3];
+ dst->src_port = src->src_port;
+ dst->dst_ip4 = src->dst_ip4;
+ dst->dst_ip6[0] = src->dst_ip6[0];
+ dst->dst_ip6[1] = src->dst_ip6[1];
+ dst->dst_ip6[2] = src->dst_ip6[2];
+ dst->dst_ip6[3] = src->dst_ip6[3];
+ dst->dst_port = src->dst_port;
+ dst->state = src->state;
+}
+
+static void tpcpy(struct bpf_tcp_sock *dst, /* copies the listed struct bpf_tcp_sock fields from src to dst one member at a time */
+ const struct bpf_tcp_sock *src)
+{
+ dst->snd_cwnd = src->snd_cwnd;
+ dst->srtt_us = src->srtt_us;
+ dst->rtt_min = src->rtt_min;
+ dst->snd_ssthresh = src->snd_ssthresh;
+ dst->rcv_nxt = src->rcv_nxt;
+ dst->snd_nxt = src->snd_nxt;
+ dst->snd_una = src->snd_una;
+ dst->mss_cache = src->mss_cache;
+ dst->ecn_flags = src->ecn_flags;
+ dst->rate_delivered = src->rate_delivered;
+ dst->rate_interval_us = src->rate_interval_us;
+ dst->packets_out = src->packets_out;
+ dst->retrans_out = src->retrans_out;
+ dst->total_retrans = src->total_retrans;
+ dst->segs_in = src->segs_in;
+ dst->data_segs_in = src->data_segs_in;
+ dst->segs_out = src->segs_out;
+ dst->data_segs_out = src->data_segs_out;
+ dst->lost_out = src->lost_out;
+ dst->sacked_out = src->sacked_out;
+ dst->bytes_received = src->bytes_received;
+ dst->bytes_acked = src->bytes_acked;
+}
+
+#define RETURN do { /* store the calling line in linum_map[idx0] and return 1; needs locals 'linum' and 'idx0'; do/while keeps "RETURN;" a single statement */ \
+	linum = __LINE__; \
+	bpf_map_update_elem(&linum_map, &idx0, &linum, 0); \
+	return 1; \
+} while (0)
+
+SEC("cgroup_skb/egress") /* copies bpf_sock and bpf_tcp_sock fields of the matching server/client socket into the result maps; every exit returns 1 via RETURN */
+int read_sock_fields(struct __sk_buff *skb)
+{
+ __u32 srv_idx = SRV_IDX, cli_idx = CLI_IDX, idx;
+ struct sockaddr_in6 *srv_sa6, *cli_sa6;
+ struct bpf_tcp_sock *tp, *tp_ret;
+ struct bpf_sock *sk, *sk_ret;
+ __u32 linum, idx0 = 0; /* both are used by the RETURN macro */
+
+ sk = skb->sk;
+ if (!sk || sk->state == 10) /* NOTE(review): 10 presumably is the TCP listen state (BPF_TCP_LISTEN) - confirm */
+ RETURN;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP || /* only IPv6 TCP sockets bound to ::1 are recorded */
+ !is_loopback6(sk->src_ip6))
+ RETURN;
+
+ tp = bpf_tcp_sock(sk);
+ if (!tp)
+ RETURN;
+
+ srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
+ cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx);
+ if (!srv_sa6 || !cli_sa6)
+ RETURN;
+
+ if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port)) /* the source port decides which result slot is written */
+ idx = srv_idx;
+ else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port))
+ idx = cli_idx;
+ else
+ RETURN;
+
+ sk_ret = bpf_map_lookup_elem(&sock_result_map, &idx);
+ tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &idx);
+ if (!sk_ret || !tp_ret)
+ RETURN;
+
+ skcpy(sk_ret, sk);
+ tpcpy(tp_ret, tp);
+
+ RETURN; /* success path also records its line, so linum_map tells which exit was taken */
+}
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+#undef SOCKMAP
+#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKHASH
+#include "./test_sockmap_kern.h"
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+#define SOCKMAP
+#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKMAP
+#include "./test_sockmap_kern.h"
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+struct hmap_elem { /* value type of hmap: a counter guarded by a spin lock that sits between two ints */
+ volatile int cnt;
+ struct bpf_spin_lock lock;
+ int test_padding;
+};
+
+struct bpf_map_def SEC("maps") hmap = { /* single-entry hash map used for the first lock section of bpf_sping_lock_test() */
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct hmap_elem),
+ .max_entries = 1,
+};
+
+BPF_ANNOTATE_KV_PAIR(hmap, int, struct hmap_elem);
+
+
+struct cls_elem { /* value type of cls_map: spin lock first, then the counter it guards */
+ struct bpf_spin_lock lock;
+ volatile int cnt;
+};
+
+struct bpf_map_def SEC("maps") cls_map = { /* cgroup storage map used for the third lock section of bpf_sping_lock_test() */
+ .type = BPF_MAP_TYPE_CGROUP_STORAGE,
+ .key_size = sizeof(struct bpf_cgroup_storage_key),
+ .value_size = sizeof(struct cls_elem),
+};
+
+BPF_ANNOTATE_KV_PAIR(cls_map, struct bpf_cgroup_storage_key,
+ struct cls_elem);
+
+struct bpf_vqueue { /* value type of vqueue: credit-based queue state guarded by a spin lock */
+ struct bpf_spin_lock lock;
+ /* 4 byte hole */
+ unsigned long long lasttime; /* set to bpf_ktime_get_ns() on each update */
+ int credit; /* capped at max_credit, then reduced by pkt_len per call */
+ unsigned int rate; /* multiplier passed to CREDIT_PER_NS() */
+};
+
+struct bpf_map_def SEC("maps") vqueue = { /* single-entry array used for the second lock section of bpf_sping_lock_test() */
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct bpf_vqueue),
+ .max_entries = 1,
+};
+
+BPF_ANNOTATE_KV_PAIR(vqueue, int, struct bpf_vqueue);
+#define CREDIT_PER_NS(delta, rate) (((delta) * (rate)) >> 20) /* (delta * rate) / 2^20; both arguments parenthesized so expressions can be passed */
+
+SEC("spin_lock_demo") /* takes and releases a bpf_spin_lock in a hash map value, an array map value and cgroup local storage; returns err (0 or 1) */
+int bpf_sping_lock_test(struct __sk_buff *skb)
+{
+ volatile int credit = 0, max_credit = 100, pkt_len = 64;
+ struct hmap_elem zero = {}, *val;
+ unsigned long long curtime;
+ struct bpf_vqueue *q;
+ struct cls_elem *cls;
+ int key = 0;
+ int err = 0;
+
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (!val) { /* first run: create the entry, then look it up again */
+ bpf_map_update_elem(&hmap, &key, &zero, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (!val) {
+ err = 1;
+ goto err;
+ }
+ }
+ /* spin_lock in hash map run time test */
+ bpf_spin_lock(&val->lock);
+ if (val->cnt) /* toggle cnt between 0 and 1 while holding the lock */
+ val->cnt--;
+ else
+ val->cnt++;
+ if (val->cnt != 0 && val->cnt != 1) /* any other value is reported through err */
+ err = 1;
+ bpf_spin_unlock(&val->lock);
+
+ /* spin_lock in array. virtual queue demo */
+ q = bpf_map_lookup_elem(&vqueue, &key);
+ if (!q)
+ goto err; /* err keeps whatever the hash map section left in it */
+ curtime = bpf_ktime_get_ns(); /* sampled before the lock is taken */
+ bpf_spin_lock(&q->lock);
+ q->credit += CREDIT_PER_NS(curtime - q->lasttime, q->rate);
+ q->lasttime = curtime;
+ if (q->credit > max_credit)
+ q->credit = max_credit;
+ q->credit -= pkt_len;
+ credit = q->credit; /* local copy; not read again in this function */
+ bpf_spin_unlock(&q->lock);
+
+ /* spin_lock in cgroup local storage */
+ cls = bpf_get_local_storage(&cls_map, 0); /* NOTE(review): used without a NULL check - presumably the helper cannot return NULL here; confirm */
+ bpf_spin_lock(&cls->lock);
+ cls->cnt++;
+ bpf_spin_unlock(&cls->lock);
+
+err:
+ return err;
+}
+char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Politecnico di Torino
+#define MAP_TYPE BPF_MAP_TYPE_STACK
+#include "test_queue_stack_map.h"
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+struct bpf_map_def SEC("maps") control_map = { /* slot 0 non-zero makes oncpu() return without recording anything */
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") stackid_hmap = { /* keyed by the stack id returned by bpf_get_stackid(); value is always 0 */
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 16384,
+};
+
+struct bpf_map_def SEC("maps") stackmap = { /* stack trace map passed to bpf_get_stackid(); entries are build-id records */
+ .type = BPF_MAP_TYPE_STACK_TRACE,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct bpf_stack_build_id)
+ * PERF_MAX_STACK_DEPTH,
+ .max_entries = 128,
+ .map_flags = BPF_F_STACK_BUILD_ID,
+};
+
+struct bpf_map_def SEC("maps") stack_amap = { /* indexed by stack id; oncpu() fills the slot with bpf_get_stack() output */
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct bpf_stack_build_id)
+ * PERF_MAX_STACK_DEPTH,
+ .max_entries = 128,
+};
+
+/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
+struct random_urandom_args { /* context type of oncpu(); the program only passes it on to the stack helpers */
+ unsigned long long pad;
+ int got_bits;
+ int pool_left;
+ int input_left;
+};
+
+/* Unless control_map[0] is non-zero, take the user stack id of the current
+ * context, remember it in stackid_hmap and store the build-id stack in
+ * stack_amap[stack id]. Always returns 0.
+ */
+SEC("tracepoint/random/urandom_read")
+int oncpu(struct random_urandom_args *args)
+{
+	__u32 stack_bytes = sizeof(struct bpf_stack_build_id)
+			    * PERF_MAX_STACK_DEPTH;
+	__u32 key = 0, val = 0;
+	__u32 *disabled;
+	void *slot;
+
+	disabled = bpf_map_lookup_elem(&control_map, &key);
+	if (disabled && *disabled)
+		return 0; /* recording is switched off */
+
+	/* The size of stackmap and stackid_hmap should be the same */
+	key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK);
+	if ((int)key < 0)
+		return 0; /* negative value: no stack id was obtained */
+
+	bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+	slot = bpf_map_lookup_elem(&stack_amap, &key);
+	if (!slot)
+		return 0;
+
+	bpf_get_stack(args, slot, stack_bytes,
+		      BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+struct bpf_map_def SEC("maps") control_map = { /* slot 0 non-zero makes oncpu() return without recording anything */
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") stackid_hmap = { /* keyed by the stack id returned by bpf_get_stackid(); value is always 0 */
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 16384,
+};
+
+struct bpf_map_def SEC("maps") stackmap = { /* stack trace map passed to bpf_get_stackid(); one __u64 per frame */
+ .type = BPF_MAP_TYPE_STACK_TRACE,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
+ .max_entries = 16384,
+};
+
+struct bpf_map_def SEC("maps") stack_amap = { /* indexed by stack id; oncpu() fills the slot with bpf_get_stack() output */
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
+ .max_entries = 16384,
+};
+
+/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+struct sched_switch_args { /* context type of oncpu(); the program only passes it on to the stack helpers */
+ unsigned long long pad;
+ char prev_comm[16];
+ int prev_pid;
+ int prev_prio;
+ long long prev_state;
+ char next_comm[16];
+ int next_pid;
+ int next_prio;
+};
+
+/* Unless control_map[0] is non-zero, take the stack id of the current
+ * context, remember it in stackid_hmap and store the raw stack in
+ * stack_amap[stack id]. Always returns 0.
+ */
+SEC("tracepoint/sched/sched_switch")
+int oncpu(struct sched_switch_args *ctx)
+{
+	__u32 stack_bytes = PERF_MAX_STACK_DEPTH * sizeof(__u64);
+	__u32 key = 0, val = 0;
+	__u32 *disabled;
+	void *slot;
+
+	disabled = bpf_map_lookup_elem(&control_map, &key);
+	if (disabled && *disabled)
+		return 0; /* recording is switched off */
+
+	/* The size of stackmap and stackid_hmap should be the same */
+	key = bpf_get_stackid(ctx, &stackmap, 0);
+	if ((int)key < 0)
+		return 0; /* negative value: no stack id was obtained */
+
+	bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+	slot = bpf_map_lookup_elem(&stack_amap, &key);
+	if (!slot)
+		return 0;
+
+	bpf_get_stack(ctx, slot, stack_bytes, 0);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
--- /dev/null
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+/* This program shows clang/llvm is able to generate code pattern
+ * like:
+ * _tcp_send_active_reset:
+ * 0: bf 16 00 00 00 00 00 00 r6 = r1
+ * ......
+ * 335: b7 01 00 00 0f 00 00 00 r1 = 15
+ * 336: 05 00 48 00 00 00 00 00 goto 72
+ *
+ * LBB0_3:
+ * 337: b7 01 00 00 01 00 00 00 r1 = 1
+ * 338: 63 1a d0 ff 00 00 00 00 *(u32 *)(r10 - 48) = r1
+ * 408: b7 01 00 00 03 00 00 00 r1 = 3
+ *
+ * LBB0_4:
+ * 409: 71 a2 fe ff 00 00 00 00 r2 = *(u8 *)(r10 - 2)
+ * 410: bf a7 00 00 00 00 00 00 r7 = r10
+ * 411: 07 07 00 00 b8 ff ff ff r7 += -72
+ * 412: bf 73 00 00 00 00 00 00 r3 = r7
+ * 413: 0f 13 00 00 00 00 00 00 r3 += r1
+ * 414: 73 23 2d 00 00 00 00 00 *(u8 *)(r3 + 45) = r2
+ *
+ * From the above code snippet, the code generated by the compiler
+ * is reasonable. The "r1" is assigned to different values in basic
+ * blocks "_tcp_send_active_reset" and "LBB0_3", and used in "LBB0_4".
+ * The verifier should be able to handle such code patterns.
+ */
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/ipv6.h>
+#include <linux/version.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+
+#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) /* reads P through bpf_probe_read() into a zeroed temporary and yields that temporary */
+#define TCP_ESTATS_MAGIC 0xBAADBEEF /* written to tcp_estats_event.magic by tcp_estats_ev_init() */
+
+/* This test case needs "sock" and "pt_regs" data structure.
+ * Recursively, "sock" needs "sock_common" and "inet_sock".
+ * However, this is a unit test case only for
+ * verifier purpose without bpf program execution.
+ * We can safely mock much simpler data structures, basically
+ * only taking the necessary fields from kernel headers.
+ */
+typedef __u32 __bitwise __portpair;
+typedef __u64 __bitwise __addrpair;
+
+struct sock_common { /* reduced mock; only the members this test reads are kept */
+ unsigned short skc_family;
+ union {
+ __addrpair skc_addrpair;
+ struct {
+ __be32 skc_daddr;
+ __be32 skc_rcv_saddr;
+ };
+ };
+ union {
+ __portpair skc_portpair;
+ struct {
+ __be16 skc_dport;
+ __u16 skc_num;
+ };
+ };
+ struct in6_addr skc_v6_daddr;
+ struct in6_addr skc_v6_rcv_saddr;
+};
+
+struct sock { /* mock wrapper; the sk_* macros below alias members of __sk_common */
+ struct sock_common __sk_common;
+#define sk_family __sk_common.skc_family
+#define sk_v6_daddr __sk_common.skc_v6_daddr
+#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
+};
+
+struct inet_sock { /* mock; starts with struct sock so inet_sk() can cast a struct sock pointer to it */
+ struct sock sk;
+#define inet_daddr sk.__sk_common.skc_daddr
+#define inet_dport sk.__sk_common.skc_dport
+ __be32 inet_saddr;
+ __be16 inet_sport;
+};
+
+struct pt_regs { /* mock with a single member; not referenced elsewhere in the visible code */
+ long di;
+};
+
+static inline struct inet_sock *inet_sk(const struct sock *sk) /* reinterprets sk as the struct inet_sock that begins with it; drops the const qualifier */
+{
+ return (struct inet_sock *)sk;
+}
+
+/* Define various data structures for state recording.
+ * Some fields are not used due to test simplification.
+ */
+enum tcp_estats_addrtype { /* stored in tcp_estats_conn_id.localaddressType */
+ TCP_ESTATS_ADDRTYPE_IPV4 = 1,
+ TCP_ESTATS_ADDRTYPE_IPV6 = 2
+};
+
+enum tcp_estats_event_type { /* only TCP_ESTATS_TX_RESET is used by the visible program */
+ TCP_ESTATS_ESTABLISH,
+ TCP_ESTATS_PERIODIC,
+ TCP_ESTATS_TIMEOUT,
+ TCP_ESTATS_RETRANSMIT_TIMEOUT,
+ TCP_ESTATS_RETRANSMIT_OTHER,
+ TCP_ESTATS_SYN_RETRANSMIT,
+ TCP_ESTATS_SYNACK_RETRANSMIT,
+ TCP_ESTATS_TERM,
+ TCP_ESTATS_TX_RESET,
+ TCP_ESTATS_RX_RESET,
+ TCP_ESTATS_WRITE_TIMEOUT,
+ TCP_ESTATS_CONN_TIMEOUT,
+ TCP_ESTATS_ACK_LATENCY,
+ TCP_ESTATS_NEVENTS,
+};
+
+struct tcp_estats_event { /* magic, ts and event_type are set by tcp_estats_ev_init(); pid and cpu stay zero from the caller's memset */
+ int pid;
+ int cpu;
+ unsigned long ts;
+ unsigned int magic;
+ enum tcp_estats_event_type event_type;
+};
+
+/* The below data structure is packed in order for
+ * llvm compiler to generate expected code.
+ */
+struct tcp_estats_conn_id {
+ unsigned int localaddressType;
+ struct {
+ unsigned char data[16];
+ } localaddress; /* first 4 bytes used for IPv4, all 16 for IPv6 */
+ struct {
+ unsigned char data[16];
+ } remaddress; /* first 4 bytes used for IPv4, all 16 for IPv6 */
+ unsigned short localport;
+ unsigned short remport;
+} __attribute__((__packed__));
+
+struct tcp_estats_basic_event { /* value type of ev_record_map */
+ struct tcp_estats_event event;
+ struct tcp_estats_conn_id conn_id;
+};
+
+struct bpf_map_def SEC("maps") ev_record_map = { /* send_basic_event() stores one event per call under a bpf_get_prandom_u32() key */
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct tcp_estats_basic_event),
+ .max_entries = 1024,
+};
+
+struct dummy_tracepoint_args { /* context type of _dummy_tracepoint() */
+ unsigned long long pad;
+ struct sock *sock;
+};
+
+static __always_inline void tcp_estats_ev_init(struct tcp_estats_event *event, /* fills magic, timestamp and type; leaves pid and cpu untouched */
+ enum tcp_estats_event_type type)
+{
+ event->magic = TCP_ESTATS_MAGIC;
+ event->ts = bpf_ktime_get_ns();
+ event->event_type = type;
+}
+
+static __always_inline void unaligned_u32_set(unsigned char *to, __u8 *from) /* copies 4 bytes one at a time; each source byte is fetched through the _() probe-read macro */
+{
+ to[0] = _(from[0]);
+ to[1] = _(from[1]);
+ to[2] = _(from[2]);
+ to[3] = _(from[3]);
+}
+
+static __always_inline void conn_id_ipv4_init(struct tcp_estats_conn_id *conn_id, /* marks conn_id as IPv4 and copies 4 address bytes each for local and remote */
+ __be32 *saddr, __be32 *daddr)
+{
+ conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV4;
+
+ unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr);
+ unaligned_u32_set(conn_id->remaddress.data, (__u8 *)daddr);
+}
+
+static __always_inline void conn_id_ipv6_init(struct tcp_estats_conn_id *conn_id, /* marks conn_id as IPv6 and copies 16 address bytes each, as four 32-bit chunks */
+ __be32 *saddr, __be32 *daddr)
+{
+ conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV6;
+
+ unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr);
+ unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32),
+ (__u8 *)(saddr + 1));
+ unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 2,
+ (__u8 *)(saddr + 2));
+ unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 3,
+ (__u8 *)(saddr + 3));
+
+ unaligned_u32_set(conn_id->remaddress.data,
+ (__u8 *)(daddr));
+ unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32),
+ (__u8 *)(daddr + 1));
+ unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 2,
+ (__u8 *)(daddr + 2));
+ unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 3,
+ (__u8 *)(daddr + 3));
+}
+
+static __always_inline void tcp_estats_conn_id_init(struct tcp_estats_conn_id *conn_id, /* fills ports, then addresses via the IPv6 or IPv4 helper depending on sk_family */
+ struct sock *sk)
+{
+ conn_id->localport = _(inet_sk(sk)->inet_sport);
+ conn_id->remport = _(inet_sk(sk)->inet_dport);
+
+ if (_(sk->sk_family) == AF_INET6)
+ conn_id_ipv6_init(conn_id,
+ sk->sk_v6_rcv_saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32);
+ else /* every family other than AF_INET6 is treated as IPv4 */
+ conn_id_ipv4_init(conn_id,
+ &inet_sk(sk)->inet_saddr,
+ &inet_sk(sk)->inet_daddr);
+}
+
+static __always_inline void tcp_estats_init(struct sock *sk, /* initializes the event header and the connection id from sk */
+ struct tcp_estats_event *event,
+ struct tcp_estats_conn_id *conn_id,
+ enum tcp_estats_event_type type)
+{
+ tcp_estats_ev_init(event, type);
+ tcp_estats_conn_id_init(conn_id, sk);
+}
+
+static __always_inline void send_basic_event(struct sock *sk, /* builds a basic event for sk on the stack and stores it in ev_record_map */
+ enum tcp_estats_event_type type)
+{
+ struct tcp_estats_basic_event ev;
+ __u32 key = bpf_get_prandom_u32(); /* random key; the update's return value is not checked */
+
+ memset(&ev, 0, sizeof(ev)); /* zero first so members the init helpers skip are defined */
+ tcp_estats_init(sk, &ev.event, &ev.conn_id, type);
+ bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY);
+}
+
+SEC("dummy_tracepoint") /* records a TCP_ESTATS_TX_RESET event for arg->sock when it is non-NULL; always returns 0 */
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+ if (!arg->sock)
+ return 0;
+
+ send_basic_event(arg->sock, TCP_ESTATS_TX_RESET);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "test_tcpbpf.h"
+
+struct bpf_map_def SEC("maps") global_map = { /* struct tcpbpf_globals counters; this program only uses key 0 */
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct tcpbpf_globals),
+ .max_entries = 4,
+};
+
+struct bpf_map_def SEC("maps") sockopt_results = { /* key 0: bpf_setsockopt(TCP_SAVE_SYN) result; key 1: syn bit read from the saved SYN */
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(int),
+ .max_entries = 2,
+};
+
+/* Set bit @event in the event_map bitmask of global_map[0].
+ *
+ * Starts from the stored entry when the lookup succeeds and from an
+ * all-zero struct tcpbpf_globals otherwise, then writes the whole struct
+ * back with BPF_ANY.
+ */
+static inline void update_event_map(int event)
+{
+	struct tcpbpf_globals g = {0}, *gp;
+	__u32 key = 0;
+
+	gp = bpf_map_lookup_elem(&global_map, &key);
+	if (gp)
+		g = *gp;
+
+	g.event_map |= (1 << event);
+	bpf_map_update_elem(&global_map, &key, &g, BPF_ANY);
+}
+
+int _version SEC("version") = 1;
+
+SEC("sockops") /* records every op in global_map's event_map, runs per-op checks, stores rv in skops->reply and returns 1 */
+int bpf_testcb(struct bpf_sock_ops *skops)
+{
+ char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)]; /* buffer for the saved SYN headers */
+ struct tcphdr *thdr;
+ int good_call_rv = 0;
+ int bad_call_rv = 0;
+ int save_syn = 1;
+ int rv = -1;
+ int v = 0;
+ int op;
+
+ op = (int) skops->op;
+
+ update_event_map(op);
+
+ switch (op) {
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ /* Test failure to set largest cb flag (assumes not defined) */
+ bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
+ /* Set callback */
+ good_call_rv = bpf_sock_ops_cb_flags_set(skops,
+ BPF_SOCK_OPS_STATE_CB_FLAG);
+ /* Update results */
+ {
+ __u32 key = 0;
+ struct tcpbpf_globals g, *gp;
+
+ gp = bpf_map_lookup_elem(&global_map, &key);
+ if (!gp)
+ break;
+ g = *gp; /* copy, modify, then write the whole struct back */
+ g.bad_cb_test_rv = bad_call_rv;
+ g.good_cb_test_rv = good_call_rv;
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ }
+ break;
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ skops->sk_txhash = 0x12345f;
+ v = 0xff;
+ rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v, /* rv of this call becomes skops->reply for this op */
+ sizeof(v));
+ if (skops->family == AF_INET6) {
+ v = bpf_getsockopt(skops, IPPROTO_TCP, TCP_SAVED_SYN, /* v is reused as the helper's return code here */
+ header, (sizeof(struct ipv6hdr) +
+ sizeof(struct tcphdr)));
+ if (!v) {
+ int offset = sizeof(struct ipv6hdr);
+
+ thdr = (struct tcphdr *)(header + offset);
+ v = thdr->syn; /* now v holds the syn bit of the saved header */
+ __u32 key = 1;
+
+ bpf_map_update_elem(&sockopt_results, &key, &v,
+ BPF_ANY);
+ }
+ }
+ break;
+ case BPF_SOCK_OPS_RTO_CB:
+ break;
+ case BPF_SOCK_OPS_RETRANS_CB:
+ break;
+ case BPF_SOCK_OPS_STATE_CB:
+ if (skops->args[1] == BPF_TCP_CLOSE) { /* only states with args[1] == BPF_TCP_CLOSE are recorded */
+ __u32 key = 0;
+ struct tcpbpf_globals g, *gp;
+
+ gp = bpf_map_lookup_elem(&global_map, &key);
+ if (!gp)
+ break;
+ g = *gp;
+ if (skops->args[0] == BPF_TCP_LISTEN) {
+ g.num_listen++;
+ } else {
+ g.total_retrans = skops->total_retrans;
+ g.data_segs_in = skops->data_segs_in;
+ g.data_segs_out = skops->data_segs_out;
+ g.bytes_received = skops->bytes_received;
+ g.bytes_acked = skops->bytes_acked;
+ }
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ }
+ break;
+ case BPF_SOCK_OPS_TCP_LISTEN_CB:
+ bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
+ v = bpf_setsockopt(skops, IPPROTO_TCP, TCP_SAVE_SYN,
+ &save_syn, sizeof(save_syn));
+ /* Update global map w/ result of setsock opt */
+ __u32 key = 0;
+
+ bpf_map_update_elem(&sockopt_results, &key, &v, BPF_ANY);
+ break;
+ default:
+ rv = -1;
+ }
+ skops->reply = rv; /* stays -1 unless the PASSIVE_ESTABLISHED case overwrote it */
+ return 1;
+}
+char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "test_tcpnotify.h"
+
+struct bpf_map_def SEC("maps") global_map = { /* struct tcpnotify_globals counters; this program only uses key 0 */
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct tcpnotify_globals),
+ .max_entries = 4,
+};
+
+struct bpf_map_def SEC("maps") perf_event_map = { /* target of bpf_perf_event_output() in the RETRANS_CB case */
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(__u32),
+ .max_entries = 2,
+};
+
+int _version SEC("version") = 1;
+
+SEC("sockops") /* for sockets whose remote port is TESTPORT: enables RTO/retransmit callbacks and reports each retransmit via global_map and perf_event_map */
+int bpf_testcb(struct bpf_sock_ops *skops)
+{
+ int rv = -1;
+ int op;
+
+ op = (int) skops->op;
+
+ if (bpf_ntohl(skops->remote_port) != TESTPORT) { /* other connections: reply -1 and return 0 */
+ skops->reply = -1;
+ return 0;
+ }
+
+ switch (op) {
+ case BPF_SOCK_OPS_TIMEOUT_INIT:
+ case BPF_SOCK_OPS_RWND_INIT:
+ case BPF_SOCK_OPS_NEEDS_ECN:
+ case BPF_SOCK_OPS_BASE_RTT:
+ case BPF_SOCK_OPS_RTO_CB:
+ rv = 1;
+ break;
+
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ case BPF_SOCK_OPS_TCP_LISTEN_CB:
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ bpf_sock_ops_cb_flags_set(skops, (BPF_SOCK_OPS_RETRANS_CB_FLAG| /* return value of the flag update is not checked */
+ BPF_SOCK_OPS_RTO_CB_FLAG));
+ rv = 1;
+ break;
+ case BPF_SOCK_OPS_RETRANS_CB: {
+ __u32 key = 0;
+ struct tcpnotify_globals g, *gp;
+ struct tcp_notifier msg = { /* fixed payload sent with every retransmit notification */
+ .type = 0xde,
+ .subtype = 0xad,
+ .source = 0xbe,
+ .hash = 0xef,
+ };
+
+ rv = 1;
+
+ /* Update results */
+ gp = bpf_map_lookup_elem(&global_map, &key);
+ if (!gp)
+ break; /* no counters entry: skip both the update and the perf event */
+ g = *gp;
+ g.total_retrans = skops->total_retrans;
+ g.ncalls++;
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ bpf_perf_event_output(skops, &perf_event_map,
+ BPF_F_CURRENT_CPU,
+ &msg, sizeof(msg));
+ }
+ break;
+ default:
+ rv = -1;
+ }
+ skops->reply = rv;
+ return 1;
+}
+char _license[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+struct sched_switch_args { /* context type of oncpu(); no member is read by the program */
+ unsigned long long pad;
+ char prev_comm[16];
+ int prev_pid;
+ int prev_prio;
+ long long prev_state;
+ char next_comm[16];
+ int next_pid;
+ int next_prio;
+};
+
+SEC("tracepoint/sched/sched_switch") /* empty program: ignores ctx and returns 0 */
+int oncpu(struct sched_switch_args *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2016 VMware
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/pkt_cls.h>
+#include <linux/erspan.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define ERROR(ret) do {\
+ char fmt[] = "ERROR line:%d ret:%d\n";\
+ bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); /* prints the source line of the ERROR() use and the failing return code */ \
+ } while (0)
+
+int _version SEC("version") = 1;
+
+struct geneve_opt { /* local definition with a fixed 8-byte data area; not used by the functions visible in this part of the file */
+ __be16 opt_class;
+ __u8 type;
+ __u8 length:5;
+ __u8 r3:1;
+ __u8 r2:1;
+ __u8 r1:1;
+ __u8 opt_data[8]; /* hard-coded to 8 byte */
+};
+
+struct vxlan_metadata { /* not used by the functions visible in this part of the file */
+ __u32 gbp;
+};
+
+SEC("gre_set_tunnel") /* sets an IPv4 tunnel key (id 2, ttl 64) on the skb; TC_ACT_SHOT on helper failure, else TC_ACT_OK */
+int _gre_set_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+
+ __builtin_memset(&key, 0x0, sizeof(key)); /* zero every member before setting the ones used */
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
+ if (ret < 0) {
+ ERROR(ret); /* logs this source line and ret */
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+/*
+ * TC program for the GRE test: read the tunnel key of the skb and print
+ * its tunnel id and remote IPv4 address to the trace buffer.
+ * Returns TC_ACT_OK, or TC_ACT_SHOT (after logging) if the key cannot be
+ * read.
+ */
+SEC("gre_get_tunnel")
+int _gre_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip 0x%x\n";
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	err = bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey), 0);
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), tkey.tunnel_id, tkey.remote_ipv4);
+
+	return TC_ACT_OK;
+}
+
+/*
+ * TC program for the ip6gretap test: attach an IPv6 tunnel key (id 2,
+ * TOS 0, TTL 64, label 0xabcde, last address word bpf_htonl(0x11)) and
+ * request zero-checksum TX plus sequence numbers.
+ * Returns TC_ACT_OK on success, otherwise logs and returns TC_ACT_SHOT.
+ */
+SEC("ip6gretap_set_tunnel")
+int _ip6gretap_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	__builtin_memset(&tkey, 0x0, sizeof(tkey));
+	tkey.tunnel_id = 2;
+	tkey.tunnel_tos = 0;
+	tkey.tunnel_ttl = 64;
+	tkey.tunnel_label = 0xabcde;
+	tkey.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
+				     BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+				     BPF_F_SEQ_NUMBER);
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/*
+ * TC program for the ip6gretap test: fetch the IPv6 tunnel key and print
+ * its id, the last word of the remote address and the flow label.
+ * Returns TC_ACT_OK, or TC_ACT_SHOT (after logging) on lookup failure.
+ */
+SEC("ip6gretap_get_tunnel")
+int _ip6gretap_get_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key tkey;
+	char fmt[] = "key %d remote ip6 ::%x label %x\n";
+	int err = bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey),
+					 BPF_F_TUNINFO_IPV6);
+
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), tkey.tunnel_id,
+			 tkey.remote_ipv6[3], tkey.tunnel_label);
+	return TC_ACT_OK;
+}
+
+/*
+ * TC program for the ERSPAN test: attach an IPv4 tunnel key and then the
+ * ERSPAN metadata as tunnel option.
+ *
+ * With ERSPAN_V1 defined the metadata is version 1 with index 123;
+ * otherwise it is version 2 with direction 1 and hardware id 7.
+ * Returns TC_ACT_OK on success; each failing helper is logged and turns
+ * into TC_ACT_SHOT.
+ */
+SEC("erspan_set_tunnel")
+int _erspan_set_tunnel(struct __sk_buff *skb)
+{
+	struct erspan_metadata meta;
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	__builtin_memset(&tkey, 0x0, sizeof(tkey));
+	tkey.tunnel_id = 2;
+	tkey.tunnel_tos = 0;
+	tkey.tunnel_ttl = 64;
+	tkey.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
+				     BPF_F_ZERO_CSUM_TX);
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&meta, 0, sizeof(meta));
+#ifdef ERSPAN_V1
+	meta.version = 1;
+	meta.u.index = bpf_htonl(123);
+#else
+	{
+		/* the hardware id is split into a 4-bit and a 2-bit field */
+		const __u8 hw = 7;
+		const __u8 dir = 1;
+
+		meta.version = 2;
+		meta.u.md2.dir = dir;
+		meta.u.md2.hwid = hw & 0xf;
+		meta.u.md2.hwid_upper = (hw >> 4) & 0x3;
+	}
+#endif
+
+	err = bpf_skb_set_tunnel_opt(skb, &meta, sizeof(meta));
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/*
+ * TC program for the ERSPAN test: read the tunnel key and the ERSPAN
+ * metadata option and print both to the trace buffer.
+ *
+ * The second print depends on ERSPAN_V1: the v1 index, or for v2 the
+ * direction, the recombined hardware id and the timestamp.
+ * Returns TC_ACT_OK, or TC_ACT_SHOT (after logging) if either helper
+ * fails.
+ */
+SEC("erspan_get_tunnel")
+int _erspan_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
+	struct erspan_metadata meta;
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	err = bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey), 0);
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	err = bpf_skb_get_tunnel_opt(skb, &meta, sizeof(meta));
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			 tkey.tunnel_id, tkey.remote_ipv4, meta.version);
+
+#ifdef ERSPAN_V1
+	{
+		char fmt2[] = "\tindex %x\n";
+		__u32 idx = bpf_ntohl(meta.u.index);
+
+		bpf_trace_printk(fmt2, sizeof(fmt2), idx);
+	}
+#else
+	{
+		char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+		bpf_trace_printk(fmt2, sizeof(fmt2),
+				 meta.u.md2.dir,
+				 (meta.u.md2.hwid_upper << 4) + meta.u.md2.hwid,
+				 bpf_ntohl(meta.u.md2.timestamp));
+	}
+#endif
+
+	return TC_ACT_OK;
+}
+
+/*
+ * TC program for the ip6erspan test: attach an IPv6 tunnel key (id 2,
+ * TOS 0, TTL 64, last address word bpf_htonl(0x11)) followed by the
+ * ERSPAN metadata option.
+ *
+ * With ERSPAN_V1 defined the metadata is version 1 with index 123;
+ * otherwise it is version 2 with direction 0 and hardware id 17.
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT (after logging) otherwise.
+ */
+SEC("ip4ip6erspan_set_tunnel")
+int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
+{
+	struct erspan_metadata meta;
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	__builtin_memset(&tkey, 0x0, sizeof(tkey));
+	tkey.tunnel_id = 2;
+	tkey.tunnel_tos = 0;
+	tkey.tunnel_ttl = 64;
+	tkey.remote_ipv6[3] = bpf_htonl(0x11);
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
+				     BPF_F_TUNINFO_IPV6);
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&meta, 0, sizeof(meta));
+
+#ifdef ERSPAN_V1
+	meta.version = 1;
+	meta.u.index = bpf_htonl(123);
+#else
+	{
+		/* 17 does not fit in 4 bits, so hwid_upper becomes non-zero */
+		const __u8 hw = 17;
+		const __u8 dir = 0;
+
+		meta.version = 2;
+		meta.u.md2.dir = dir;
+		meta.u.md2.hwid = hw & 0xf;
+		meta.u.md2.hwid_upper = (hw >> 4) & 0x3;
+	}
+#endif
+
+	err = bpf_skb_set_tunnel_opt(skb, &meta, sizeof(meta));
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/*
+ * TC program for the ip6erspan test: read the IPv6 tunnel key and the
+ * ERSPAN metadata option and print both to the trace buffer.
+ *
+ * The key is requested with BPF_F_TUNINFO_IPV6, so the remote address is
+ * reported from remote_ipv6[3] (the word the matching setter fills in and
+ * the one the "::%x" format describes) rather than from remote_ipv4.
+ * Returns TC_ACT_OK, or TC_ACT_SHOT (after logging) if either helper
+ * fails.
+ */
+SEC("ip4ip6erspan_get_tunnel")
+int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
+	struct bpf_tunnel_key key;
+	struct erspan_metadata md;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			 key.tunnel_id, key.remote_ipv6[3], md.version);
+
+#ifdef ERSPAN_V1
+	char fmt2[] = "\tindex %x\n";
+
+	/* index is only meaningful for v1, so no local outside the #ifdef */
+	bpf_trace_printk(fmt2, sizeof(fmt2), bpf_ntohl(md.u.index));
+#else
+	char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+	bpf_trace_printk(fmt2, sizeof(fmt2),
+			 md.u.md2.dir,
+			 (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+			 bpf_ntohl(md.u.md2.timestamp));
+#endif
+
+	return TC_ACT_OK;
+}
+
+/*
+ * TC program for the VXLAN test: attach an IPv4 tunnel key (id 2, TOS 0,
+ * TTL 64, remote 0xac100164) and a vxlan_metadata option with gbp
+ * 0x800FF.
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT (after logging) otherwise.
+ */
+SEC("vxlan_set_tunnel")
+int _vxlan_set_tunnel(struct __sk_buff *skb)
+{
+	struct vxlan_metadata meta;
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	__builtin_memset(&tkey, 0x0, sizeof(tkey));
+	tkey.tunnel_id = 2;
+	tkey.tunnel_tos = 0;
+	tkey.tunnel_ttl = 64;
+	tkey.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
+				     BPF_F_ZERO_CSUM_TX);
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	meta.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
+	err = bpf_skb_set_tunnel_opt(skb, &meta, sizeof(meta));
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/*
+ * TC program for the VXLAN test: read the tunnel key and the
+ * vxlan_metadata option, then print tunnel id, remote IPv4 and gbp.
+ * Returns TC_ACT_OK, or TC_ACT_SHOT (after logging) if either helper
+ * fails.
+ */
+SEC("vxlan_get_tunnel")
+int _vxlan_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
+	struct vxlan_metadata meta;
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	err = bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey), 0);
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	err = bpf_skb_get_tunnel_opt(skb, &meta, sizeof(meta));
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			 tkey.tunnel_id, tkey.remote_ipv4, meta.gbp);
+	return TC_ACT_OK;
+}
+
+/*
+ * TC program for the ip6vxlan test: attach an IPv6 tunnel key with
+ * id 22, TOS 0, TTL 64 and last address word bpf_htonl(0x11).
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT (after logging) otherwise.
+ */
+SEC("ip6vxlan_set_tunnel")
+int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	__builtin_memset(&tkey, 0x0, sizeof(tkey));
+	tkey.tunnel_id = 22;
+	tkey.tunnel_tos = 0;
+	tkey.tunnel_ttl = 64;
+	tkey.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
+				     BPF_F_TUNINFO_IPV6);
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/*
+ * TC program for the ip6vxlan test: fetch the IPv6 tunnel key and print
+ * its id, the last word of the remote address and the flow label.
+ * Returns TC_ACT_OK, or TC_ACT_SHOT (after logging) on lookup failure.
+ */
+SEC("ip6vxlan_get_tunnel")
+int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key tkey;
+	char fmt[] = "key %d remote ip6 ::%x label %x\n";
+	int err = bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey),
+					 BPF_F_TUNINFO_IPV6);
+
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), tkey.tunnel_id,
+			 tkey.remote_ipv6[3], tkey.tunnel_label);
+	return TC_ACT_OK;
+}
+
+/*
+ * TC program for the geneve test: attach an IPv4 tunnel key (id 2, TOS 0,
+ * TTL 64, remote 0xac100164) and one geneve option.
+ *
+ * The option uses class bpf_htons(0x102), type 0x08, length 2 and carries
+ * bpf_htonl(0xdeadbeef) in the first four data bytes.
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT (after logging) otherwise.
+ */
+SEC("geneve_set_tunnel")
+int _geneve_set_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	__builtin_memset(&gopt, 0x0, sizeof(gopt));
+	gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+	gopt.type = 0x08;
+	gopt.r1 = 0;
+	gopt.r2 = 0;
+	gopt.r3 = 0;
+	gopt.length = 2; /* 4-byte multiple */
+	*(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef);
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+/*
+ * TC program for the geneve test: read the tunnel key and the geneve
+ * option and print tunnel id, remote IPv4 and the option class.
+ * Returns TC_ACT_OK, or TC_ACT_SHOT (after logging) if either helper
+ * fails.
+ */
+SEC("geneve_get_tunnel")
+int _geneve_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
+	struct bpf_tunnel_key tkey;
+	struct geneve_opt opt;
+	int err;
+
+	err = bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey), 0);
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	err = bpf_skb_get_tunnel_opt(skb, &opt, sizeof(opt));
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			 tkey.tunnel_id, tkey.remote_ipv4, opt.opt_class);
+
+	return TC_ACT_OK;
+}
+
+/*
+ * TC program for the ip6geneve test: attach an IPv6 tunnel key (id 22,
+ * TOS 0, TTL 64, last address word bpf_htonl(0x11)) and one geneve
+ * option carrying bpf_htonl(0xfeedbeef).
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT (after logging) otherwise.
+ */
+SEC("ip6geneve_set_tunnel")
+int _ip6geneve_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key tkey;
+	struct geneve_opt opt;
+	int err;
+
+	__builtin_memset(&tkey, 0x0, sizeof(tkey));
+	tkey.tunnel_id = 22;
+	tkey.tunnel_tos = 0;
+	tkey.tunnel_ttl = 64;
+	tkey.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
+				     BPF_F_TUNINFO_IPV6);
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&opt, 0x0, sizeof(opt));
+	opt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+	opt.type = 0x08;
+	opt.length = 2; /* 4-byte multiple */
+	opt.r1 = 0;
+	opt.r2 = 0;
+	opt.r3 = 0;
+	*(int *) &opt.opt_data = bpf_htonl(0xfeedbeef);
+
+	err = bpf_skb_set_tunnel_opt(skb, &opt, sizeof(opt));
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/*
+ * TC program for the ip6geneve test: read the IPv6 tunnel key and the
+ * geneve option and print tunnel id, remote address word and option
+ * class.
+ *
+ * The key is requested with BPF_F_TUNINFO_IPV6, so the remote address is
+ * taken from remote_ipv6[3], the word the matching setter fills in,
+ * instead of from the IPv4 member.
+ * Returns TC_ACT_OK, or TC_ACT_SHOT (after logging) if either helper
+ * fails.
+ */
+SEC("ip6geneve_get_tunnel")
+int _ip6geneve_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			 key.tunnel_id, key.remote_ipv6[3], gopt.opt_class);
+
+	return TC_ACT_OK;
+}
+
+/*
+ * TC program for the ipip test: choose the remote IPv4 endpoint from the
+ * packet and attach a tunnel key with TTL 64.
+ *
+ * ICMP always goes to 0xac100164.  Anything else must be TCP with a
+ * 20-byte IP header: destination port 5200 selects 0xac100164, port 5201
+ * selects 0xac100165, every other packet is shot.
+ * Returns TC_ACT_OK once the key is set, TC_ACT_SHOT otherwise.
+ */
+SEC("ipip_set_tunnel")
+int _ipip_set_tunnel(struct __sk_buff *skb)
+{
+	void *pkt_end = (void *)(long)skb->data_end;
+	void *pkt = (void *)(long)skb->data;
+	struct bpf_tunnel_key tkey = {};
+	struct iphdr *ip4 = pkt;
+	struct tcphdr *th = pkt + sizeof(*ip4);
+	int err;
+
+	/* single length check */
+	if (pkt + sizeof(*ip4) + sizeof(*th) > pkt_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	tkey.tunnel_ttl = 64;
+	if (ip4->protocol != IPPROTO_ICMP) {
+		if (ip4->protocol != IPPROTO_TCP || ip4->ihl != 5)
+			return TC_ACT_SHOT;
+
+		if (th->dest == bpf_htons(5200))
+			tkey.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+		else if (th->dest == bpf_htons(5201))
+			tkey.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
+		else
+			return TC_ACT_SHOT;
+	} else {
+		tkey.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	}
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), 0);
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/*
+ * TC program for the ipip test: read the tunnel key and print its remote
+ * IPv4 address.
+ * Returns TC_ACT_OK, or TC_ACT_SHOT (after logging) on lookup failure.
+ */
+SEC("ipip_get_tunnel")
+int _ipip_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "remote ip 0x%x\n";
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	err = bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey), 0);
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), tkey.remote_ipv4);
+
+	return TC_ACT_OK;
+}
+
+/*
+ * TC program for the ipip6 test: after checking that an IPv4 plus TCP
+ * header fits in the packet, attach an IPv6 tunnel key with TTL 64 and
+ * last address word bpf_htonl(0x11).
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT (after logging) otherwise.
+ */
+SEC("ipip6_set_tunnel")
+int _ipip6_set_tunnel(struct __sk_buff *skb)
+{
+	void *pkt_end = (void *)(long)skb->data_end;
+	void *pkt = (void *)(long)skb->data;
+	struct bpf_tunnel_key tkey = {};
+	int err;
+
+	/* single length check */
+	if (pkt + sizeof(struct iphdr) + sizeof(struct tcphdr) > pkt_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&tkey, 0x0, sizeof(tkey));
+	tkey.tunnel_ttl = 64;
+	tkey.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
+				     BPF_F_TUNINFO_IPV6);
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/*
+ * TC program for the ipip6 test: read the IPv6 tunnel key and print the
+ * first and last word of the remote address.
+ *
+ * The address words are kept in network byte order (the setter stores
+ * them with bpf_htonl()), so they are converted with bpf_ntohl() before
+ * being printed.
+ * Returns TC_ACT_OK, or TC_ACT_SHOT (after logging) on lookup failure.
+ */
+SEC("ipip6_get_tunnel")
+int _ipip6_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip6 %x::%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), bpf_ntohl(key.remote_ipv6[0]),
+			 bpf_ntohl(key.remote_ipv6[3]));
+	return TC_ACT_OK;
+}
+
+/*
+ * TC program for the ip6ip6 test: choose the remote IPv6 endpoint from
+ * the packet and attach a tunnel key with TTL 64.
+ *
+ * The first address word is always bpf_htonl(0x2401db00).  Next header
+ * 58 selects last word 1; next header 6 selects 1 for destination port
+ * 5200 and 2 for port 5201.  Everything else is logged and shot.
+ * Returns TC_ACT_OK once the key is set, TC_ACT_SHOT otherwise.
+ */
+SEC("ip6ip6_set_tunnel")
+int _ip6ip6_set_tunnel(struct __sk_buff *skb)
+{
+	void *pkt_end = (void *)(long)skb->data_end;
+	void *pkt = (void *)(long)skb->data;
+	struct bpf_tunnel_key tkey = {};
+	struct ipv6hdr *ip6 = pkt;
+	struct tcphdr *th = pkt + sizeof(*ip6);
+	int err;
+
+	/* single length check */
+	if (pkt + sizeof(*ip6) + sizeof(*th) > pkt_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	tkey.tunnel_ttl = 64;
+	tkey.remote_ipv6[0] = bpf_htonl(0x2401db00);
+
+	if (ip6->nexthdr != 58 /* NEXTHDR_ICMP */) {
+		if (ip6->nexthdr != 6 /* NEXTHDR_TCP */) {
+			ERROR(ip6->nexthdr);
+			return TC_ACT_SHOT;
+		}
+
+		if (th->dest == bpf_htons(5200)) {
+			tkey.remote_ipv6[3] = bpf_htonl(1);
+		} else if (th->dest == bpf_htons(5201)) {
+			tkey.remote_ipv6[3] = bpf_htonl(2);
+		} else {
+			ERROR(th->dest);
+			return TC_ACT_SHOT;
+		}
+	} else {
+		tkey.remote_ipv6[3] = bpf_htonl(1);
+	}
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
+				     BPF_F_TUNINFO_IPV6);
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/*
+ * TC program for the ip6ip6 test: read the IPv6 tunnel key and print the
+ * first and last word of the remote address.
+ *
+ * The address words are kept in network byte order (the setter stores
+ * them with bpf_htonl()), so they are converted with bpf_ntohl() before
+ * being printed.
+ * Returns TC_ACT_OK, or TC_ACT_SHOT (after logging) on lookup failure.
+ */
+SEC("ip6ip6_get_tunnel")
+int _ip6ip6_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip6 %x::%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), bpf_ntohl(key.remote_ipv6[0]),
+			 bpf_ntohl(key.remote_ipv6[3]));
+	return TC_ACT_OK;
+}
+
+/*
+ * TC program for the xfrm test: look up xfrm state index 0 of the skb
+ * and, when present, print its reqid, SPI and remote IPv4 address.
+ * Always returns TC_ACT_OK, also when the lookup fails.
+ */
+SEC("xfrm_get_state")
+int _xfrm_get_state(struct __sk_buff *skb)
+{
+	char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n";
+	struct bpf_xfrm_state state;
+	int err;
+
+	err = bpf_skb_get_xfrm_state(skb, 0, &state, sizeof(state), 0);
+	if (err >= 0)
+		bpf_trace_printk(fmt, sizeof(fmt), state.reqid,
+				 bpf_ntohl(state.spi),
+				 bpf_ntohl(state.remote_ipv4));
+
+	return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL"; /* license string emitted via SEC("license") */
--- /dev/null
+/* Copyright (c) 2016,2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "test_iptunnel_common.h"
+
+int _version SEC("version") = 1; /* version word emitted via SEC("version") */
+
+struct bpf_map_def SEC("maps") rxcnt = { /* per-CPU counters bumped by count_tx(), indexed by the protocol value passed to it */
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = 256,
+};
+
+struct bpf_map_def SEC("maps") vip2tnl = { /* struct vip -> struct iptnl_info lookup used by handle_ipv4()/handle_ipv6(); both types come from test_iptunnel_common.h */
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip),
+ .value_size = sizeof(struct iptnl_info),
+ .max_entries = MAX_IPTNL_ENTRIES,
+};
+
+/*
+ * Increment the rxcnt slot keyed by @protocol, if such a slot exists.
+ * A failed lookup is silently ignored.
+ */
+static __always_inline void count_tx(__u32 protocol)
+{
+	__u64 *slot = bpf_map_lookup_elem(&rxcnt, &protocol);
+
+	if (!slot)
+		return;
+
+	*slot += 1;
+}
+
+/*
+ * Extract the destination port from the transport header at @trans_data.
+ *
+ * Returns the raw (unconverted) dest field for TCP and UDP, 0 for every
+ * other @protocol, and -1 when the TCP/UDP header does not fit before
+ * @data_end.
+ */
+static __always_inline int get_dport(void *trans_data, void *data_end,
+				     __u8 protocol)
+{
+	if (protocol == IPPROTO_TCP) {
+		struct tcphdr *tcp = (struct tcphdr *)trans_data;
+
+		if (tcp + 1 > data_end)
+			return -1;
+		return tcp->dest;
+	}
+
+	if (protocol == IPPROTO_UDP) {
+		struct udphdr *udp = (struct udphdr *)trans_data;
+
+		if (udp + 1 > data_end)
+			return -1;
+		return udp->dest;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill the outer Ethernet header @new_eth: the source MAC is the old
+ * header's destination MAC, the destination MAC comes from @tnl->dmac
+ * and the EtherType is @h_proto.
+ */
+static __always_inline void set_ethhdr(struct ethhdr *new_eth,
+				       const struct ethhdr *old_eth,
+				       const struct iptnl_info *tnl,
+				       __be16 h_proto)
+{
+	/* we answer from the address the frame was sent to */
+	memcpy(new_eth->h_source, old_eth->h_dest,
+	       sizeof(new_eth->h_source));
+	memcpy(new_eth->h_dest, tnl->dmac,
+	       sizeof(new_eth->h_dest));
+
+	new_eth->h_proto = h_proto;
+}
+
+/*
+ * Encapsulate an IPv4 packet in a new outer IPv4 header (IPPROTO_IPIP)
+ * when its (protocol, family, daddr, dport) tuple has an AF_INET entry in
+ * vip2tnl, and send it back out with XDP_TX.
+ *
+ * Returns XDP_DROP when a header does not fit in the packet or the head
+ * cannot be grown, XDP_PASS when no IPv4 tunnel is configured for the
+ * tuple, and XDP_TX after the outer headers have been written.
+ */
+static __always_inline int handle_ipv4(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct iptnl_info *tnl;
+	struct ethhdr *new_eth;
+	struct ethhdr *old_eth;
+	struct iphdr *iph = data + sizeof(struct ethhdr);
+	__u16 *next_iph;
+	__u16 payload_len;
+	struct vip vip = {};
+	int dport;
+	__u32 csum = 0;
+	int i;
+
+	if (iph + 1 > data_end)
+		return XDP_DROP;
+
+	dport = get_dport(iph + 1, data_end, iph->protocol);
+	if (dport == -1)
+		return XDP_DROP;
+
+	vip.protocol = iph->protocol;
+	vip.family = AF_INET;
+	vip.daddr.v4 = iph->daddr;
+	vip.dport = dport;
+	payload_len = bpf_ntohs(iph->tot_len);
+
+	tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+	/* It only does v4-in-v4 */
+	if (!tnl || tnl->family != AF_INET)
+		return XDP_PASS;
+
+	/* make room for the outer IP header in front of the packet */
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+		return XDP_DROP;
+
+	/* the packet pointers must be reloaded after adjusting the head */
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+
+	new_eth = data;
+	iph = data + sizeof(*new_eth);
+	old_eth = data + sizeof(*iph);
+
+	if (new_eth + 1 > data_end ||
+	    old_eth + 1 > data_end ||
+	    iph + 1 > data_end)
+		return XDP_DROP;
+
+	set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP));
+
+	iph->version = 4;
+	iph->ihl = sizeof(*iph) >> 2;
+	iph->frag_off = 0;
+	iph->protocol = IPPROTO_IPIP;
+	iph->check = 0;
+	iph->tos = 0;
+	iph->tot_len = bpf_htons(payload_len + sizeof(*iph));
+	iph->daddr = tnl->daddr.v4;
+	iph->saddr = tnl->saddr.v4;
+	iph->ttl = 8;
+
+	next_iph = (__u16 *)iph;
+#pragma clang loop unroll(full)
+	for (i = 0; i < sizeof(*iph) >> 1; i++)
+		csum += *next_iph++;
+
+	/*
+	 * Fold the 32-bit sum into 16 bits.  The first fold can itself carry
+	 * into bit 16 (e.g. 0x5ffff -> 0x10004), so fold a second time
+	 * instead of letting the store to the 16-bit field drop that carry.
+	 */
+	csum = (csum & 0xffff) + (csum >> 16);
+	csum = (csum & 0xffff) + (csum >> 16);
+	iph->check = ~csum;
+
+	count_tx(vip.protocol);
+
+	return XDP_TX;
+}
+
+/*
+ * Encapsulate an IPv6 packet in a new outer IPv6 header when its
+ * (protocol, family, daddr, dport) tuple has an AF_INET6 entry in
+ * vip2tnl, and send it back out with XDP_TX.
+ *
+ * Returns XDP_DROP when a header does not fit in the packet or the head
+ * cannot be grown, XDP_PASS when no IPv6 tunnel is configured for the
+ * tuple, and XDP_TX after the outer headers have been written.
+ */
+static __always_inline int handle_ipv6(struct xdp_md *xdp)
+{
+	void *pkt_end = (void *)(long)xdp->data_end;
+	void *pkt = (void *)(long)xdp->data;
+	struct ipv6hdr *ip6h = pkt + sizeof(struct ethhdr);
+	struct ethhdr *outer_eth;
+	struct ethhdr *inner_eth;
+	struct iptnl_info *tnl;
+	struct vip key = {};
+	__u16 inner_len;
+	int dport;
+
+	if (ip6h + 1 > pkt_end)
+		return XDP_DROP;
+
+	dport = get_dport(ip6h + 1, pkt_end, ip6h->nexthdr);
+	if (dport == -1)
+		return XDP_DROP;
+
+	key.protocol = ip6h->nexthdr;
+	key.family = AF_INET6;
+	memcpy(key.daddr.v6, ip6h->daddr.s6_addr32, sizeof(key.daddr));
+	key.dport = dport;
+	/* kept in network order; converted when the outer length is built */
+	inner_len = ip6h->payload_len;
+
+	tnl = bpf_map_lookup_elem(&vip2tnl, &key);
+	/* It only does v6-in-v6 */
+	if (!tnl || tnl->family != AF_INET6)
+		return XDP_PASS;
+
+	/* make room for the outer IPv6 header in front of the packet */
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+		return XDP_DROP;
+
+	/* the packet pointers must be reloaded after adjusting the head */
+	pkt = (void *)(long)xdp->data;
+	pkt_end = (void *)(long)xdp->data_end;
+
+	outer_eth = pkt;
+	ip6h = pkt + sizeof(*outer_eth);
+	inner_eth = pkt + sizeof(*ip6h);
+
+	if (outer_eth + 1 > pkt_end || inner_eth + 1 > pkt_end ||
+	    ip6h + 1 > pkt_end)
+		return XDP_DROP;
+
+	set_ethhdr(outer_eth, inner_eth, tnl, bpf_htons(ETH_P_IPV6));
+
+	ip6h->version = 6;
+	ip6h->priority = 0;
+	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+	ip6h->payload_len = bpf_htons(bpf_ntohs(inner_len) + sizeof(*ip6h));
+	ip6h->nexthdr = IPPROTO_IPV6;
+	ip6h->hop_limit = 8;
+	memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
+	memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
+
+	count_tx(key.protocol);
+
+	return XDP_TX;
+}
+
+/*
+ * XDP entry point: dispatch on the EtherType of the frame.
+ * IPv4 goes to handle_ipv4(), IPv6 to handle_ipv6(); frames that are too
+ * short for an Ethernet header or carry any other EtherType are dropped.
+ */
+SEC("xdp_tx_iptunnel")
+int _xdp_tx_iptunnel(struct xdp_md *xdp)
+{
+	void *pkt_end = (void *)(long)xdp->data_end;
+	struct ethhdr *eth = (void *)(long)xdp->data;
+	__u16 proto;
+
+	if (eth + 1 > pkt_end)
+		return XDP_DROP;
+
+	proto = eth->h_proto;
+
+	if (proto == bpf_htons(ETH_P_IP))
+		return handle_ipv4(xdp);
+	if (proto == bpf_htons(ETH_P_IPV6))
+		return handle_ipv6(xdp);
+
+	return XDP_DROP;
+}
+
+char _license[] SEC("license") = "GPL"; /* license string emitted via SEC("license") */
--- /dev/null
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
+
+#include "bpf_helpers.h"
+
+#define __round_mask(x, y) ((__typeof__(x))((y) - 1)) /* y - 1 converted to the type of x; a low-bit mask when y is a power of two */
+#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) /* rounds x up to the next multiple of y, for power-of-two y */
+#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem /* converts the integer field ctx->mem into a pointer */
+
+/*
+ * TC program: compare the first ETH_ALEN bytes of the packet with the
+ * first ETH_ALEN bytes of the metadata area in front of it.
+ *
+ * Returns TC_ACT_OK when both are identical; TC_ACT_SHOT when they
+ * differ, when the packet is shorter than ETH_ALEN, or when the metadata
+ * area is smaller than round_up(ETH_ALEN, 4) bytes.
+ */
+SEC("t")
+int ing_cls(struct __sk_buff *ctx)
+{
+	__u8 *meta = ctx_ptr(ctx, data_meta);
+	__u8 *pkt_end = ctx_ptr(ctx, data_end);
+	__u8 *pkt = ctx_ptr(ctx, data);
+	__u32 mismatch = 0;
+
+	if (pkt + ETH_ALEN > pkt_end)
+		return TC_ACT_SHOT;
+	if (meta + round_up(ETH_ALEN, 4) > pkt)
+		return TC_ACT_SHOT;
+
+	/* 4 + 2 bytes cover the six bytes that are compared */
+	mismatch |= ((__u32 *)meta)[0] ^ ((__u32 *)pkt)[0];
+	mismatch |= ((__u16 *)meta)[2] ^ ((__u16 *)pkt)[2];
+
+	if (mismatch)
+		return TC_ACT_SHOT;
+	return TC_ACT_OK;
+}
+
+/*
+ * XDP program: grow the metadata area by round_up(ETH_ALEN, 4) bytes and
+ * copy the first ETH_ALEN bytes of the packet into it.
+ *
+ * Returns XDP_PASS after the copy; XDP_DROP when the metadata area cannot
+ * be grown or when either bounds check fails.
+ */
+SEC("x")
+int ing_xdp(struct xdp_md *ctx)
+{
+	__u8 *meta, *pkt, *pkt_end;
+	int err;
+
+	err = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4));
+	if (err < 0)
+		return XDP_DROP;
+
+	/* read the pointers only after the adjustment */
+	pkt = ctx_ptr(ctx, data);
+	pkt_end = ctx_ptr(ctx, data_end);
+	meta = ctx_ptr(ctx, data_meta);
+
+	if (pkt + ETH_ALEN > pkt_end)
+		return XDP_DROP;
+	if (meta + round_up(ETH_ALEN, 4) > pkt)
+		return XDP_DROP;
+
+	__builtin_memcpy(meta, pkt, ETH_ALEN);
+
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL"; /* license string emitted via SEC("license") */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+
+#define bpf_printk(fmt, ...) /* copies fmt into a local char array and forwards it, its size and the arguments to bpf_trace_printk() */ \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+/*
+ * Rotate @word left by @shift bits.  The complementary right shift is
+ * masked to 0..31, so a @shift of 0 does not shift by the full width.
+ */
+static __u32 rol32(__u32 word, unsigned int shift)
+{
+	const unsigned int back = (-shift) & 31;
+
+	return (word << shift) | (word >> back);
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ *
+ * __jhash_mix() stirs the three state words a, b and c in place with
+ * subtract/xor/rotate/add steps; all three arguments are modified.
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+#define __jhash_final(a, b, c) /* last mixing round over a, b, c (all modified in place); the callers return c afterwards */ \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+#define JHASH_INITVAL 0xdeadbeef /* constant added into the initial state by jhash() and jhash_2words() */
+
+typedef unsigned int u32; /* local alias used by the jhash helpers below */
+
+static __attribute__ ((noinline)) /* hash `length` bytes at `key`, seeded with `initval`; returns the final c word */
+u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval; /* all three state words start from the same seed */
+
+ while (length > 12) { /* consume 12 bytes per round, leaving 1..12 bytes (or 0 for empty input) for the switch */
+ a += *(u32 *)(k);
+ b += *(u32 *)(k + 4);
+ c += *(u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ switch (length) { /* every case deliberately falls through to the next one */
+ case 12: c += (u32)k[11]<<24; /* fall through */
+ case 11: c += (u32)k[10]<<16; /* fall through */
+ case 10: c += (u32)k[9]<<8; /* fall through */
+ case 9: c += k[8]; /* fall through */
+ case 8: b += (u32)k[7]<<24; /* fall through */
+ case 7: b += (u32)k[6]<<16; /* fall through */
+ case 6: b += (u32)k[5]<<8; /* fall through */
+ case 5: b += k[4]; /* fall through */
+ case 4: a += (u32)k[3]<<24; /* fall through */
+ case 3: a += (u32)k[2]<<16; /* fall through */
+ case 2: a += (u32)k[1]<<8; /* fall through */
+ case 1: a += k[0];
+ __jhash_final(a, b, c); /* reached from every non-zero remainder; fall through to the break */
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+/*
+ * Add @initval to each of the three words, run the final mixing round
+ * over them and return the resulting c word.
+ */
+static __attribute__ ((noinline))
+u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+	c += initval;
+	b += initval;
+	a += initval;
+
+	__jhash_final(a, b, c);
+
+	return c;
+}
+
+/*
+ * Hash the two words @a and @b.  The third word is 0 and the seed is
+ * @initval plus JHASH_INITVAL plus (2 << 2).
+ */
+static __attribute__ ((noinline))
+u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+	const u32 seed = initval + JHASH_INITVAL + (2 << 2);
+
+	return __jhash_nwords(a, b, 0, seed);
+}
+
+struct flow_key { /* flow tuple; its size is the key_size of the lru_cache map */
+ union { /* IPv4 and IPv6 source share storage */
+ __be32 src; /* read by encap_v4() */
+ __be32 srcv6[4]; /* word 3 is read by encap_v6() */
+ };
+ union { /* IPv4 and IPv6 destination share storage */
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ union {
+ __u32 ports;
+ __u16 port16[2]; /* written by parse_udp()/parse_tcp() */
+ };
+ __u8 proto;
+};
+
+struct packet_description { /* per-packet state handed to the parse_*() and encap_*() helpers */
+ struct flow_key flow;
+ __u8 flags; /* bit 0 is read as "is_icmp" by parse_udp()/parse_tcp(); parse_tcp() sets bit 1 when tcp->syn is set */
+};
+
+struct ctl_value { /* value type of the ctl_array map; the three members overlay each other */
+ union {
+ __u64 value;
+ __u32 ifindex;
+ __u8 mac[6]; /* copied into the outgoing eth_dest by encap_v4()/encap_v6() */
+ };
+};
+
+struct vip_definition { /* key type of the vip_map map */
+ union { /* IPv4 and IPv6 address share storage */
+ __be32 vip;
+ __be32 vipv6[4];
+ };
+ __u16 port;
+ __u16 family;
+ __u8 proto;
+};
+
+struct vip_meta { /* value type of the vip_map map */
+ __u32 flags;
+ __u32 vip_num;
+};
+
+struct real_pos_lru { /* value type of the lru_cache map */
+ __u32 pos;
+ __u64 atime;
+};
+
+struct real_definition { /* value type of the reals map; passed as dst to encap_v4()/encap_v6() */
+ union { /* IPv4 and IPv6 address share storage */
+ __be32 dst; /* read by encap_v4() */
+ __be32 dstv6[4]; /* copied into the outer daddr by encap_v6() */
+ };
+ __u8 flags;
+};
+
+struct lb_stats { /* value type of the stats map */
+ __u64 v2;
+ __u64 v1;
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = { /* hash map: struct vip_definition -> struct vip_meta */
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip_definition),
+ .value_size = sizeof(struct vip_meta),
+ .max_entries = 512,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = { /* LRU hash map: struct flow_key -> struct real_pos_lru */
+ .type = BPF_MAP_TYPE_LRU_HASH,
+ .key_size = sizeof(struct flow_key),
+ .value_size = sizeof(struct real_pos_lru),
+ .max_entries = 300,
+ .map_flags = 1U << 1, /* NOTE(review): presumably BPF_F_NO_COMMON_LRU -- confirm against linux/bpf.h */
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = { /* array map of __u32 values indexed by __u32 */
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 12 * 655,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) reals = { /* array map of struct real_definition indexed by __u32 */
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct real_definition),
+ .max_entries = 40,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) stats = { /* per-CPU array map of struct lb_stats indexed by __u32 */
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct lb_stats),
+ .max_entries = 515,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = { /* array map of struct ctl_value indexed by __u32 */
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct ctl_value),
+ .max_entries = 16,
+ .map_flags = 0,
+};
+
+struct eth_hdr { /* file-local Ethernet header layout: 6 + 6 + 2 bytes */
+ unsigned char eth_dest[6];
+ unsigned char eth_source[6];
+ unsigned short eth_proto; /* written with raw constants (8 or 56710) by the encap/decap helpers */
+};
+
+/*
+ * Byte offset of the transport header from the start of the frame.
+ *
+ * That is the Ethernet header plus one IP header of the selected family;
+ * for ICMP the ICMP header and a second IP header of the same family are
+ * added on top.
+ */
+static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
+{
+	__u64 ip_len = is_ipv6 ? sizeof(struct ipv6hdr) : sizeof(struct iphdr);
+	__u64 icmp_len = is_ipv6 ? sizeof(struct icmp6hdr) :
+				   sizeof(struct icmphdr);
+	__u64 off = sizeof(struct eth_hdr) + ip_len;
+
+	if (!is_icmp)
+		return off;
+
+	return off + icmp_len + ip_len;
+}
+
+/*
+ * Store the UDP ports of the packet in @pckt->flow.port16.
+ *
+ * When bit 0 of @pckt->flags is set (ICMP case) source and destination
+ * are stored swapped.  Returns false if the UDP header does not fit
+ * before @data_end, true otherwise.
+ */
+static __attribute__ ((noinline))
+bool parse_udp(void *data, void *data_end,
+	       bool is_ipv6, struct packet_description *pckt)
+{
+	bool is_icmp = (pckt->flags & (1 << 0)) != 0;
+	struct udphdr *uh = data + calc_offset(is_ipv6, is_icmp);
+
+	if (uh + 1 > data_end)
+		return false;
+
+	if (is_icmp) {
+		pckt->flow.port16[0] = uh->dest;
+		pckt->flow.port16[1] = uh->source;
+	} else {
+		pckt->flow.port16[0] = uh->source;
+		pckt->flow.port16[1] = uh->dest;
+	}
+
+	return true;
+}
+
+/*
+ * Store the TCP ports of the packet in @pckt->flow.port16 and set bit 1
+ * of @pckt->flags when the SYN flag is present.
+ *
+ * When bit 0 of @pckt->flags is set (ICMP case) source and destination
+ * are stored swapped.  Returns false if the TCP header does not fit
+ * before @data_end, true otherwise.
+ */
+static __attribute__ ((noinline))
+bool parse_tcp(void *data, void *data_end,
+	       bool is_ipv6, struct packet_description *pckt)
+{
+	bool is_icmp = (pckt->flags & (1 << 0)) != 0;
+	struct tcphdr *th = data + calc_offset(is_ipv6, is_icmp);
+
+	if (th + 1 > data_end)
+		return false;
+
+	if (th->syn)
+		pckt->flags |= (1 << 1);
+
+	if (is_icmp) {
+		pckt->flow.port16[0] = th->dest;
+		pckt->flow.port16[1] = th->source;
+	} else {
+		pckt->flow.port16[0] = th->source;
+		pckt->flow.port16[1] = th->dest;
+	}
+
+	return true;
+}
+
+/*
+ * Prepend an outer IPv6 header to the packet.
+ *
+ * The head is grown by sizeof(struct ipv6hdr); the new Ethernet header
+ * takes its destination from @cval->mac and its source from the old
+ * destination MAC.  The outer source address is 1:2:3:<suffix>, where the
+ * suffix mixes the flow's last source word with its first port, and the
+ * outer destination is @dst->dstv6.
+ * Returns false if the head cannot be grown or a header does not fit,
+ * true otherwise.
+ */
+static __attribute__ ((noinline))
+bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
+	      struct packet_description *pckt,
+	      struct real_definition *dst, __u32 pkt_bytes)
+{
+	struct eth_hdr *outer_eth;
+	struct eth_hdr *inner_eth;
+	struct ipv6hdr *outer_ip6;
+	void *pkt_end;
+	void *pkt;
+	__u32 suffix;
+
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+		return false;
+
+	pkt = (void *)(long)xdp->data;
+	pkt_end = (void *)(long)xdp->data_end;
+
+	outer_eth = pkt;
+	outer_ip6 = pkt + sizeof(struct eth_hdr);
+	inner_eth = pkt + sizeof(struct ipv6hdr);
+	if (outer_eth + 1 > pkt_end ||
+	    inner_eth + 1 > pkt_end || outer_ip6 + 1 > pkt_end)
+		return false;
+
+	memcpy(outer_eth->eth_dest, cval->mac, 6);
+	memcpy(outer_eth->eth_source, inner_eth->eth_dest, 6);
+	/* 56710 == 0xdd86, i.e. 0x86dd with its two bytes swapped */
+	outer_eth->eth_proto = 56710;
+
+	outer_ip6->version = 6;
+	outer_ip6->priority = 0;
+	memset(outer_ip6->flow_lbl, 0, sizeof(outer_ip6->flow_lbl));
+	outer_ip6->nexthdr = IPPROTO_IPV6;
+	outer_ip6->hop_limit = 4;
+	outer_ip6->payload_len =
+		__builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr));
+
+	suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0];
+	outer_ip6->saddr.in6_u.u6_addr32[0] = 1;
+	outer_ip6->saddr.in6_u.u6_addr32[1] = 2;
+	outer_ip6->saddr.in6_u.u6_addr32[2] = 3;
+	outer_ip6->saddr.in6_u.u6_addr32[3] = suffix;
+	memcpy(outer_ip6->daddr.in6_u.u6_addr32, dst->dstv6, 16);
+
+	return true;
+}
+
+static __attribute__ ((noinline)) /* write an outer IPv4 (IPIP) header in front of the packet, then move the head back; returns 0 on any failure, 1 otherwise */
+bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
+ struct packet_description *pckt,
+ struct real_definition *dst, __u32 pkt_bytes)
+{
+
+ __u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]);
+ struct eth_hdr *new_eth;
+ struct eth_hdr *old_eth;
+ __u16 *next_iph_u16;
+ struct iphdr *iph;
+ __u32 csum = 0;
+ void *data_end;
+ void *data;
+
+ ip_suffix <<= 15;
+ ip_suffix ^= pckt->flow.src; /* mix the first flow port and the flow source into the outer source address */
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) /* grow the head by one IPv4 header */
+ return 0;
+ data = (void *)(long)xdp->data; /* reload the packet pointers after the head adjustment */
+ data_end = (void *)(long)xdp->data_end;
+ new_eth = data;
+ iph = data + sizeof(struct eth_hdr);
+ old_eth = data + sizeof(struct iphdr);
+ if (new_eth + 1 > data_end ||
+ old_eth + 1 > data_end || iph + 1 > data_end)
+ return 0;
+ memcpy(new_eth->eth_dest, cval->mac, 6);
+ memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
+ new_eth->eth_proto = 8; /* 8 == 0x0008, i.e. 0x0800 with its two bytes swapped */
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->frag_off = 0;
+ iph->protocol = IPPROTO_IPIP;
+ iph->check = 0; /* must be zero while the header is being summed below */
+ iph->tos = 1;
+ iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr));
+ /* don't update iph->daddr, since it will overwrite old eth_proto
+ * and multiple iterations of bpf_prog_run() will fail
+ */
+
+ iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst;
+ iph->ttl = 4;
+
+ next_iph_u16 = (__u16 *) iph;
+#pragma clang loop unroll(full)
+ for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
+ csum += *next_iph_u16++;
+ iph->check = ~((csum & 0xffff) + (csum >> 16)); /* NOTE(review): folded only once; a carry out of this addition is dropped by the 16-bit store */
+ if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) /* shrink the head again by the same amount it was grown above */
+ return 0;
+ return 1;
+}
+
+/*
+ * Strip an outer IPv6 header.
+ *
+ * The Ethernet addresses are copied sizeof(struct ipv6hdr) bytes further
+ * into the packet, the EtherType there is set to 8 for an inner IPv4
+ * packet or 56710 otherwise, and the head is shrunk by one IPv6 header.
+ * On success *@data and *@data_end are reloaded and true is returned;
+ * false is returned if the head cannot be adjusted.
+ */
+static __attribute__ ((noinline))
+bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
+{
+	struct eth_hdr *outer_eth = *data;
+	struct eth_hdr *inner_eth = *data + sizeof(struct ipv6hdr);
+
+	memcpy(inner_eth->eth_source, outer_eth->eth_source, 6);
+	memcpy(inner_eth->eth_dest, outer_eth->eth_dest, 6);
+	inner_eth->eth_proto = inner_v4 ? 8 : 56710;
+
+	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
+		return false;
+
+	*data = (void *)(long)xdp->data;
+	*data_end = (void *)(long)xdp->data_end;
+
+	return true;
+}
+
+/*
+ * Strip an outer IPv4 header.
+ *
+ * The Ethernet addresses are copied sizeof(struct iphdr) bytes further
+ * into the packet, the EtherType there is set to 8, and the head is
+ * shrunk by one IPv4 header.
+ * On success *@data and *@data_end are reloaded and true is returned;
+ * false is returned if the head cannot be adjusted.
+ */
+static __attribute__ ((noinline))
+bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
+{
+	struct eth_hdr *outer_eth = *data;
+	struct eth_hdr *inner_eth = *data + sizeof(struct iphdr);
+
+	memcpy(inner_eth->eth_source, outer_eth->eth_source, 6);
+	memcpy(inner_eth->eth_dest, outer_eth->eth_dest, 6);
+	inner_eth->eth_proto = 8;
+
+	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
+		return false;
+
+	*data = (void *)(long)xdp->data;
+	*data_end = (void *)(long)xdp->data_end;
+
+	return true;
+}
+
+/*
+ * Exchange the source and destination MAC of the Ethernet header at
+ * @data and return XDP_TX.  @data_end is not consulted.
+ */
+static __attribute__ ((noinline))
+int swap_mac_and_send(void *data, void *data_end)
+{
+	struct eth_hdr *eth = data;
+	unsigned char saved_src[6];
+
+	memcpy(saved_src, eth->eth_source, 6);
+	memcpy(eth->eth_source, eth->eth_dest, 6);
+	memcpy(eth->eth_dest, saved_src, 6);
+
+	return XDP_TX;
+}
+
+static __attribute__ ((noinline)) /*
+ * send_icmp_reply() - rewrite an IPv4 ICMP packet in place and bounce it.
+ * @data: start of packet (Ethernet header)
+ * @data_end: end of packet
+ *
+ * Sets the ICMP type to 0, adjusts the ICMP checksum by a constant instead
+ * of recomputing it, swaps the IP source and destination addresses, sets the
+ * TTL to 4 and recomputes the IP header checksum over sizeof(struct iphdr)
+ * bytes. The MAC addresses are then swapped by swap_mac_and_send().
+ *
+ * Returns XDP_DROP if Ethernet + IP + ICMP headers do not fit in the packet,
+ * otherwise the value returned by swap_mac_and_send().
+ */
+int send_icmp_reply(void *data, void *data_end)
+{
+ struct icmphdr *icmp_hdr;
+ __u16 *next_iph_u16;
+ __u32 tmp_addr = 0;
+ struct iphdr *iph;
+ __u32 csum1 = 0; /* never read in this function */
+ __u32 csum = 0;
+ __u64 off = 0;
+
+ if (data + sizeof(struct eth_hdr)
+ + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end)
+ return XDP_DROP;
+ off += sizeof(struct eth_hdr);
+ iph = data + off;
+ off += sizeof(struct iphdr);
+ icmp_hdr = data + off;
+ icmp_hdr->type = 0; /* ICMP type 0 is echo reply (RFC 792) */
+ icmp_hdr->checksum += 0x0007; /* constant adjustment; the payload is not re-summed */
+ iph->ttl = 4;
+ tmp_addr = iph->daddr;
+ iph->daddr = iph->saddr;
+ iph->saddr = tmp_addr;
+ iph->check = 0; /* checksum field is zeroed before the header is summed */
+ next_iph_u16 = (__u16 *) iph;
+#pragma clang loop unroll(full)
+ for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
+ csum += *next_iph_u16++; /* sum the header as 16-bit words */
+ iph->check = ~((csum & 0xffff) + (csum >> 16)); /* fold the carry once, then complement */
+ return swap_mac_and_send(data, data_end);
+}
+
+static __attribute__ ((noinline)) /*
+ * send_icmp6_reply() - rewrite an ICMPv6 packet in place and bounce it.
+ * @data: start of packet (Ethernet header)
+ * @data_end: end of packet
+ *
+ * Sets the ICMPv6 type to 129, adjusts the ICMPv6 checksum by a constant
+ * instead of recomputing it, sets the hop limit to 4 and swaps the IPv6
+ * source and destination addresses. The MAC addresses are then swapped by
+ * swap_mac_and_send().
+ *
+ * Returns XDP_DROP if Ethernet + IPv6 + ICMPv6 headers do not fit in the
+ * packet, otherwise the value returned by swap_mac_and_send().
+ */
+int send_icmp6_reply(void *data, void *data_end)
+{
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+ __be32 tmp_addr[4]; /* holds the original source address during the swap */
+ __u64 off = 0;
+
+ if (data + sizeof(struct eth_hdr)
+ + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end)
+ return XDP_DROP;
+ off += sizeof(struct eth_hdr);
+ ip6h = data + off;
+ off += sizeof(struct ipv6hdr);
+ icmp_hdr = data + off;
+ icmp_hdr->icmp6_type = 129; /* ICMPv6 type 129 is echo reply (RFC 4443) */
+ icmp_hdr->icmp6_cksum -= 0x0001; /* constant adjustment; the payload is not re-summed */
+ ip6h->hop_limit = 4;
+ memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16);
+ memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16);
+ memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16);
+ return swap_mac_and_send(data, data_end);
+}
+
+static __attribute__ ((noinline)) /*
+ * parse_icmpv6() - handle an ICMPv6 header found at @data + @off.
+ * @data: start of packet
+ * @data_end: end of packet
+ * @off: byte offset of the ICMPv6 header from @data
+ * @pckt: flow description filled in from the embedded IPv6 header
+ *
+ * Returns an XDP action (>= 0) when the packet is fully handled here:
+ * XDP_DROP for truncated packets, the result of send_icmp6_reply() for
+ * type 128, XDP_PASS for any type other than 3. Returns -1 for type 3 after
+ * filling @pckt, which tells the caller to keep processing.
+ */
+int parse_icmpv6(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return XDP_DROP;
+ if (icmp_hdr->icmp6_type == 128) /* ICMPv6 type 128 is echo request (RFC 4443) */
+ return send_icmp6_reply(data, data_end);
+ if (icmp_hdr->icmp6_type != 3)
+ return XDP_PASS;
+ off += sizeof(struct icmp6hdr);
+ ip6h = data + off; /* IPv6 header carried inside the ICMPv6 message */
+ if (ip6h + 1 > data_end)
+ return XDP_DROP;
+ pckt->flow.proto = ip6h->nexthdr;
+ pckt->flags |= (1 << 0); /* flag bit 0 is set only on this ICMP path */
+ memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16); /* src/dst are deliberately crossed */
+ memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16);
+ return -1;
+}
+
+static __attribute__ ((noinline)) /*
+ * parse_icmp() - handle an IPv4 ICMP header found at @data + @off.
+ * @data: start of packet
+ * @data_end: end of packet
+ * @off: byte offset of the ICMP header from @data
+ * @pckt: flow description filled in from the embedded IPv4 header
+ *
+ * Returns an XDP action (>= 0) when the packet is fully handled here:
+ * XDP_DROP for truncated packets or an embedded header with ihl != 5, the
+ * result of send_icmp_reply() for type 8, XDP_PASS for anything other than
+ * type 3 / code 4. Returns -1 for type 3 / code 4 after filling @pckt, which
+ * tells the caller to keep processing.
+ */
+int parse_icmp(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmphdr *icmp_hdr;
+ struct iphdr *iph;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return XDP_DROP;
+ if (icmp_hdr->type == 8) /* ICMP type 8 is echo request (RFC 792) */
+ return send_icmp_reply(data, data_end);
+ if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4)) /* type 3 / code 4: destination unreachable, fragmentation needed (RFC 792) */
+ return XDP_PASS;
+ off += sizeof(struct icmphdr);
+ iph = data + off; /* IPv4 header carried inside the ICMP message */
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+ if (iph->ihl != 5) /* only option-less (20-byte) embedded headers are accepted */
+ return XDP_DROP;
+ pckt->flow.proto = iph->protocol;
+ pckt->flags |= (1 << 0); /* flag bit 0 is set only on this ICMP path */
+ pckt->flow.src = iph->daddr; /* src/dst are deliberately crossed */
+ pckt->flow.dst = iph->saddr;
+ return -1;
+}
+
+static __attribute__ ((noinline)) /*
+ * get_packet_hash() - hash the flow in @pckt.
+ * @pckt: flow whose source address and ports are hashed
+ * @hash_16bytes: when set, the 16 bytes at flow.srcv6 are first reduced with
+ * jhash() and the result is mixed with flow.ports; otherwise flow.src is
+ * mixed with flow.ports directly
+ *
+ * The constants 12 and 24 are passed as the last argument of jhash() and
+ * jhash_2words() (presumably the initial value; both are defined outside
+ * this function).
+ */
+__u32 get_packet_hash(struct packet_description *pckt,
+ bool hash_16bytes)
+{
+ if (hash_16bytes)
+ return jhash_2words(jhash(pckt->flow.srcv6, 16, 12),
+ pckt->flow.ports, 24);
+ else
+ return jhash_2words(pckt->flow.src, pckt->flow.ports,
+ 24);
+}
+
+__attribute__ ((noinline)) /*
+ * get_packet_dst() - pick a backend for a flow by hashing it.
+ * @real: out; set to the entry found in the reals map
+ * @pckt: flow description; its ports and srcv6 may be rewritten when
+ * @vip_info flag bit 3 is set
+ * @vip_info: metadata of the matched VIP (flags and vip_num are read)
+ * @is_ipv6: default for whether the 16-byte source address is hashed
+ * @lru_map: map that may be updated with the chosen position
+ *
+ * Returns 1 with a non-NULL backend stored through @real, or 0 if the hash
+ * is not one of the two expected test values or a map lookup fails.
+ */
+static bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6, void *lru_map)
+{
+ struct real_pos_lru new_dst_lru = { };
+ bool hash_16bytes = is_ipv6;
+ __u32 *real_pos, hash, key;
+ __u64 cur_time;
+
+ if (vip_info->flags & (1 << 2)) /* flag bit 2 forces the 16-byte hash */
+ hash_16bytes = 1;
+ if (vip_info->flags & (1 << 3)) { /* flag bit 3: hash on port16[1] only, ignoring the source address */
+ pckt->flow.port16[0] = pckt->flow.port16[1];
+ memset(pckt->flow.srcv6, 0, 16);
+ }
+ hash = get_packet_hash(pckt, hash_16bytes);
+ if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
+ hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+ return 0;
+ key = 2 * vip_info->vip_num + hash % 2; /* two ring slots per VIP, selected by the low hash bit */
+ real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+ if (!real_pos)
+ return 0;
+ key = *real_pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+ if (!(*real))
+ return 0;
+ if (!(vip_info->flags & (1 << 1))) { /* flag bit 1 disables the rate counter and LRU update */
+ __u32 conn_rate_key = 512 + 2;
+ struct lb_stats *conn_rate_stats =
+ bpf_map_lookup_elem(&stats, &conn_rate_key);
+
+ if (!conn_rate_stats)
+ return 1;
+ cur_time = bpf_ktime_get_ns();
+ if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
+ conn_rate_stats->v1 = 1; /* start a new window: v1 counts, v2 holds the window start time */
+ conn_rate_stats->v2 = cur_time;
+ } else {
+ conn_rate_stats->v1 += 1;
+ if (conn_rate_stats->v1 >= 1) /* true right after the increment unless v1 wrapped to 0, so the LRU update below is normally skipped on this path */
+ return 1;
+ }
+ if (pckt->flow.proto == IPPROTO_UDP)
+ new_dst_lru.atime = cur_time; /* access time is recorded for UDP flows only */
+ new_dst_lru.pos = key;
+ bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0); /* return value is not checked */
+ }
+ return 1;
+}
+
+__attribute__ ((noinline)) /*
+ * connection_table_lookup() - look a flow up in the LRU connection table.
+ * @real: out; set to the result of the reals lookup (which may be NULL).
+ * Left untouched when the flow is not in @lru_map or its UDP entry
+ * is too old.
+ * @pckt: flow used as the key into @lru_map
+ * @lru_map: map from flow to struct real_pos_lru
+ */
+static void connection_table_lookup(struct real_definition **real,
+ struct packet_description *pckt,
+ void *lru_map)
+{
+
+ struct real_pos_lru *dst_lru;
+ __u64 cur_time;
+ __u32 key;
+
+ dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow);
+ if (!dst_lru)
+ return;
+ if (pckt->flow.proto == IPPROTO_UDP) {
+ cur_time = bpf_ktime_get_ns();
+ if (cur_time - dst_lru->atime > 300000) /* UDP entries older than 300000 ns are ignored */
+ return;
+ dst_lru->atime = cur_time; /* refresh the entry on every hit */
+ }
+ key = dst_lru->pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+}
+
+/* don't believe your eyes!
+ * below function has 6 arguments whereas bpf and llvm allow maximum of 5
+ * but since it's _static_ llvm can optimize one argument away
+ */
+__attribute__ ((noinline)) /*
+ * process_l3_headers_v6() - parse the IPv6 header at @data + @off.
+ * @pckt: flow description to fill in
+ * @protocol: out; next-header value of the IPv6 header
+ * @off: byte offset of the IPv6 header from @data
+ * @pkt_bytes: out; payload_len byte-swapped with __builtin_bswap16()
+ * @data: start of packet
+ * @data_end: end of packet
+ *
+ * Returns an XDP action (>= 0) when the packet is fully handled here, or -1
+ * when the caller should continue with the flow stored in @pckt.
+ */
+static int process_l3_headers_v6(struct packet_description *pckt,
+ __u8 *protocol, __u64 off,
+ __u16 *pkt_bytes, void *data,
+ void *data_end)
+{
+ struct ipv6hdr *ip6h;
+ __u64 iph_len;
+ int action;
+
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return XDP_DROP;
+ iph_len = sizeof(struct ipv6hdr);
+ *protocol = ip6h->nexthdr;
+ pckt->flow.proto = *protocol;
+ *pkt_bytes = __builtin_bswap16(ip6h->payload_len);
+ off += iph_len; /* off now points just past the fixed IPv6 header */
+ if (*protocol == 45) {
+ return XDP_DROP;
+ } else if (*protocol == 59) {
+ action = parse_icmpv6(data, data_end, off, pckt);
+ if (action >= 0)
+ return action;
+ } else {
+ memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16);
+ memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16);
+ }
+ return -1;
+}
+
+__attribute__ ((noinline)) /*
+ * process_l3_headers_v4() - parse the IPv4 header at @data + @off.
+ * @pckt: flow description to fill in
+ * @protocol: out; protocol field of the IPv4 header
+ * @off: byte offset of the IPv4 header from @data
+ * @pkt_bytes: out; tot_len byte-swapped with __builtin_bswap16()
+ * @data: start of packet
+ * @data_end: end of packet
+ *
+ * Returns an XDP action (>= 0) when the packet is fully handled here, or -1
+ * when the caller should continue with the flow stored in @pckt.
+ */
+static int process_l3_headers_v4(struct packet_description *pckt,
+ __u8 *protocol, __u64 off,
+ __u16 *pkt_bytes, void *data,
+ void *data_end)
+{
+ struct iphdr *iph;
+ __u64 iph_len; /* never used in this function */
+ int action;
+
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+ if (iph->ihl != 5) /* headers with IP options are dropped */
+ return XDP_DROP;
+ *protocol = iph->protocol;
+ pckt->flow.proto = *protocol;
+ *pkt_bytes = __builtin_bswap16(iph->tot_len);
+ off += 20; /* fixed header length, guaranteed by the ihl == 5 check */
+ if (iph->frag_off & 65343) /* 65343 == 0xFF3F, i.e. 0x3FFF (MF flag | fragment offset) byte-swapped; presumably assumes a little-endian host */
+ return XDP_DROP;
+ if (*protocol == IPPROTO_ICMP) {
+ action = parse_icmp(data, data_end, off, pckt);
+ if (action >= 0)
+ return action;
+ } else {
+ pckt->flow.src = iph->saddr;
+ pckt->flow.dst = iph->daddr;
+ }
+ return -1;
+}
+
+__attribute__ ((noinline)) /*
+ * process_packet() - run one packet through the load-balancer pipeline.
+ * @data: start of packet (Ethernet header)
+ * @off: byte offset of the L3 header from @data
+ * @data_end: end of packet
+ * @is_ipv6: selects IPv6 (true) or IPv4 (false) L3 parsing
+ * @xdp: XDP context, needed for encapsulation
+ *
+ * Steps: parse L3 and L4, look the destination up in vip_map, pick a backend
+ * from the connection table or by hashing, encapsulate towards it and update
+ * the statistics. Returns an XDP action. Note that the final return, after
+ * successful encapsulation, is XDP_DROP as well.
+ */
+static int process_packet(void *data, __u64 off, void *data_end,
+ bool is_ipv6, struct xdp_md *xdp)
+{
+
+ struct real_definition *dst = NULL;
+ struct packet_description pckt = { };
+ struct vip_definition vip = { };
+ struct lb_stats *data_stats;
+ struct eth_hdr *eth = data; /* never read in this function */
+ void *lru_map = &lru_cache;
+ struct vip_meta *vip_info;
+ __u32 lru_stats_key = 513;
+ __u32 mac_addr_pos = 0;
+ __u32 stats_key = 512;
+ struct ctl_value *cval;
+ __u16 pkt_bytes;
+ __u64 iph_len; /* never used in this function */
+ __u8 protocol;
+ __u32 vip_num;
+ int action;
+
+ if (is_ipv6)
+ action = process_l3_headers_v6(&pckt, &protocol, off,
+ &pkt_bytes, data, data_end);
+ else
+ action = process_l3_headers_v4(&pckt, &protocol, off,
+ &pkt_bytes, data, data_end);
+ if (action >= 0) /* the L3 stage already decided the packet's fate */
+ return action;
+ protocol = pckt.flow.proto; /* may differ from the outer header when the ICMP parsers filled pckt from an embedded header */
+ if (protocol == IPPROTO_TCP) {
+ if (!parse_tcp(data, data_end, is_ipv6, &pckt))
+ return XDP_DROP;
+ } else if (protocol == IPPROTO_UDP) {
+ if (!parse_udp(data, data_end, is_ipv6, &pckt))
+ return XDP_DROP;
+ } else {
+ return XDP_TX;
+ }
+
+ if (is_ipv6)
+ memcpy(vip.vipv6, pckt.flow.dstv6, 16);
+ else
+ vip.vip = pckt.flow.dst;
+ vip.port = pckt.flow.port16[1];
+ vip.proto = pckt.flow.proto;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info) {
+ vip.port = 0; /* retry with port 0 as the key */
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info)
+ return XDP_PASS;
+ if (!(vip_info->flags & (1 << 4)))
+ pckt.flow.port16[1] = 0;
+ }
+ if (data_end - data > 1400) /* packets longer than 1400 bytes are dropped */
+ return XDP_DROP;
+ data_stats = bpf_map_lookup_elem(&stats, &stats_key);
+ if (!data_stats)
+ return XDP_DROP;
+ data_stats->v1 += 1;
+ if (!dst) { /* dst is still its initial NULL here */
+ if (vip_info->flags & (1 << 0))
+ pckt.flow.port16[0] = 0;
+ if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1)))
+ connection_table_lookup(&dst, &pckt, lru_map);
+ if (dst) /* connection table hit: skip hashing */
+ goto out;
+ if (pckt.flow.proto == IPPROTO_TCP) {
+ struct lb_stats *lru_stats =
+ bpf_map_lookup_elem(&stats, &lru_stats_key);
+
+ if (!lru_stats)
+ return XDP_DROP;
+ if (pckt.flags & (1 << 1))
+ lru_stats->v1 += 1;
+ else
+ lru_stats->v2 += 1;
+ }
+ if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map))
+ return XDP_DROP;
+ data_stats->v2 += 1;
+ }
+out:
+ cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos);
+ if (!cval)
+ return XDP_DROP;
+ if (dst->flags & (1 << 0)) { /* backend flag bit 0 selects IPv6 encapsulation */
+ if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes))
+ return XDP_DROP;
+ } else {
+ if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
+ return XDP_DROP;
+ }
+ vip_num = vip_info->vip_num;
+ data_stats = bpf_map_lookup_elem(&stats, &vip_num); /* per-VIP counters, keyed by vip_num */
+ if (!data_stats)
+ return XDP_DROP;
+ data_stats->v1 += 1;
+ data_stats->v2 += pkt_bytes;
+
+ data = (void *)(long)xdp->data; /* reload packet pointers from the context after encapsulation */
+ data_end = (void *)(long)xdp->data_end;
+ if (data + 4 > data_end)
+ return XDP_DROP;
+ *(u32 *)data = dst->dst; /* overwrite the first 4 packet bytes with the backend address */
+ return XDP_DROP;
+}
+
+__attribute__ ((section("xdp-test"), used)) /*
+ * balancer_ingress() - program entry point, placed in section "xdp-test".
+ * @ctx: XDP context of the received packet
+ *
+ * Drops packets too short for an Ethernet header, dispatches on eth_proto
+ * to process_packet() and drops every other protocol.
+ */
+int balancer_ingress(struct xdp_md *ctx)
+{
+ void *data = (void *)(long)ctx->data;
+ void *data_end = (void *)(long)ctx->data_end;
+ struct eth_hdr *eth = data;
+ __u32 eth_proto;
+ __u32 nh_off;
+
+ nh_off = sizeof(struct eth_hdr);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+ eth_proto = eth->eth_proto;
+ if (eth_proto == 8) /* 0x0800 byte-swapped; presumably IPv4 on a little-endian host */
+ return process_packet(data, nh_off, data_end, 0, ctx);
+ else if (eth_proto == 56710) /* 0x86DD byte-swapped; presumably IPv6 on a little-endian host */
+ return process_packet(data, nh_off, data_end, 1, ctx);
+ else
+ return XDP_DROP;
+}
+
+char _license[] __attribute__ ((section("license"), used)) = "GPL"; /* license string, emitted into the "license" section */
+int _version __attribute__ ((section("version"), used)) = 1; /* emitted into the "version" section */
--- /dev/null
+/* Copyright (c) 2017 VMware
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1; /* SEC() comes from bpf_helpers.h (not visible here); presumably places this in the "version" section */
+
+SEC("redirect_to_111") /* XDP program that unconditionally calls bpf_redirect(111, 0) */
+int xdp_redirect_to_111(struct xdp_md *xdp)
+{
+ return bpf_redirect(111, 0); /* per the bpf_redirect() helper docs: first argument is the target ifindex, second is flags */
+}
+SEC("redirect_to_222") /* XDP program that unconditionally calls bpf_redirect(222, 0) */
+int xdp_redirect_to_222(struct xdp_md *xdp)
+{
+ return bpf_redirect(222, 0); /* per the bpf_redirect() helper docs: first argument is the target ifindex, second is flags */
+}
+
+char _license[] SEC("license") = "GPL"; /* license string; SEC() presumably places it in the "license" section */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright(c) 2018 Jesper Dangaard Brouer.
+ *
+ * XDP/TC VLAN manipulation example
+ *
+ * GOTCHA: Remember to disable NIC hardware offloading of VLANs,
+ * else the VLAN tags are NOT inlined in the packet payload:
+ *
+ * # ethtool -K ixgbe2 rxvlan off
+ *
+ * Verify setting:
+ * # ethtool -k ixgbe2 | grep rx-vlan-offload
+ * rx-vlan-offload: off
+ *
+ */
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/pkt_cls.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* linux/if_vlan.h has not exposed this as UAPI, thus mirror some of it here
+ *
+ * struct vlan_hdr - vlan header
+ * @h_vlan_TCI: priority and VLAN ID
+ * @h_vlan_encapsulated_proto: packet type ID or len
+ */
+struct _vlan_hdr { /* local mirror of the 4-byte VLAN header; both fields are big-endian (__be16) */
+ __be16 h_vlan_TCI; /* priority and VLAN ID; split with the VLAN_*_MASK values below */
+ __be16 h_vlan_encapsulated_proto; /* packet type ID or len of what follows the tag */
+};
+#define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */
+#define VLAN_PRIO_SHIFT 13
+#define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */
+#define VLAN_TAG_PRESENT VLAN_CFI_MASK
+#define VLAN_VID_MASK 0x0fff /* VLAN Identifier */
+#define VLAN_N_VID 4096
+
+struct parse_pkt { /* result of parse_eth_frame(); callers zero-initialise it first */
+ __u16 l3_proto; /* EtherType after any VLAN tags, in host byte order */
+ __u16 l3_offset; /* byte offset of the L3 header from the Ethernet header */
+ __u16 vlan_outer; /* outer VLAN ID (TCI masked with VLAN_VID_MASK), host byte order */
+ __u16 vlan_inner; /* inner VLAN ID, host byte order */
+ __u8 vlan_outer_offset; /* byte offset of the outer VLAN header; stays 0 when there is none */
+ __u8 vlan_inner_offset; /* byte offset of the inner VLAN header; stays 0 when there is none */
+};
+
+char _license[] SEC("license") = "GPL"; /* license string; SEC() presumably places it in the "license" section */
+
+static __always_inline /*
+ * parse_eth_frame() - parse an Ethernet header and up to two VLAN tags.
+ * @eth: start of the Ethernet header
+ * @data_end: end of packet
+ * @pkt: output; only the fields that apply are written, so the caller is
+ * expected to pass a zero-initialised struct
+ *
+ * Returns false if the packet is too short to hold an Ethernet header plus
+ * two VLAN headers (this is checked up front, even for untagged frames),
+ * true otherwise.
+ */
+bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt)
+{
+ __u16 eth_type;
+ __u8 offset;
+
+ offset = sizeof(*eth);
+ /* Make sure packet is large enough for parsing eth + 2 VLAN headers */
+ if ((void *)eth + offset + (2*sizeof(struct _vlan_hdr)) > data_end)
+ return false;
+
+ eth_type = eth->h_proto; /* kept in network byte order until the end */
+
+ /* Handle outer VLAN tag */
+ if (eth_type == bpf_htons(ETH_P_8021Q)
+ || eth_type == bpf_htons(ETH_P_8021AD)) {
+ struct _vlan_hdr *vlan_hdr;
+
+ vlan_hdr = (void *)eth + offset;
+ pkt->vlan_outer_offset = offset;
+ pkt->vlan_outer = bpf_ntohs(vlan_hdr->h_vlan_TCI)
+ & VLAN_VID_MASK;
+ eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+ offset += sizeof(*vlan_hdr);
+ }
+
+ /* Handle inner (double) VLAN tag */
+ if (eth_type == bpf_htons(ETH_P_8021Q)
+ || eth_type == bpf_htons(ETH_P_8021AD)) {
+ struct _vlan_hdr *vlan_hdr;
+
+ vlan_hdr = (void *)eth + offset;
+ pkt->vlan_inner_offset = offset;
+ pkt->vlan_inner = bpf_ntohs(vlan_hdr->h_vlan_TCI)
+ & VLAN_VID_MASK;
+ eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+ offset += sizeof(*vlan_hdr);
+ }
+
+ pkt->l3_proto = bpf_ntohs(eth_type); /* Convert to host-byte-order */
+ pkt->l3_offset = offset;
+
+ return true;
+}
+
+/* Hint: VLANs are chosen to hit network-byte-order issues */
+#define TESTVLAN 4011 /* 0xFAB */
+// #define TO_VLAN 4000 /* 0xFA0 (hint 0x0A0 = 160) */
+
+SEC("xdp_drop_vlan_4011") /*
+ * xdp_prognum0() - abort packets whose outer VLAN ID equals TESTVLAN.
+ * Returns XDP_ABORTED for those packets and for packets parse_eth_frame()
+ * rejects, XDP_PASS for everything else.
+ */
+int xdp_prognum0(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct parse_pkt pkt = { 0 };
+
+ if (!parse_eth_frame(data, data_end, &pkt))
+ return XDP_ABORTED;
+
+ /* Drop specific VLAN ID example */
+ if (pkt.vlan_outer == TESTVLAN)
+ return XDP_ABORTED;
+ /*
+ * Using XDP_ABORTED makes it possible to record this event,
+ * via tracepoint xdp:xdp_exception like:
+ * # perf record -a -e xdp:xdp_exception
+ * # perf script
+ */
+ return XDP_PASS;
+}
+/*
+Commands to setup VLAN on Linux to test packets gets dropped:
+
+ export ROOTDEV=ixgbe2
+ export VLANID=4011
+ ip link add link $ROOTDEV name $ROOTDEV.$VLANID type vlan id $VLANID
+ ip link set dev $ROOTDEV.$VLANID up
+
+ ip link set dev $ROOTDEV mtu 1508
+ ip addr add 100.64.40.11/24 dev $ROOTDEV.$VLANID
+
+Load prog with ip tool:
+
+ ip link set $ROOTDEV xdp off
+ ip link set $ROOTDEV xdp object xdp_vlan01_kern.o section xdp_drop_vlan_4011
+
+*/
+
+/* Changing the VLAN ID to zero has the same practical effect as removing the VLAN. */
+#define TO_VLAN 0
+
+SEC("xdp_vlan_change") /*
+ * xdp_prognum1() - rewrite the outer VLAN ID from TESTVLAN to TO_VLAN.
+ * Returns XDP_ABORTED when parse_eth_frame() rejects the packet, XDP_PASS
+ * otherwise (whether or not the tag was rewritten).
+ */
+int xdp_prognum1(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct parse_pkt pkt = { 0 };
+
+ if (!parse_eth_frame(data, data_end, &pkt))
+ return XDP_ABORTED;
+
+ /* Change specific VLAN ID */
+ if (pkt.vlan_outer == TESTVLAN) {
+ struct _vlan_hdr *vlan_hdr = data + pkt.vlan_outer_offset;
+
+ /* Modifying VLAN, preserve top 4 bits */
+ vlan_hdr->h_vlan_TCI =
+ bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000)
+ | TO_VLAN); /* 0xf000 == VLAN_PRIO_MASK | VLAN_CFI_MASK */
+ }
+
+ return XDP_PASS;
+}
+
+/*
+ * Show XDP+TC can cooperate, on creating a VLAN rewriter.
+ * 1. Create a XDP prog that can "pop"/remove a VLAN header.
+ * 2. Create a TC-bpf prog that egress can add a VLAN header.
+ */
+
+#ifndef ETH_ALEN /* Ethernet MAC address length */
+#define ETH_ALEN 6 /* bytes */
+#endif
+#define VLAN_HDR_SZ 4 /* bytes */
+
+SEC("xdp_vlan_remove_outer") /*
+ * xdp_prognum2() - remove the outer VLAN header using memmove.
+ * Returns XDP_ABORTED when parse_eth_frame() rejects the packet, XDP_PASS
+ * otherwise (untagged packets pass through unchanged).
+ */
+int xdp_prognum2(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct parse_pkt pkt = { 0 };
+ char *dest;
+
+ if (!parse_eth_frame(data, data_end, &pkt))
+ return XDP_ABORTED;
+
+ /* Skip packet if no outer VLAN was detected */
+ if (pkt.vlan_outer_offset == 0)
+ return XDP_PASS;
+
+ /* Moving the Ethernet header: dest overlaps with src, memmove handles this */
+ dest = data;
+ dest+= VLAN_HDR_SZ;
+ /*
+ * Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by
+ * only moving two MAC addrs (12 bytes), not overwriting last 2 bytes
+ */
+ __builtin_memmove(dest, data, ETH_ALEN * 2);
+ /* Note: LLVM built-in memmove inlining requires the size to be constant */
+
+ /* Move start of packet header seen by Linux kernel stack */
+ bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ); /* NOTE(review): the helper's return value is ignored */
+
+ return XDP_PASS;
+}
+
+static __always_inline /*
+ * shift_mac_4bytes_16bit() - move the first 12 bytes at @data (the two MAC
+ * addresses) 4 bytes forward using 16-bit copies. Copying runs from the
+ * highest index down so each source word is read before it is overwritten.
+ */
+void shift_mac_4bytes_16bit(void *data)
+{
+ __u16 *p = data;
+
+ p[7] = p[5]; /* delete p[7] was vlan_hdr->h_vlan_TCI */
+ p[6] = p[4]; /* delete p[6] was ethhdr->h_proto */
+ p[5] = p[3];
+ p[4] = p[2];
+ p[3] = p[1];
+ p[2] = p[0];
+}
+
+static __always_inline /*
+ * shift_mac_4bytes_32bit() - move the first 12 bytes at @data (the two MAC
+ * addresses) 4 bytes forward using three 32-bit copies. Copying runs from
+ * the highest index down so each source word is read before it is
+ * overwritten.
+ */
+void shift_mac_4bytes_32bit(void *data)
+{
+ __u32 *p = data;
+
+ /* Assuming a VLAN hdr is present. The 4 bytes in p[3] that get
+ * overwritten are ethhdr->h_proto and vlan_hdr->h_vlan_TCI.
+ * The vlan_hdr->h_vlan_encapsulated_proto takes over the role of
+ * ethhdr->h_proto.
+ */
+ p[3] = p[2];
+ p[2] = p[1];
+ p[1] = p[0];
+}
+
+SEC("xdp_vlan_remove_outer2") /*
+ * xdp_prognum3() - remove the outer VLAN header using explicit 32-bit
+ * copies (shift_mac_4bytes_32bit()) instead of memmove.
+ * Returns XDP_ABORTED when parse_eth_frame() rejects the packet, XDP_PASS
+ * otherwise (untagged packets pass through unchanged).
+ */
+int xdp_prognum3(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *orig_eth = data;
+ struct parse_pkt pkt = { 0 };
+
+ if (!parse_eth_frame(orig_eth, data_end, &pkt))
+ return XDP_ABORTED;
+
+ /* Skip packet if no outer VLAN was detected */
+ if (pkt.vlan_outer_offset == 0)
+ return XDP_PASS;
+
+ /* Simply shift down MAC addrs 4 bytes, overwrite h_proto + TCI */
+ shift_mac_4bytes_32bit(data);
+
+ /* Move start of packet header seen by Linux kernel stack */
+ bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ); /* NOTE(review): the helper's return value is ignored */
+
+ return XDP_PASS;
+}
+
+/*=====================================
+ * BELOW: TC-hook based ebpf programs
+ * ====================================
+ * The TC-clsact eBPF programs (currently) need to be attached via TC commands
+ */
+
+SEC("tc_vlan_push") /*
+ * _tc_progA() - TC program that calls bpf_skb_vlan_push() with protocol
+ * ETH_P_8021Q and TESTVLAN as the TCI argument, then returns TC_ACT_OK.
+ */
+int _tc_progA(struct __sk_buff *ctx)
+{
+ bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN); /* NOTE(review): the helper's return value is ignored */
+
+ return TC_ACT_OK;
+}
+/*
+Commands to setup TC to use above bpf prog:
+
+export ROOTDEV=ixgbe2
+export FILE=xdp_vlan01_kern.o
+
+# Re-attach clsact to clear/flush existing role
+tc qdisc del dev $ROOTDEV clsact 2> /dev/null ;\
+tc qdisc add dev $ROOTDEV clsact
+
+# Attach BPF prog EGRESS
+tc filter add dev $ROOTDEV egress \
+ prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push
+
+tc filter show dev $ROOTDEV egress
+*/
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+#define KBUILD_MODNAME "xdp_dummy"
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+SEC("xdp_dummy") /* minimal XDP program: returns XDP_PASS without touching the packet */
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL"; /* license string; SEC() presumably places it in the "license" section */
+++ /dev/null
-/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
-#include <linux/bpf.h>
-#include "bpf_helpers.h"
-
-struct bpf_map_def SEC("maps") htab = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(__u32),
- .value_size = sizeof(long),
- .max_entries = 2,
-};
-
-struct bpf_map_def SEC("maps") array = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(long),
- .max_entries = 2,
-};
-
-/* Sample program which should always load for testing control paths. */
-SEC(".text") int func()
-{
- __u64 key64 = 0;
- __u32 key = 0;
- long *value;
-
- value = bpf_map_lookup_elem(&htab, &key);
- if (!value)
- return 1;
- value = bpf_map_lookup_elem(&array, &key64);
- if (!value)
- return 1;
-
- return 0;
-}
+++ /dev/null
-/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
-
-/* Sample program which should always load for testing control paths. */
-int func()
-{
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#include <linux/stddef.h>
-#include <linux/bpf.h>
-#include <sys/socket.h>
-
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
-
-#define SRC1_IP4 0xAC100001U /* 172.16.0.1 */
-#define SRC2_IP4 0x00000000U
-#define SRC_REWRITE_IP4 0x7f000004U
-#define DST_IP4 0xC0A801FEU /* 192.168.1.254 */
-#define DST_REWRITE_IP4 0x7f000001U
-#define DST_PORT 4040
-#define DST_REWRITE_PORT4 4444
-
-int _version SEC("version") = 1;
-
-SEC("cgroup/sendmsg4")
-int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
-{
- if (ctx->type != SOCK_DGRAM)
- return 0;
-
- /* Rewrite source. */
- if (ctx->msg_src_ip4 == bpf_htonl(SRC1_IP4) ||
- ctx->msg_src_ip4 == bpf_htonl(SRC2_IP4)) {
- ctx->msg_src_ip4 = bpf_htonl(SRC_REWRITE_IP4);
- } else {
- /* Unexpected source. Reject sendmsg. */
- return 0;
- }
-
- /* Rewrite destination. */
- if ((ctx->user_ip4 >> 24) == (bpf_htonl(DST_IP4) >> 24) &&
- ctx->user_port == bpf_htons(DST_PORT)) {
- ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
- ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
- } else {
- /* Unexpected source. Reject sendmsg. */
- return 0;
- }
-
- return 1;
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#include <linux/stddef.h>
-#include <linux/bpf.h>
-#include <sys/socket.h>
-
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
-
-#define SRC_REWRITE_IP6_0 0
-#define SRC_REWRITE_IP6_1 0
-#define SRC_REWRITE_IP6_2 0
-#define SRC_REWRITE_IP6_3 6
-
-#define DST_REWRITE_IP6_0 0
-#define DST_REWRITE_IP6_1 0
-#define DST_REWRITE_IP6_2 0
-#define DST_REWRITE_IP6_3 1
-
-#define DST_REWRITE_PORT6 6666
-
-int _version SEC("version") = 1;
-
-SEC("cgroup/sendmsg6")
-int sendmsg_v6_prog(struct bpf_sock_addr *ctx)
-{
- if (ctx->type != SOCK_DGRAM)
- return 0;
-
- /* Rewrite source. */
- if (ctx->msg_src_ip6[3] == bpf_htonl(1) ||
- ctx->msg_src_ip6[3] == bpf_htonl(0)) {
- ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0);
- ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1);
- ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2);
- ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3);
- } else {
- /* Unexpected source. Reject sendmsg. */
- return 0;
- }
-
- /* Rewrite destination. */
- if ((ctx->user_ip6[0] & 0xFFFF) == bpf_htons(0xFACE) &&
- ctx->user_ip6[0] >> 16 == bpf_htons(0xB00C)) {
- ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
- ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
- ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
- ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
-
- ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
- } else {
- /* Unexpected destination. Reject sendmsg. */
- return 0;
- }
-
- return 1;
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#include <linux/bpf.h>
-#include <sys/socket.h>
-
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
-
-struct bpf_map_def SEC("maps") socket_cookies = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(__u64),
- .value_size = sizeof(__u32),
- .max_entries = 1 << 8,
-};
-
-SEC("cgroup/connect6")
-int set_cookie(struct bpf_sock_addr *ctx)
-{
- __u32 cookie_value = 0xFF;
- __u64 cookie_key;
-
- if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)
- return 1;
-
- cookie_key = bpf_get_socket_cookie(ctx);
- if (bpf_map_update_elem(&socket_cookies, &cookie_key, &cookie_value, 0))
- return 0;
-
- return 1;
-}
-
-SEC("sockops")
-int update_cookie(struct bpf_sock_ops *ctx)
-{
- __u32 new_cookie_value;
- __u32 *cookie_value;
- __u64 cookie_key;
-
- if (ctx->family != AF_INET6)
- return 1;
-
- if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
- return 1;
-
- cookie_key = bpf_get_socket_cookie(ctx);
-
- cookie_value = bpf_map_lookup_elem(&socket_cookies, &cookie_key);
- if (!cookie_value)
- return 1;
-
- new_cookie_value = (ctx->local_port << 8) | *cookie_value;
- bpf_map_update_elem(&socket_cookies, &cookie_key, &new_cookie_value, 0);
-
- return 1;
-}
-
-int _version SEC("version") = 1;
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-#include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_util.h"
-#include "bpf_endian.h"
-
-int _version SEC("version") = 1;
-
-#define bpf_printk(fmt, ...) \
-({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
-})
-
-SEC("sk_skb1")
-int bpf_prog1(struct __sk_buff *skb)
-{
- void *data_end = (void *)(long) skb->data_end;
- void *data = (void *)(long) skb->data;
- __u32 lport = skb->local_port;
- __u32 rport = skb->remote_port;
- __u8 *d = data;
- __u32 len = (__u32) data_end - (__u32) data;
- int err;
-
- if (data + 10 > data_end) {
- err = bpf_skb_pull_data(skb, 10);
- if (err)
- return SK_DROP;
-
- data_end = (void *)(long)skb->data_end;
- data = (void *)(long)skb->data;
- if (data + 10 > data_end)
- return SK_DROP;
- }
-
- /* This write/read is a bit pointless but tests the verifier and
- * strparser handler for read/write pkt data and access into sk
- * fields.
- */
- d = data;
- d[7] = 1;
- return skb->len;
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-#include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_util.h"
-#include "bpf_endian.h"
-
-int _version SEC("version") = 1;
-
-#define bpf_printk(fmt, ...) \
-({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
-})
-
-SEC("sk_msg1")
-int bpf_prog1(struct sk_msg_md *msg)
-{
- void *data_end = (void *)(long) msg->data_end;
- void *data = (void *)(long) msg->data;
-
- char *d;
-
- if (data + 8 > data_end)
- return SK_DROP;
-
- bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data);
- d = (char *)data;
- bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]);
-
- return SK_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-#include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_util.h"
-#include "bpf_endian.h"
-
-int _version SEC("version") = 1;
-
-#define bpf_printk(fmt, ...) \
-({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
-})
-
-struct bpf_map_def SEC("maps") sock_map_rx = {
- .type = BPF_MAP_TYPE_SOCKMAP,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 20,
-};
-
-struct bpf_map_def SEC("maps") sock_map_tx = {
- .type = BPF_MAP_TYPE_SOCKMAP,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 20,
-};
-
-struct bpf_map_def SEC("maps") sock_map_msg = {
- .type = BPF_MAP_TYPE_SOCKMAP,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 20,
-};
-
-struct bpf_map_def SEC("maps") sock_map_break = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 20,
-};
-
-SEC("sk_skb2")
-int bpf_prog2(struct __sk_buff *skb)
-{
- void *data_end = (void *)(long) skb->data_end;
- void *data = (void *)(long) skb->data;
- __u32 lport = skb->local_port;
- __u32 rport = skb->remote_port;
- __u8 *d = data;
- __u8 sk, map;
-
- if (data + 8 > data_end)
- return SK_DROP;
-
- map = d[0];
- sk = d[1];
-
- d[0] = 0xd;
- d[1] = 0xe;
- d[2] = 0xa;
- d[3] = 0xd;
- d[4] = 0xb;
- d[5] = 0xe;
- d[6] = 0xe;
- d[7] = 0xf;
-
- if (!map)
- return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0);
- return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0);
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2018 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include "bpf_helpers.h"
-
-int _version SEC("version") = 1;
-
-SEC("xdp_adjust_tail")
-int _xdp_adjust_tail(struct xdp_md *xdp)
-{
- void *data_end = (void *)(long)xdp->data_end;
- void *data = (void *)(long)xdp->data;
- int offset = 0;
-
- if (data_end - data == 54)
- offset = 256;
- else
- offset = 20;
- if (bpf_xdp_adjust_tail(xdp, 0 - offset))
- return XDP_DROP;
- return XDP_TX;
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (c) 2018 Facebook */
-#include <linux/bpf.h>
-#include "bpf_helpers.h"
-
-int _version SEC("version") = 1;
-
-struct ipv_counts {
- unsigned int v4;
- unsigned int v6;
-};
-
-struct bpf_map_def SEC("maps") btf_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(struct ipv_counts),
- .max_entries = 4,
-};
-
-BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
-
-struct dummy_tracepoint_args {
- unsigned long long pad;
- struct sock *sock;
-};
-
-__attribute__((noinline))
-static int test_long_fname_2(struct dummy_tracepoint_args *arg)
-{
- struct ipv_counts *counts;
- int key = 0;
-
- if (!arg->sock)
- return 0;
-
- counts = bpf_map_lookup_elem(&btf_map, &key);
- if (!counts)
- return 0;
-
- counts->v6++;
-
- return 0;
-}
-
-__attribute__((noinline))
-static int test_long_fname_1(struct dummy_tracepoint_args *arg)
-{
- return test_long_fname_2(arg);
-}
-
-SEC("dummy_tracepoint")
-int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
-{
- return test_long_fname_1(arg);
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (c) 2018 Facebook */
-#include <linux/bpf.h>
-#include "bpf_helpers.h"
-
-int _version SEC("version") = 1;
-
-struct ipv_counts {
- unsigned int v4;
- unsigned int v6;
-};
-
-struct bpf_map_def SEC("maps") btf_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(struct ipv_counts),
- .max_entries = 4,
-};
-
-struct dummy_tracepoint_args {
- unsigned long long pad;
- struct sock *sock;
-};
-
-__attribute__((noinline))
-static int test_long_fname_2(struct dummy_tracepoint_args *arg)
-{
- struct ipv_counts *counts;
- int key = 0;
-
- if (!arg->sock)
- return 0;
-
- counts = bpf_map_lookup_elem(&btf_map, &key);
- if (!counts)
- return 0;
-
- counts->v6++;
-
- return 0;
-}
-
-__attribute__((noinline))
-static int test_long_fname_1(struct dummy_tracepoint_args *arg)
-{
- return test_long_fname_2(arg);
-}
-
-SEC("dummy_tracepoint")
-int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
-{
- return test_long_fname_1(arg);
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/bpf.h>
-#include "bpf_helpers.h"
-
-/* Permit pretty deep stack traces */
-#define MAX_STACK_RAWTP 100
-struct stack_trace_t {
- int pid;
- int kern_stack_size;
- int user_stack_size;
- int user_stack_buildid_size;
- __u64 kern_stack[MAX_STACK_RAWTP];
- __u64 user_stack[MAX_STACK_RAWTP];
- struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
-};
-
-struct bpf_map_def SEC("maps") perfmap = {
- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(__u32),
- .max_entries = 2,
-};
-
-struct bpf_map_def SEC("maps") stackdata_map = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct stack_trace_t),
- .max_entries = 1,
-};
-
-/* Allocate per-cpu space twice the needed. For the code below
- * usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
- * if (usize < 0)
- * return 0;
- * ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
- *
- * If we have value_size = MAX_STACK_RAWTP * sizeof(__u64),
- * verifier will complain that access "raw_data + usize"
- * with size "max_len - usize" may be out of bound.
- * The maximum "raw_data + usize" is "raw_data + max_len"
- * and the maximum "max_len - usize" is "max_len", verifier
- * concludes that the maximum buffer access range is
- * "raw_data[0...max_len * 2 - 1]" and hence reject the program.
- *
- * Doubling the to-be-used max buffer size can fix this verifier
- * issue and avoid complicated C programming massaging.
- * This is an acceptable workaround since there is one entry here.
- */
-struct bpf_map_def SEC("maps") rawdata_map = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2,
- .max_entries = 1,
-};
-
-SEC("tracepoint/raw_syscalls/sys_enter")
-int bpf_prog1(void *ctx)
-{
- int max_len, max_buildid_len, usize, ksize, total_size;
- struct stack_trace_t *data;
- void *raw_data;
- __u32 key = 0;
-
- data = bpf_map_lookup_elem(&stackdata_map, &key);
- if (!data)
- return 0;
-
- max_len = MAX_STACK_RAWTP * sizeof(__u64);
- max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id);
- data->pid = bpf_get_current_pid_tgid();
- data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack,
- max_len, 0);
- data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len,
- BPF_F_USER_STACK);
- data->user_stack_buildid_size = bpf_get_stack(
- ctx, data->user_stack_buildid, max_buildid_len,
- BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
- bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data));
-
- /* write both kernel and user stacks to the same buffer */
- raw_data = bpf_map_lookup_elem(&rawdata_map, &key);
- if (!raw_data)
- return 0;
-
- usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
- if (usize < 0)
- return 0;
-
- ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
- if (ksize < 0)
- return 0;
-
- total_size = usize + ksize;
- if (total_size > 0 && total_size <= max_len)
- bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size);
-
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
+++ /dev/null
-/* Copyright (c) 2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <stddef.h>
-#include <stdbool.h>
-#include <string.h>
-#include <linux/pkt_cls.h>
-#include <linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/icmp.h>
-#include <linux/icmpv6.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include "bpf_helpers.h"
-#include "test_iptunnel_common.h"
-#include "bpf_endian.h"
-
-int _version SEC("version") = 1;
-
-static inline __u32 rol32(__u32 word, unsigned int shift)
-{
- return (word << shift) | (word >> ((-shift) & 31));
-}
-
-/* copy paste of jhash from kernel sources to make sure llvm
- * can compile it into valid sequence of bpf instructions
- */
-#define __jhash_mix(a, b, c) \
-{ \
- a -= c; a ^= rol32(c, 4); c += b; \
- b -= a; b ^= rol32(a, 6); a += c; \
- c -= b; c ^= rol32(b, 8); b += a; \
- a -= c; a ^= rol32(c, 16); c += b; \
- b -= a; b ^= rol32(a, 19); a += c; \
- c -= b; c ^= rol32(b, 4); b += a; \
-}
-
-#define __jhash_final(a, b, c) \
-{ \
- c ^= b; c -= rol32(b, 14); \
- a ^= c; a -= rol32(c, 11); \
- b ^= a; b -= rol32(a, 25); \
- c ^= b; c -= rol32(b, 16); \
- a ^= c; a -= rol32(c, 4); \
- b ^= a; b -= rol32(a, 14); \
- c ^= b; c -= rol32(b, 24); \
-}
-
-#define JHASH_INITVAL 0xdeadbeef
-
-typedef unsigned int u32;
-
-static inline u32 jhash(const void *key, u32 length, u32 initval)
-{
- u32 a, b, c;
- const unsigned char *k = key;
-
- a = b = c = JHASH_INITVAL + length + initval;
-
- while (length > 12) {
- a += *(u32 *)(k);
- b += *(u32 *)(k + 4);
- c += *(u32 *)(k + 8);
- __jhash_mix(a, b, c);
- length -= 12;
- k += 12;
- }
- switch (length) {
- case 12: c += (u32)k[11]<<24;
- case 11: c += (u32)k[10]<<16;
- case 10: c += (u32)k[9]<<8;
- case 9: c += k[8];
- case 8: b += (u32)k[7]<<24;
- case 7: b += (u32)k[6]<<16;
- case 6: b += (u32)k[5]<<8;
- case 5: b += k[4];
- case 4: a += (u32)k[3]<<24;
- case 3: a += (u32)k[2]<<16;
- case 2: a += (u32)k[1]<<8;
- case 1: a += k[0];
- __jhash_final(a, b, c);
- case 0: /* Nothing left to add */
- break;
- }
-
- return c;
-}
-
-static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
-{
- a += initval;
- b += initval;
- c += initval;
- __jhash_final(a, b, c);
- return c;
-}
-
-static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
-{
- return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
-}
-
-#define PCKT_FRAGMENTED 65343
-#define IPV4_HDR_LEN_NO_OPT 20
-#define IPV4_PLUS_ICMP_HDR 28
-#define IPV6_PLUS_ICMP_HDR 48
-#define RING_SIZE 2
-#define MAX_VIPS 12
-#define MAX_REALS 5
-#define CTL_MAP_SIZE 16
-#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
-#define F_IPV6 (1 << 0)
-#define F_HASH_NO_SRC_PORT (1 << 0)
-#define F_ICMP (1 << 0)
-#define F_SYN_SET (1 << 1)
-
-struct packet_description {
- union {
- __be32 src;
- __be32 srcv6[4];
- };
- union {
- __be32 dst;
- __be32 dstv6[4];
- };
- union {
- __u32 ports;
- __u16 port16[2];
- };
- __u8 proto;
- __u8 flags;
-};
-
-struct ctl_value {
- union {
- __u64 value;
- __u32 ifindex;
- __u8 mac[6];
- };
-};
-
-struct vip_meta {
- __u32 flags;
- __u32 vip_num;
-};
-
-struct real_definition {
- union {
- __be32 dst;
- __be32 dstv6[4];
- };
- __u8 flags;
-};
-
-struct vip_stats {
- __u64 bytes;
- __u64 pkts;
-};
-
-struct eth_hdr {
- unsigned char eth_dest[ETH_ALEN];
- unsigned char eth_source[ETH_ALEN];
- unsigned short eth_proto;
-};
-
-struct bpf_map_def SEC("maps") vip_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct vip),
- .value_size = sizeof(struct vip_meta),
- .max_entries = MAX_VIPS,
-};
-
-struct bpf_map_def SEC("maps") ch_rings = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = CH_RINGS_SIZE,
-};
-
-struct bpf_map_def SEC("maps") reals = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct real_definition),
- .max_entries = MAX_REALS,
-};
-
-struct bpf_map_def SEC("maps") stats = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct vip_stats),
- .max_entries = MAX_VIPS,
-};
-
-struct bpf_map_def SEC("maps") ctl_array = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct ctl_value),
- .max_entries = CTL_MAP_SIZE,
-};
-
-static __always_inline __u32 get_packet_hash(struct packet_description *pckt,
- bool ipv6)
-{
- if (ipv6)
- return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
- pckt->ports, CH_RINGS_SIZE);
- else
- return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
-}
-
-static __always_inline bool get_packet_dst(struct real_definition **real,
- struct packet_description *pckt,
- struct vip_meta *vip_info,
- bool is_ipv6)
-{
- __u32 hash = get_packet_hash(pckt, is_ipv6) % RING_SIZE;
- __u32 key = RING_SIZE * vip_info->vip_num + hash;
- __u32 *real_pos;
-
- real_pos = bpf_map_lookup_elem(&ch_rings, &key);
- if (!real_pos)
- return false;
- key = *real_pos;
- *real = bpf_map_lookup_elem(&reals, &key);
- if (!(*real))
- return false;
- return true;
-}
-
-static __always_inline int parse_icmpv6(void *data, void *data_end, __u64 off,
- struct packet_description *pckt)
-{
- struct icmp6hdr *icmp_hdr;
- struct ipv6hdr *ip6h;
-
- icmp_hdr = data + off;
- if (icmp_hdr + 1 > data_end)
- return TC_ACT_SHOT;
- if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
- return TC_ACT_OK;
- off += sizeof(struct icmp6hdr);
- ip6h = data + off;
- if (ip6h + 1 > data_end)
- return TC_ACT_SHOT;
- pckt->proto = ip6h->nexthdr;
- pckt->flags |= F_ICMP;
- memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
- memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
- return TC_ACT_UNSPEC;
-}
-
-static __always_inline int parse_icmp(void *data, void *data_end, __u64 off,
- struct packet_description *pckt)
-{
- struct icmphdr *icmp_hdr;
- struct iphdr *iph;
-
- icmp_hdr = data + off;
- if (icmp_hdr + 1 > data_end)
- return TC_ACT_SHOT;
- if (icmp_hdr->type != ICMP_DEST_UNREACH ||
- icmp_hdr->code != ICMP_FRAG_NEEDED)
- return TC_ACT_OK;
- off += sizeof(struct icmphdr);
- iph = data + off;
- if (iph + 1 > data_end)
- return TC_ACT_SHOT;
- if (iph->ihl != 5)
- return TC_ACT_SHOT;
- pckt->proto = iph->protocol;
- pckt->flags |= F_ICMP;
- pckt->src = iph->daddr;
- pckt->dst = iph->saddr;
- return TC_ACT_UNSPEC;
-}
-
-static __always_inline bool parse_udp(void *data, __u64 off, void *data_end,
- struct packet_description *pckt)
-{
- struct udphdr *udp;
- udp = data + off;
-
- if (udp + 1 > data_end)
- return false;
-
- if (!(pckt->flags & F_ICMP)) {
- pckt->port16[0] = udp->source;
- pckt->port16[1] = udp->dest;
- } else {
- pckt->port16[0] = udp->dest;
- pckt->port16[1] = udp->source;
- }
- return true;
-}
-
-static __always_inline bool parse_tcp(void *data, __u64 off, void *data_end,
- struct packet_description *pckt)
-{
- struct tcphdr *tcp;
-
- tcp = data + off;
- if (tcp + 1 > data_end)
- return false;
-
- if (tcp->syn)
- pckt->flags |= F_SYN_SET;
-
- if (!(pckt->flags & F_ICMP)) {
- pckt->port16[0] = tcp->source;
- pckt->port16[1] = tcp->dest;
- } else {
- pckt->port16[0] = tcp->dest;
- pckt->port16[1] = tcp->source;
- }
- return true;
-}
-
-static __always_inline int process_packet(void *data, __u64 off, void *data_end,
- bool is_ipv6, struct __sk_buff *skb)
-{
- void *pkt_start = (void *)(long)skb->data;
- struct packet_description pckt = {};
- struct eth_hdr *eth = pkt_start;
- struct bpf_tunnel_key tkey = {};
- struct vip_stats *data_stats;
- struct real_definition *dst;
- struct vip_meta *vip_info;
- struct ctl_value *cval;
- __u32 v4_intf_pos = 1;
- __u32 v6_intf_pos = 2;
- struct ipv6hdr *ip6h;
- struct vip vip = {};
- struct iphdr *iph;
- int tun_flag = 0;
- __u16 pkt_bytes;
- __u64 iph_len;
- __u32 ifindex;
- __u8 protocol;
- __u32 vip_num;
- int action;
-
- tkey.tunnel_ttl = 64;
- if (is_ipv6) {
- ip6h = data + off;
- if (ip6h + 1 > data_end)
- return TC_ACT_SHOT;
-
- iph_len = sizeof(struct ipv6hdr);
- protocol = ip6h->nexthdr;
- pckt.proto = protocol;
- pkt_bytes = bpf_ntohs(ip6h->payload_len);
- off += iph_len;
- if (protocol == IPPROTO_FRAGMENT) {
- return TC_ACT_SHOT;
- } else if (protocol == IPPROTO_ICMPV6) {
- action = parse_icmpv6(data, data_end, off, &pckt);
- if (action >= 0)
- return action;
- off += IPV6_PLUS_ICMP_HDR;
- } else {
- memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
- memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
- }
- } else {
- iph = data + off;
- if (iph + 1 > data_end)
- return TC_ACT_SHOT;
- if (iph->ihl != 5)
- return TC_ACT_SHOT;
-
- protocol = iph->protocol;
- pckt.proto = protocol;
- pkt_bytes = bpf_ntohs(iph->tot_len);
- off += IPV4_HDR_LEN_NO_OPT;
-
- if (iph->frag_off & PCKT_FRAGMENTED)
- return TC_ACT_SHOT;
- if (protocol == IPPROTO_ICMP) {
- action = parse_icmp(data, data_end, off, &pckt);
- if (action >= 0)
- return action;
- off += IPV4_PLUS_ICMP_HDR;
- } else {
- pckt.src = iph->saddr;
- pckt.dst = iph->daddr;
- }
- }
- protocol = pckt.proto;
-
- if (protocol == IPPROTO_TCP) {
- if (!parse_tcp(data, off, data_end, &pckt))
- return TC_ACT_SHOT;
- } else if (protocol == IPPROTO_UDP) {
- if (!parse_udp(data, off, data_end, &pckt))
- return TC_ACT_SHOT;
- } else {
- return TC_ACT_SHOT;
- }
-
- if (is_ipv6)
- memcpy(vip.daddr.v6, pckt.dstv6, 16);
- else
- vip.daddr.v4 = pckt.dst;
-
- vip.dport = pckt.port16[1];
- vip.protocol = pckt.proto;
- vip_info = bpf_map_lookup_elem(&vip_map, &vip);
- if (!vip_info) {
- vip.dport = 0;
- vip_info = bpf_map_lookup_elem(&vip_map, &vip);
- if (!vip_info)
- return TC_ACT_SHOT;
- pckt.port16[1] = 0;
- }
-
- if (vip_info->flags & F_HASH_NO_SRC_PORT)
- pckt.port16[0] = 0;
-
- if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
- return TC_ACT_SHOT;
-
- if (dst->flags & F_IPV6) {
- cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
- if (!cval)
- return TC_ACT_SHOT;
- ifindex = cval->ifindex;
- memcpy(tkey.remote_ipv6, dst->dstv6, 16);
- tun_flag = BPF_F_TUNINFO_IPV6;
- } else {
- cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
- if (!cval)
- return TC_ACT_SHOT;
- ifindex = cval->ifindex;
- tkey.remote_ipv4 = dst->dst;
- }
- vip_num = vip_info->vip_num;
- data_stats = bpf_map_lookup_elem(&stats, &vip_num);
- if (!data_stats)
- return TC_ACT_SHOT;
- data_stats->pkts++;
- data_stats->bytes += pkt_bytes;
- bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
- *(u32 *)eth->eth_dest = tkey.remote_ipv4;
- return bpf_redirect(ifindex, 0);
-}
-
-SEC("l4lb-demo")
-int balancer_ingress(struct __sk_buff *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct eth_hdr *eth = data;
- __u32 eth_proto;
- __u32 nh_off;
-
- nh_off = sizeof(struct eth_hdr);
- if (data + nh_off > data_end)
- return TC_ACT_SHOT;
- eth_proto = eth->eth_proto;
- if (eth_proto == bpf_htons(ETH_P_IP))
- return process_packet(data, nh_off, data_end, false, ctx);
- else if (eth_proto == bpf_htons(ETH_P_IPV6))
- return process_packet(data, nh_off, data_end, true, ctx);
- else
- return TC_ACT_SHOT;
-}
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2017 Facebook
-#include <stddef.h>
-#include <stdbool.h>
-#include <string.h>
-#include <linux/pkt_cls.h>
-#include <linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/icmp.h>
-#include <linux/icmpv6.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include "bpf_helpers.h"
-#include "test_iptunnel_common.h"
-#include "bpf_endian.h"
-
-int _version SEC("version") = 1;
-
-static __u32 rol32(__u32 word, unsigned int shift)
-{
- return (word << shift) | (word >> ((-shift) & 31));
-}
-
-/* copy paste of jhash from kernel sources to make sure llvm
- * can compile it into valid sequence of bpf instructions
- */
-#define __jhash_mix(a, b, c) \
-{ \
- a -= c; a ^= rol32(c, 4); c += b; \
- b -= a; b ^= rol32(a, 6); a += c; \
- c -= b; c ^= rol32(b, 8); b += a; \
- a -= c; a ^= rol32(c, 16); c += b; \
- b -= a; b ^= rol32(a, 19); a += c; \
- c -= b; c ^= rol32(b, 4); b += a; \
-}
-
-#define __jhash_final(a, b, c) \
-{ \
- c ^= b; c -= rol32(b, 14); \
- a ^= c; a -= rol32(c, 11); \
- b ^= a; b -= rol32(a, 25); \
- c ^= b; c -= rol32(b, 16); \
- a ^= c; a -= rol32(c, 4); \
- b ^= a; b -= rol32(a, 14); \
- c ^= b; c -= rol32(b, 24); \
-}
-
-#define JHASH_INITVAL 0xdeadbeef
-
-typedef unsigned int u32;
-
-static u32 jhash(const void *key, u32 length, u32 initval)
-{
- u32 a, b, c;
- const unsigned char *k = key;
-
- a = b = c = JHASH_INITVAL + length + initval;
-
- while (length > 12) {
- a += *(u32 *)(k);
- b += *(u32 *)(k + 4);
- c += *(u32 *)(k + 8);
- __jhash_mix(a, b, c);
- length -= 12;
- k += 12;
- }
- switch (length) {
- case 12: c += (u32)k[11]<<24;
- case 11: c += (u32)k[10]<<16;
- case 10: c += (u32)k[9]<<8;
- case 9: c += k[8];
- case 8: b += (u32)k[7]<<24;
- case 7: b += (u32)k[6]<<16;
- case 6: b += (u32)k[5]<<8;
- case 5: b += k[4];
- case 4: a += (u32)k[3]<<24;
- case 3: a += (u32)k[2]<<16;
- case 2: a += (u32)k[1]<<8;
- case 1: a += k[0];
- __jhash_final(a, b, c);
- case 0: /* Nothing left to add */
- break;
- }
-
- return c;
-}
-
-static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
-{
- a += initval;
- b += initval;
- c += initval;
- __jhash_final(a, b, c);
- return c;
-}
-
-static u32 jhash_2words(u32 a, u32 b, u32 initval)
-{
- return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
-}
-
-#define PCKT_FRAGMENTED 65343
-#define IPV4_HDR_LEN_NO_OPT 20
-#define IPV4_PLUS_ICMP_HDR 28
-#define IPV6_PLUS_ICMP_HDR 48
-#define RING_SIZE 2
-#define MAX_VIPS 12
-#define MAX_REALS 5
-#define CTL_MAP_SIZE 16
-#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
-#define F_IPV6 (1 << 0)
-#define F_HASH_NO_SRC_PORT (1 << 0)
-#define F_ICMP (1 << 0)
-#define F_SYN_SET (1 << 1)
-
-struct packet_description {
- union {
- __be32 src;
- __be32 srcv6[4];
- };
- union {
- __be32 dst;
- __be32 dstv6[4];
- };
- union {
- __u32 ports;
- __u16 port16[2];
- };
- __u8 proto;
- __u8 flags;
-};
-
-struct ctl_value {
- union {
- __u64 value;
- __u32 ifindex;
- __u8 mac[6];
- };
-};
-
-struct vip_meta {
- __u32 flags;
- __u32 vip_num;
-};
-
-struct real_definition {
- union {
- __be32 dst;
- __be32 dstv6[4];
- };
- __u8 flags;
-};
-
-struct vip_stats {
- __u64 bytes;
- __u64 pkts;
-};
-
-struct eth_hdr {
- unsigned char eth_dest[ETH_ALEN];
- unsigned char eth_source[ETH_ALEN];
- unsigned short eth_proto;
-};
-
-struct bpf_map_def SEC("maps") vip_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct vip),
- .value_size = sizeof(struct vip_meta),
- .max_entries = MAX_VIPS,
-};
-
-struct bpf_map_def SEC("maps") ch_rings = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = CH_RINGS_SIZE,
-};
-
-struct bpf_map_def SEC("maps") reals = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct real_definition),
- .max_entries = MAX_REALS,
-};
-
-struct bpf_map_def SEC("maps") stats = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct vip_stats),
- .max_entries = MAX_VIPS,
-};
-
-struct bpf_map_def SEC("maps") ctl_array = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct ctl_value),
- .max_entries = CTL_MAP_SIZE,
-};
-
-static __u32 get_packet_hash(struct packet_description *pckt,
- bool ipv6)
-{
- if (ipv6)
- return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
- pckt->ports, CH_RINGS_SIZE);
- else
- return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
-}
-
-static bool get_packet_dst(struct real_definition **real,
- struct packet_description *pckt,
- struct vip_meta *vip_info,
- bool is_ipv6)
-{
- __u32 hash = get_packet_hash(pckt, is_ipv6);
- __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
- __u32 *real_pos;
-
- if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
- hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
- return 0;
-
- real_pos = bpf_map_lookup_elem(&ch_rings, &key);
- if (!real_pos)
- return false;
- key = *real_pos;
- *real = bpf_map_lookup_elem(&reals, &key);
- if (!(*real))
- return false;
- return true;
-}
-
-static int parse_icmpv6(void *data, void *data_end, __u64 off,
- struct packet_description *pckt)
-{
- struct icmp6hdr *icmp_hdr;
- struct ipv6hdr *ip6h;
-
- icmp_hdr = data + off;
- if (icmp_hdr + 1 > data_end)
- return TC_ACT_SHOT;
- if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
- return TC_ACT_OK;
- off += sizeof(struct icmp6hdr);
- ip6h = data + off;
- if (ip6h + 1 > data_end)
- return TC_ACT_SHOT;
- pckt->proto = ip6h->nexthdr;
- pckt->flags |= F_ICMP;
- memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
- memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
- return TC_ACT_UNSPEC;
-}
-
-static int parse_icmp(void *data, void *data_end, __u64 off,
- struct packet_description *pckt)
-{
- struct icmphdr *icmp_hdr;
- struct iphdr *iph;
-
- icmp_hdr = data + off;
- if (icmp_hdr + 1 > data_end)
- return TC_ACT_SHOT;
- if (icmp_hdr->type != ICMP_DEST_UNREACH ||
- icmp_hdr->code != ICMP_FRAG_NEEDED)
- return TC_ACT_OK;
- off += sizeof(struct icmphdr);
- iph = data + off;
- if (iph + 1 > data_end)
- return TC_ACT_SHOT;
- if (iph->ihl != 5)
- return TC_ACT_SHOT;
- pckt->proto = iph->protocol;
- pckt->flags |= F_ICMP;
- pckt->src = iph->daddr;
- pckt->dst = iph->saddr;
- return TC_ACT_UNSPEC;
-}
-
-static bool parse_udp(void *data, __u64 off, void *data_end,
- struct packet_description *pckt)
-{
- struct udphdr *udp;
- udp = data + off;
-
- if (udp + 1 > data_end)
- return false;
-
- if (!(pckt->flags & F_ICMP)) {
- pckt->port16[0] = udp->source;
- pckt->port16[1] = udp->dest;
- } else {
- pckt->port16[0] = udp->dest;
- pckt->port16[1] = udp->source;
- }
- return true;
-}
-
-static bool parse_tcp(void *data, __u64 off, void *data_end,
- struct packet_description *pckt)
-{
- struct tcphdr *tcp;
-
- tcp = data + off;
- if (tcp + 1 > data_end)
- return false;
-
- if (tcp->syn)
- pckt->flags |= F_SYN_SET;
-
- if (!(pckt->flags & F_ICMP)) {
- pckt->port16[0] = tcp->source;
- pckt->port16[1] = tcp->dest;
- } else {
- pckt->port16[0] = tcp->dest;
- pckt->port16[1] = tcp->source;
- }
- return true;
-}
-
-static int process_packet(void *data, __u64 off, void *data_end,
- bool is_ipv6, struct __sk_buff *skb)
-{
- void *pkt_start = (void *)(long)skb->data;
- struct packet_description pckt = {};
- struct eth_hdr *eth = pkt_start;
- struct bpf_tunnel_key tkey = {};
- struct vip_stats *data_stats;
- struct real_definition *dst;
- struct vip_meta *vip_info;
- struct ctl_value *cval;
- __u32 v4_intf_pos = 1;
- __u32 v6_intf_pos = 2;
- struct ipv6hdr *ip6h;
- struct vip vip = {};
- struct iphdr *iph;
- int tun_flag = 0;
- __u16 pkt_bytes;
- __u64 iph_len;
- __u32 ifindex;
- __u8 protocol;
- __u32 vip_num;
- int action;
-
- tkey.tunnel_ttl = 64;
- if (is_ipv6) {
- ip6h = data + off;
- if (ip6h + 1 > data_end)
- return TC_ACT_SHOT;
-
- iph_len = sizeof(struct ipv6hdr);
- protocol = ip6h->nexthdr;
- pckt.proto = protocol;
- pkt_bytes = bpf_ntohs(ip6h->payload_len);
- off += iph_len;
- if (protocol == IPPROTO_FRAGMENT) {
- return TC_ACT_SHOT;
- } else if (protocol == IPPROTO_ICMPV6) {
- action = parse_icmpv6(data, data_end, off, &pckt);
- if (action >= 0)
- return action;
- off += IPV6_PLUS_ICMP_HDR;
- } else {
- memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
- memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
- }
- } else {
- iph = data + off;
- if (iph + 1 > data_end)
- return TC_ACT_SHOT;
- if (iph->ihl != 5)
- return TC_ACT_SHOT;
-
- protocol = iph->protocol;
- pckt.proto = protocol;
- pkt_bytes = bpf_ntohs(iph->tot_len);
- off += IPV4_HDR_LEN_NO_OPT;
-
- if (iph->frag_off & PCKT_FRAGMENTED)
- return TC_ACT_SHOT;
- if (protocol == IPPROTO_ICMP) {
- action = parse_icmp(data, data_end, off, &pckt);
- if (action >= 0)
- return action;
- off += IPV4_PLUS_ICMP_HDR;
- } else {
- pckt.src = iph->saddr;
- pckt.dst = iph->daddr;
- }
- }
- protocol = pckt.proto;
-
- if (protocol == IPPROTO_TCP) {
- if (!parse_tcp(data, off, data_end, &pckt))
- return TC_ACT_SHOT;
- } else if (protocol == IPPROTO_UDP) {
- if (!parse_udp(data, off, data_end, &pckt))
- return TC_ACT_SHOT;
- } else {
- return TC_ACT_SHOT;
- }
-
- if (is_ipv6)
- memcpy(vip.daddr.v6, pckt.dstv6, 16);
- else
- vip.daddr.v4 = pckt.dst;
-
- vip.dport = pckt.port16[1];
- vip.protocol = pckt.proto;
- vip_info = bpf_map_lookup_elem(&vip_map, &vip);
- if (!vip_info) {
- vip.dport = 0;
- vip_info = bpf_map_lookup_elem(&vip_map, &vip);
- if (!vip_info)
- return TC_ACT_SHOT;
- pckt.port16[1] = 0;
- }
-
- if (vip_info->flags & F_HASH_NO_SRC_PORT)
- pckt.port16[0] = 0;
-
- if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
- return TC_ACT_SHOT;
-
- if (dst->flags & F_IPV6) {
- cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
- if (!cval)
- return TC_ACT_SHOT;
- ifindex = cval->ifindex;
- memcpy(tkey.remote_ipv6, dst->dstv6, 16);
- tun_flag = BPF_F_TUNINFO_IPV6;
- } else {
- cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
- if (!cval)
- return TC_ACT_SHOT;
- ifindex = cval->ifindex;
- tkey.remote_ipv4 = dst->dst;
- }
- vip_num = vip_info->vip_num;
- data_stats = bpf_map_lookup_elem(&stats, &vip_num);
- if (!data_stats)
- return TC_ACT_SHOT;
- data_stats->pkts++;
- data_stats->bytes += pkt_bytes;
- bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
- *(u32 *)eth->eth_dest = tkey.remote_ipv4;
- return bpf_redirect(ifindex, 0);
-}
-
-SEC("l4lb-demo")
-int balancer_ingress(struct __sk_buff *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct eth_hdr *eth = data;
- __u32 eth_proto;
- __u32 nh_off;
-
- nh_off = sizeof(struct eth_hdr);
- if (data + nh_off > data_end)
- return TC_ACT_SHOT;
- eth_proto = eth->eth_proto;
- if (eth_proto == bpf_htons(ETH_P_IP))
- return process_packet(data, nh_off, data_end, false, ctx);
- else if (eth_proto == bpf_htons(ETH_P_IPV6))
- return process_packet(data, nh_off, data_end, true, ctx);
- else
- return TC_ACT_SHOT;
-}
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// test ir decoder
-//
-// Copyright (C) 2018 Sean Young <sean@mess.org>
-
-#include <linux/bpf.h>
-#include <linux/lirc.h>
-#include "bpf_helpers.h"
-
-SEC("lirc_mode2")
-int bpf_decoder(unsigned int *sample)
-{
- if (LIRC_IS_PULSE(*sample)) {
- unsigned int duration = LIRC_VALUE(*sample);
-
- if (duration & 0x10000)
- bpf_rc_keydown(sample, 0x40, duration & 0xffff, 0);
- if (duration & 0x20000)
- bpf_rc_pointer_rel(sample, (duration >> 8) & 0xff,
- duration & 0xff);
- }
-
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-#include <stddef.h>
-#include <inttypes.h>
-#include <errno.h>
-#include <linux/seg6_local.h>
-#include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
-
-#define bpf_printk(fmt, ...) \
-({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
-})
-
-/* Packet parsing state machine helpers. */
-#define cursor_advance(_cursor, _len) \
- ({ void *_tmp = _cursor; _cursor += _len; _tmp; })
-
-#define SR6_FLAG_ALERT (1 << 4)
-
-#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
- 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
-#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
- 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
-#define BPF_PACKET_HEADER __attribute__((packed))
-
-struct ip6_t {
- unsigned int ver:4;
- unsigned int priority:8;
- unsigned int flow_label:20;
- unsigned short payload_len;
- unsigned char next_header;
- unsigned char hop_limit;
- unsigned long long src_hi;
- unsigned long long src_lo;
- unsigned long long dst_hi;
- unsigned long long dst_lo;
-} BPF_PACKET_HEADER;
-
-struct ip6_addr_t {
- unsigned long long hi;
- unsigned long long lo;
-} BPF_PACKET_HEADER;
-
-struct ip6_srh_t {
- unsigned char nexthdr;
- unsigned char hdrlen;
- unsigned char type;
- unsigned char segments_left;
- unsigned char first_segment;
- unsigned char flags;
- unsigned short tag;
-
- struct ip6_addr_t segments[0];
-} BPF_PACKET_HEADER;
-
-struct sr6_tlv_t {
- unsigned char type;
- unsigned char len;
- unsigned char value[0];
-} BPF_PACKET_HEADER;
-
-__attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
-{
- void *cursor, *data_end;
- struct ip6_srh_t *srh;
- struct ip6_t *ip;
- uint8_t *ipver;
-
- data_end = (void *)(long)skb->data_end;
- cursor = (void *)(long)skb->data;
- ipver = (uint8_t *)cursor;
-
- if ((void *)ipver + sizeof(*ipver) > data_end)
- return NULL;
-
- if ((*ipver >> 4) != 6)
- return NULL;
-
- ip = cursor_advance(cursor, sizeof(*ip));
- if ((void *)ip + sizeof(*ip) > data_end)
- return NULL;
-
- if (ip->next_header != 43)
- return NULL;
-
- srh = cursor_advance(cursor, sizeof(*srh));
- if ((void *)srh + sizeof(*srh) > data_end)
- return NULL;
-
- if (srh->type != 4)
- return NULL;
-
- return srh;
-}
-
-__attribute__((always_inline))
-int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
- uint32_t old_pad, uint32_t pad_off)
-{
- int err;
-
- if (new_pad != old_pad) {
- err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
- (int) new_pad - (int) old_pad);
- if (err)
- return err;
- }
-
- if (new_pad > 0) {
- char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0};
- struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
-
- pad_tlv->type = SR6_TLV_PADDING;
- pad_tlv->len = new_pad - 2;
-
- err = bpf_lwt_seg6_store_bytes(skb, pad_off,
- (void *)pad_tlv_buf, new_pad);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-__attribute__((always_inline))
-int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
- uint32_t *tlv_off, uint32_t *pad_size,
- uint32_t *pad_off)
-{
- uint32_t srh_off, cur_off;
- int offset_valid = 0;
- int err;
-
- srh_off = (char *)srh - (char *)(long)skb->data;
- // cur_off = end of segments, start of possible TLVs
- cur_off = srh_off + sizeof(*srh) +
- sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
-
- *pad_off = 0;
-
- // we can only go as far as ~10 TLVs due to the BPF max stack size
- #pragma clang loop unroll(full)
- for (int i = 0; i < 10; i++) {
- struct sr6_tlv_t tlv;
-
- if (cur_off == *tlv_off)
- offset_valid = 1;
-
- if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
- break;
-
- err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
- if (err)
- return err;
-
- if (tlv.type == SR6_TLV_PADDING) {
- *pad_size = tlv.len + sizeof(tlv);
- *pad_off = cur_off;
-
- if (*tlv_off == srh_off) {
- *tlv_off = cur_off;
- offset_valid = 1;
- }
- break;
-
- } else if (tlv.type == SR6_TLV_HMAC) {
- break;
- }
-
- cur_off += sizeof(tlv) + tlv.len;
- } // we reached the padding or HMAC TLVs, or the end of the SRH
-
- if (*pad_off == 0)
- *pad_off = cur_off;
-
- if (*tlv_off == -1)
- *tlv_off = cur_off;
- else if (!offset_valid)
- return -EINVAL;
-
- return 0;
-}
-
-__attribute__((always_inline))
-int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
- struct sr6_tlv_t *itlv, uint8_t tlv_size)
-{
- uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
- uint8_t len_remaining, new_pad;
- uint32_t pad_off = 0;
- uint32_t pad_size = 0;
- uint32_t partial_srh_len;
- int err;
-
- if (tlv_off != -1)
- tlv_off += srh_off;
-
- if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
- return -EINVAL;
-
- err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
- if (err)
- return err;
-
- err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
- if (err)
- return err;
-
- err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
- if (err)
- return err;
-
- // the following can't be moved inside update_tlv_pad because the
- // bpf verifier has some issues with it
- pad_off += sizeof(*itlv) + itlv->len;
- partial_srh_len = pad_off - srh_off;
- len_remaining = partial_srh_len % 8;
- new_pad = 8 - len_remaining;
-
- if (new_pad == 1) // cannot pad for 1 byte only
- new_pad = 9;
- else if (new_pad == 8)
- new_pad = 0;
-
- return update_tlv_pad(skb, new_pad, pad_size, pad_off);
-}
-
-__attribute__((always_inline))
-int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
- uint32_t tlv_off)
-{
- uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
- uint8_t len_remaining, new_pad;
- uint32_t partial_srh_len;
- uint32_t pad_off = 0;
- uint32_t pad_size = 0;
- struct sr6_tlv_t tlv;
- int err;
-
- tlv_off += srh_off;
-
- err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
- if (err)
- return err;
-
- err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv));
- if (err)
- return err;
-
- err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len));
- if (err)
- return err;
-
- pad_off -= sizeof(tlv) + tlv.len;
- partial_srh_len = pad_off - srh_off;
- len_remaining = partial_srh_len % 8;
- new_pad = 8 - len_remaining;
- if (new_pad == 1) // cannot pad for 1 byte only
- new_pad = 9;
- else if (new_pad == 8)
- new_pad = 0;
-
- return update_tlv_pad(skb, new_pad, pad_size, pad_off);
-}
-
-__attribute__((always_inline))
-int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
-{
- int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
- ((srh->first_segment + 1) << 4);
- struct sr6_tlv_t tlv;
-
- if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t)))
- return 0;
-
- if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) {
- struct ip6_addr_t egr_addr;
-
- if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16))
- return 0;
-
- // check if egress TLV value is correct
- if (ntohll(egr_addr.hi) == 0xfd00000000000000 &&
- ntohll(egr_addr.lo) == 0x4)
- return 1;
- }
-
- return 0;
-}
-
-// This function will push a SRH with segments fd00::1, fd00::2, fd00::3,
-// fd00::4
-SEC("encap_srh")
-int __encap_srh(struct __sk_buff *skb)
-{
- unsigned long long hi = 0xfd00000000000000;
- struct ip6_addr_t *seg;
- struct ip6_srh_t *srh;
- char srh_buf[72]; // room for 4 segments
- int err;
-
- srh = (struct ip6_srh_t *)srh_buf;
- srh->nexthdr = 0;
- srh->hdrlen = 8;
- srh->type = 4;
- srh->segments_left = 3;
- srh->first_segment = 3;
- srh->flags = 0;
- srh->tag = 0;
-
- seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
-
- #pragma clang loop unroll(full)
- for (unsigned long long lo = 0; lo < 4; lo++) {
- seg->lo = htonll(4 - lo);
- seg->hi = htonll(hi);
- seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
- }
-
- err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf));
- if (err)
- return BPF_DROP;
-
- return BPF_REDIRECT;
-}
-
-// Add an Egress TLV fc00::4, add the flag A,
-// and apply End.X action to fc42::1
-SEC("add_egr_x")
-int __add_egr_x(struct __sk_buff *skb)
-{
- unsigned long long hi = 0xfc42000000000000;
- unsigned long long lo = 0x1;
- struct ip6_srh_t *srh = get_srh(skb);
- uint8_t new_flags = SR6_FLAG_ALERT;
- struct ip6_addr_t addr;
- int err, offset;
-
- if (srh == NULL)
- return BPF_DROP;
-
- uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
- 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
-
- err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
- (struct sr6_tlv_t *)&tlv, 20);
- if (err)
- return BPF_DROP;
-
- offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
- err = bpf_lwt_seg6_store_bytes(skb, offset,
- (void *)&new_flags, sizeof(new_flags));
- if (err)
- return BPF_DROP;
-
- addr.lo = htonll(lo);
- addr.hi = htonll(hi);
- err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
- (void *)&addr, sizeof(addr));
- if (err)
- return BPF_DROP;
- return BPF_REDIRECT;
-}
-
-// Pop the Egress TLV, reset the flags, change the tag 2442 and finally do a
-// simple End action
-SEC("pop_egr")
-int __pop_egr(struct __sk_buff *skb)
-{
- struct ip6_srh_t *srh = get_srh(skb);
- uint16_t new_tag = bpf_htons(2442);
- uint8_t new_flags = 0;
- int err, offset;
-
- if (srh == NULL)
- return BPF_DROP;
-
- if (srh->flags != SR6_FLAG_ALERT)
- return BPF_DROP;
-
- if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV
- return BPF_DROP;
-
- if (!has_egr_tlv(skb, srh))
- return BPF_DROP;
-
- err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16);
- if (err)
- return BPF_DROP;
-
- offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
- if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags,
- sizeof(new_flags)))
- return BPF_DROP;
-
- offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag);
- if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag,
- sizeof(new_tag)))
- return BPF_DROP;
-
- return BPF_OK;
-}
-
-// Inspect if the Egress TLV and flag have been removed, if the tag is correct,
-// then apply a End.T action to reach the last segment
-SEC("inspect_t")
-int __inspect_t(struct __sk_buff *skb)
-{
- struct ip6_srh_t *srh = get_srh(skb);
- int table = 117;
- int err;
-
- if (srh == NULL)
- return BPF_DROP;
-
- if (srh->flags != 0)
- return BPF_DROP;
-
- if (srh->tag != bpf_htons(2442))
- return BPF_DROP;
-
- if (srh->hdrlen != 8) // 4 segments
- return BPF_DROP;
-
- err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
- (void *)&table, sizeof(table));
-
- if (err)
- return BPF_DROP;
-
- return BPF_REDIRECT;
-}
-
-char __license[] SEC("license") = "GPL";
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2018 Facebook */
-#include <stddef.h>
-#include <linux/bpf.h>
-#include <linux/types.h>
-#include "bpf_helpers.h"
-
-struct bpf_map_def SEC("maps") mim_array = {
- .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
- .key_size = sizeof(int),
- /* must be sizeof(__u32) for map in map */
- .value_size = sizeof(__u32),
- .max_entries = 1,
- .map_flags = 0,
-};
-
-struct bpf_map_def SEC("maps") mim_hash = {
- .type = BPF_MAP_TYPE_HASH_OF_MAPS,
- .key_size = sizeof(int),
- /* must be sizeof(__u32) for map in map */
- .value_size = sizeof(__u32),
- .max_entries = 1,
- .map_flags = 0,
-};
-
-SEC("xdp_mimtest")
-int xdp_mimtest0(struct xdp_md *ctx)
-{
- int value = 123;
- int key = 0;
- void *map;
-
- map = bpf_map_lookup_elem(&mim_array, &key);
- if (!map)
- return XDP_DROP;
-
- bpf_map_update_elem(map, &key, &value, 0);
-
- map = bpf_map_lookup_elem(&mim_hash, &key);
- if (!map)
- return XDP_DROP;
-
- bpf_map_update_elem(map, &key, &value, 0);
-
- return XDP_PASS;
-}
-
-int _version SEC("version") = 1;
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2019 Facebook
-#include <linux/bpf.h>
-#include <linux/version.h>
-#include "bpf_helpers.h"
-
-#define VAR_NUM 16
-
-struct hmap_elem {
- struct bpf_spin_lock lock;
- int var[VAR_NUM];
-};
-
-struct bpf_map_def SEC("maps") hash_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(int),
- .value_size = sizeof(struct hmap_elem),
- .max_entries = 1,
-};
-
-BPF_ANNOTATE_KV_PAIR(hash_map, int, struct hmap_elem);
-
-struct array_elem {
- struct bpf_spin_lock lock;
- int var[VAR_NUM];
-};
-
-struct bpf_map_def SEC("maps") array_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(struct array_elem),
- .max_entries = 1,
-};
-
-BPF_ANNOTATE_KV_PAIR(array_map, int, struct array_elem);
-
-SEC("map_lock_demo")
-int bpf_map_lock_test(struct __sk_buff *skb)
-{
- struct hmap_elem zero = {}, *val;
- int rnd = bpf_get_prandom_u32();
- int key = 0, err = 1, i;
- struct array_elem *q;
-
- val = bpf_map_lookup_elem(&hash_map, &key);
- if (!val)
- goto err;
- /* spin_lock in hash map */
- bpf_spin_lock(&val->lock);
- for (i = 0; i < VAR_NUM; i++)
- val->var[i] = rnd;
- bpf_spin_unlock(&val->lock);
-
- /* spin_lock in array */
- q = bpf_map_lookup_elem(&array_map, &key);
- if (!q)
- goto err;
- bpf_spin_lock(&q->lock);
- for (i = 0; i < VAR_NUM; i++)
- q->var[i] = rnd;
- bpf_spin_unlock(&q->lock);
- err = 0;
-err:
- return err;
-}
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-/* Copyright (c) 2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <stddef.h>
-#include <linux/bpf.h>
-#include <linux/pkt_cls.h>
-#include "bpf_helpers.h"
-
-/* It is a dumb bpf program such that it must have no
- * issue to be loaded since testing the verifier is
- * not the focus here.
- */
-
-int _version SEC("version") = 1;
-
-struct bpf_map_def SEC("maps") test_map_id = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u64),
- .max_entries = 1,
-};
-
-SEC("test_obj_id_dummy")
-int test_obj_id(struct __sk_buff *skb)
-{
- __u32 key = 0;
- __u64 *value;
-
- value = bpf_map_lookup_elem(&test_map_id, &key);
-
- return TC_ACT_OK;
-}
+++ /dev/null
-/* Copyright (c) 2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <stddef.h>
-#include <string.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/in.h>
-#include <linux/tcp.h>
-#include <linux/pkt_cls.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
-
-#define barrier() __asm__ __volatile__("": : :"memory")
-int _version SEC("version") = 1;
-
-SEC("test1")
-int process(struct __sk_buff *skb)
-{
- void *data_end = (void *)(long)skb->data_end;
- void *data = (void *)(long)skb->data;
- struct ethhdr *eth = (struct ethhdr *)(data);
- struct tcphdr *tcp = NULL;
- __u8 proto = 255;
- __u64 ihl_len;
-
- if (eth + 1 > data_end)
- return TC_ACT_SHOT;
-
- if (eth->h_proto == bpf_htons(ETH_P_IP)) {
- struct iphdr *iph = (struct iphdr *)(eth + 1);
-
- if (iph + 1 > data_end)
- return TC_ACT_SHOT;
- ihl_len = iph->ihl * 4;
- proto = iph->protocol;
- tcp = (struct tcphdr *)((void *)(iph) + ihl_len);
- } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
- struct ipv6hdr *ip6h = (struct ipv6hdr *)(eth + 1);
-
- if (ip6h + 1 > data_end)
- return TC_ACT_SHOT;
- ihl_len = sizeof(*ip6h);
- proto = ip6h->nexthdr;
- tcp = (struct tcphdr *)((void *)(ip6h) + ihl_len);
- }
-
- if (tcp) {
- if (((void *)(tcp) + 20) > data_end || proto != 6)
- return TC_ACT_SHOT;
- barrier(); /* to force ordering of checks */
- if (((void *)(tcp) + 18) > data_end)
- return TC_ACT_SHOT;
- if (tcp->urg_ptr == 123)
- return TC_ACT_OK;
- }
-
- return TC_ACT_UNSPEC;
-}
+++ /dev/null
-/* Copyright (c) 2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <stddef.h>
-#include <string.h>
-#include <linux/bpf.h>
-#include <linux/pkt_cls.h>
-#include "bpf_helpers.h"
-
-int _version SEC("version") = 1;
-
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#define TEST_FIELD(TYPE, FIELD, MASK) \
- { \
- TYPE tmp = *(volatile TYPE *)&skb->FIELD; \
- if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK)) \
- return TC_ACT_SHOT; \
- }
-#else
-#define TEST_FIELD_OFFSET(a, b) ((sizeof(a) - sizeof(b)) / sizeof(b))
-#define TEST_FIELD(TYPE, FIELD, MASK) \
- { \
- TYPE tmp = *((volatile TYPE *)&skb->FIELD + \
- TEST_FIELD_OFFSET(skb->FIELD, TYPE)); \
- if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK)) \
- return TC_ACT_SHOT; \
- }
-#endif
-
-SEC("test1")
-int process(struct __sk_buff *skb)
-{
- TEST_FIELD(__u8, len, 0xFF);
- TEST_FIELD(__u16, len, 0xFFFF);
- TEST_FIELD(__u32, len, 0xFFFFFFFF);
- TEST_FIELD(__u16, protocol, 0xFFFF);
- TEST_FIELD(__u32, protocol, 0xFFFFFFFF);
- TEST_FIELD(__u8, hash, 0xFF);
- TEST_FIELD(__u16, hash, 0xFFFF);
- TEST_FIELD(__u32, hash, 0xFFFFFFFF);
-
- return TC_ACT_OK;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Politecnico di Torino
-#define MAP_TYPE BPF_MAP_TYPE_QUEUE
-#include "test_queue_stack_map.h"
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2018 Facebook */
-
-#include <stdlib.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/bpf.h>
-#include <linux/types.h>
-#include <linux/if_ether.h>
-
-#include "bpf_endian.h"
-#include "bpf_helpers.h"
-#include "test_select_reuseport_common.h"
-
-int _version SEC("version") = 1;
-
-#ifndef offsetof
-#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
-#endif
-
-struct bpf_map_def SEC("maps") outer_map = {
- .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = 1,
-};
-
-struct bpf_map_def SEC("maps") result_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = NR_RESULTS,
-};
-
-struct bpf_map_def SEC("maps") tmp_index_ovr_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(int),
- .max_entries = 1,
-};
-
-struct bpf_map_def SEC("maps") linum_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = 1,
-};
-
-struct bpf_map_def SEC("maps") data_check_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct data_check),
- .max_entries = 1,
-};
-
-#define GOTO_DONE(_result) ({ \
- result = (_result); \
- linum = __LINE__; \
- goto done; \
-})
-
-SEC("select_by_skb_data")
-int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
-{
- __u32 linum, index = 0, flags = 0, index_zero = 0;
- __u32 *result_cnt, *linum_value;
- struct data_check data_check = {};
- struct cmd *cmd, cmd_copy;
- void *data, *data_end;
- void *reuseport_array;
- enum result result;
- int *index_ovr;
- int err;
-
- data = reuse_md->data;
- data_end = reuse_md->data_end;
- data_check.len = reuse_md->len;
- data_check.eth_protocol = reuse_md->eth_protocol;
- data_check.ip_protocol = reuse_md->ip_protocol;
- data_check.hash = reuse_md->hash;
- data_check.bind_inany = reuse_md->bind_inany;
- if (data_check.eth_protocol == bpf_htons(ETH_P_IP)) {
- if (bpf_skb_load_bytes_relative(reuse_md,
- offsetof(struct iphdr, saddr),
- data_check.skb_addrs, 8,
- BPF_HDR_START_NET))
- GOTO_DONE(DROP_MISC);
- } else {
- if (bpf_skb_load_bytes_relative(reuse_md,
- offsetof(struct ipv6hdr, saddr),
- data_check.skb_addrs, 32,
- BPF_HDR_START_NET))
- GOTO_DONE(DROP_MISC);
- }
-
- /*
- * The ip_protocol could be a compile time decision
- * if the bpf_prog.o is dedicated to either TCP or
- * UDP.
- *
- * Otherwise, reuse_md->ip_protocol or
- * the protocol field in the iphdr can be used.
- */
- if (data_check.ip_protocol == IPPROTO_TCP) {
- struct tcphdr *th = data;
-
- if (th + 1 > data_end)
- GOTO_DONE(DROP_MISC);
-
- data_check.skb_ports[0] = th->source;
- data_check.skb_ports[1] = th->dest;
-
- if ((th->doff << 2) + sizeof(*cmd) > data_check.len)
- GOTO_DONE(DROP_ERR_SKB_DATA);
- if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy,
- sizeof(cmd_copy)))
- GOTO_DONE(DROP_MISC);
- cmd = &cmd_copy;
- } else if (data_check.ip_protocol == IPPROTO_UDP) {
- struct udphdr *uh = data;
-
- if (uh + 1 > data_end)
- GOTO_DONE(DROP_MISC);
-
- data_check.skb_ports[0] = uh->source;
- data_check.skb_ports[1] = uh->dest;
-
- if (sizeof(struct udphdr) + sizeof(*cmd) > data_check.len)
- GOTO_DONE(DROP_ERR_SKB_DATA);
- if (data + sizeof(struct udphdr) + sizeof(*cmd) > data_end) {
- if (bpf_skb_load_bytes(reuse_md, sizeof(struct udphdr),
- &cmd_copy, sizeof(cmd_copy)))
- GOTO_DONE(DROP_MISC);
- cmd = &cmd_copy;
- } else {
- cmd = data + sizeof(struct udphdr);
- }
- } else {
- GOTO_DONE(DROP_MISC);
- }
-
- reuseport_array = bpf_map_lookup_elem(&outer_map, &index_zero);
- if (!reuseport_array)
- GOTO_DONE(DROP_ERR_INNER_MAP);
-
- index = cmd->reuseport_index;
- index_ovr = bpf_map_lookup_elem(&tmp_index_ovr_map, &index_zero);
- if (!index_ovr)
- GOTO_DONE(DROP_MISC);
-
- if (*index_ovr != -1) {
- index = *index_ovr;
- *index_ovr = -1;
- }
- err = bpf_sk_select_reuseport(reuse_md, reuseport_array, &index,
- flags);
- if (!err)
- GOTO_DONE(PASS);
-
- if (cmd->pass_on_failure)
- GOTO_DONE(PASS_ERR_SK_SELECT_REUSEPORT);
- else
- GOTO_DONE(DROP_ERR_SK_SELECT_REUSEPORT);
-
-done:
- result_cnt = bpf_map_lookup_elem(&result_map, &result);
- if (!result_cnt)
- return SK_DROP;
-
- bpf_map_update_elem(&linum_map, &index_zero, &linum, BPF_ANY);
- bpf_map_update_elem(&data_check_map, &index_zero, &data_check, BPF_ANY);
-
- (*result_cnt)++;
- return result < PASS ? SK_DROP : SK_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
-
-#include <stddef.h>
-#include <stdbool.h>
-#include <string.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/pkt_cls.h>
-#include <linux/tcp.h>
-#include <sys/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
-
-int _version SEC("version") = 1;
-char _license[] SEC("license") = "GPL";
-
-/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
-static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
- void *data_end, __u16 eth_proto,
- bool *ipv4)
-{
- struct bpf_sock_tuple *result;
- __u8 proto = 0;
- __u64 ihl_len;
-
- if (eth_proto == bpf_htons(ETH_P_IP)) {
- struct iphdr *iph = (struct iphdr *)(data + nh_off);
-
- if (iph + 1 > data_end)
- return NULL;
- ihl_len = iph->ihl * 4;
- proto = iph->protocol;
- *ipv4 = true;
- result = (struct bpf_sock_tuple *)&iph->saddr;
- } else if (eth_proto == bpf_htons(ETH_P_IPV6)) {
- struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + nh_off);
-
- if (ip6h + 1 > data_end)
- return NULL;
- ihl_len = sizeof(*ip6h);
- proto = ip6h->nexthdr;
- *ipv4 = true;
- result = (struct bpf_sock_tuple *)&ip6h->saddr;
- }
-
- if (data + nh_off + ihl_len > data_end || proto != IPPROTO_TCP)
- return NULL;
-
- return result;
-}
-
-SEC("sk_lookup_success")
-int bpf_sk_lookup_test0(struct __sk_buff *skb)
-{
- void *data_end = (void *)(long)skb->data_end;
- void *data = (void *)(long)skb->data;
- struct ethhdr *eth = (struct ethhdr *)(data);
- struct bpf_sock_tuple *tuple;
- struct bpf_sock *sk;
- size_t tuple_len;
- bool ipv4;
-
- if (eth + 1 > data_end)
- return TC_ACT_SHOT;
-
- tuple = get_tuple(data, sizeof(*eth), data_end, eth->h_proto, &ipv4);
- if (!tuple || tuple + sizeof *tuple > data_end)
- return TC_ACT_SHOT;
-
- tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
- sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
- if (sk)
- bpf_sk_release(sk);
- return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
-}
-
-SEC("sk_lookup_success_simple")
-int bpf_sk_lookup_test1(struct __sk_buff *skb)
-{
- struct bpf_sock_tuple tuple = {};
- struct bpf_sock *sk;
-
- sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
- if (sk)
- bpf_sk_release(sk);
- return 0;
-}
-
-SEC("fail_use_after_free")
-int bpf_sk_lookup_uaf(struct __sk_buff *skb)
-{
- struct bpf_sock_tuple tuple = {};
- struct bpf_sock *sk;
- __u32 family = 0;
-
- sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
- if (sk) {
- bpf_sk_release(sk);
- family = sk->family;
- }
- return family;
-}
-
-SEC("fail_modify_sk_pointer")
-int bpf_sk_lookup_modptr(struct __sk_buff *skb)
-{
- struct bpf_sock_tuple tuple = {};
- struct bpf_sock *sk;
- __u32 family;
-
- sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
- if (sk) {
- sk += 1;
- bpf_sk_release(sk);
- }
- return 0;
-}
-
-SEC("fail_modify_sk_or_null_pointer")
-int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
-{
- struct bpf_sock_tuple tuple = {};
- struct bpf_sock *sk;
- __u32 family;
-
- sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
- sk += 1;
- if (sk)
- bpf_sk_release(sk);
- return 0;
-}
-
-SEC("fail_no_release")
-int bpf_sk_lookup_test2(struct __sk_buff *skb)
-{
- struct bpf_sock_tuple tuple = {};
-
- bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
- return 0;
-}
-
-SEC("fail_release_twice")
-int bpf_sk_lookup_test3(struct __sk_buff *skb)
-{
- struct bpf_sock_tuple tuple = {};
- struct bpf_sock *sk;
-
- sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
- bpf_sk_release(sk);
- bpf_sk_release(sk);
- return 0;
-}
-
-SEC("fail_release_unchecked")
-int bpf_sk_lookup_test4(struct __sk_buff *skb)
-{
- struct bpf_sock_tuple tuple = {};
- struct bpf_sock *sk;
-
- sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
- bpf_sk_release(sk);
- return 0;
-}
-
-void lookup_no_release(struct __sk_buff *skb)
-{
- struct bpf_sock_tuple tuple = {};
- bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
-}
-
-SEC("fail_no_release_subcall")
-int bpf_sk_lookup_test5(struct __sk_buff *skb)
-{
- lookup_no_release(skb);
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#include <linux/bpf.h>
-#include <linux/pkt_cls.h>
-
-#include <string.h>
-
-#include "bpf_helpers.h"
-
-#define NUM_CGROUP_LEVELS 4
-
-struct bpf_map_def SEC("maps") cgroup_ids = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u64),
- .max_entries = NUM_CGROUP_LEVELS,
-};
-
-static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
-{
- __u64 id;
-
- /* [1] &level passed to external function that may change it, it's
- * incompatible with loop unroll.
- */
- id = bpf_skb_ancestor_cgroup_id(skb, level);
- bpf_map_update_elem(&cgroup_ids, &level, &id, 0);
-}
-
-SEC("cgroup_id_logger")
-int log_cgroup_id(struct __sk_buff *skb)
-{
- /* Loop unroll can't be used here due to [1]. Unrolling manually.
- * Number of calls should be in sync with NUM_CGROUP_LEVELS.
- */
- log_nth_level(skb, 0);
- log_nth_level(skb, 1);
- log_nth_level(skb, 2);
- log_nth_level(skb, 3);
-
- return TC_ACT_OK;
-}
-
-int _version SEC("version") = 1;
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2019 Facebook */
-
-#include <linux/bpf.h>
-#include <netinet/in.h>
-#include <stdbool.h>
-
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
-
-enum bpf_array_idx {
- SRV_IDX,
- CLI_IDX,
- __NR_BPF_ARRAY_IDX,
-};
-
-struct bpf_map_def SEC("maps") addr_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct sockaddr_in6),
- .max_entries = __NR_BPF_ARRAY_IDX,
-};
-
-struct bpf_map_def SEC("maps") sock_result_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct bpf_sock),
- .max_entries = __NR_BPF_ARRAY_IDX,
-};
-
-struct bpf_map_def SEC("maps") tcp_sock_result_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct bpf_tcp_sock),
- .max_entries = __NR_BPF_ARRAY_IDX,
-};
-
-struct bpf_map_def SEC("maps") linum_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = 1,
-};
-
-static bool is_loopback6(__u32 *a6)
-{
- return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
-}
-
-static void skcpy(struct bpf_sock *dst,
- const struct bpf_sock *src)
-{
- dst->bound_dev_if = src->bound_dev_if;
- dst->family = src->family;
- dst->type = src->type;
- dst->protocol = src->protocol;
- dst->mark = src->mark;
- dst->priority = src->priority;
- dst->src_ip4 = src->src_ip4;
- dst->src_ip6[0] = src->src_ip6[0];
- dst->src_ip6[1] = src->src_ip6[1];
- dst->src_ip6[2] = src->src_ip6[2];
- dst->src_ip6[3] = src->src_ip6[3];
- dst->src_port = src->src_port;
- dst->dst_ip4 = src->dst_ip4;
- dst->dst_ip6[0] = src->dst_ip6[0];
- dst->dst_ip6[1] = src->dst_ip6[1];
- dst->dst_ip6[2] = src->dst_ip6[2];
- dst->dst_ip6[3] = src->dst_ip6[3];
- dst->dst_port = src->dst_port;
- dst->state = src->state;
-}
-
-static void tpcpy(struct bpf_tcp_sock *dst,
- const struct bpf_tcp_sock *src)
-{
- dst->snd_cwnd = src->snd_cwnd;
- dst->srtt_us = src->srtt_us;
- dst->rtt_min = src->rtt_min;
- dst->snd_ssthresh = src->snd_ssthresh;
- dst->rcv_nxt = src->rcv_nxt;
- dst->snd_nxt = src->snd_nxt;
- dst->snd_una = src->snd_una;
- dst->mss_cache = src->mss_cache;
- dst->ecn_flags = src->ecn_flags;
- dst->rate_delivered = src->rate_delivered;
- dst->rate_interval_us = src->rate_interval_us;
- dst->packets_out = src->packets_out;
- dst->retrans_out = src->retrans_out;
- dst->total_retrans = src->total_retrans;
- dst->segs_in = src->segs_in;
- dst->data_segs_in = src->data_segs_in;
- dst->segs_out = src->segs_out;
- dst->data_segs_out = src->data_segs_out;
- dst->lost_out = src->lost_out;
- dst->sacked_out = src->sacked_out;
- dst->bytes_received = src->bytes_received;
- dst->bytes_acked = src->bytes_acked;
-}
-
-#define RETURN { \
- linum = __LINE__; \
- bpf_map_update_elem(&linum_map, &idx0, &linum, 0); \
- return 1; \
-}
-
-SEC("cgroup_skb/egress")
-int read_sock_fields(struct __sk_buff *skb)
-{
- __u32 srv_idx = SRV_IDX, cli_idx = CLI_IDX, idx;
- struct sockaddr_in6 *srv_sa6, *cli_sa6;
- struct bpf_tcp_sock *tp, *tp_ret;
- struct bpf_sock *sk, *sk_ret;
- __u32 linum, idx0 = 0;
-
- sk = skb->sk;
- if (!sk || sk->state == 10)
- RETURN;
-
- sk = bpf_sk_fullsock(sk);
- if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP ||
- !is_loopback6(sk->src_ip6))
- RETURN;
-
- tp = bpf_tcp_sock(sk);
- if (!tp)
- RETURN;
-
- srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
- cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx);
- if (!srv_sa6 || !cli_sa6)
- RETURN;
-
- if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port))
- idx = srv_idx;
- else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port))
- idx = cli_idx;
- else
- RETURN;
-
- sk_ret = bpf_map_lookup_elem(&sock_result_map, &idx);
- tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &idx);
- if (!sk_ret || !tp_ret)
- RETURN;
-
- skcpy(sk_ret, sk);
- tpcpy(tp_ret, tp);
-
- RETURN;
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
-#undef SOCKMAP
-#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKHASH
-#include "./test_sockmap_kern.h"
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
-#define SOCKMAP
-#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKMAP
-#include "./test_sockmap_kern.h"
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2019 Facebook
-#include <linux/bpf.h>
-#include <linux/version.h>
-#include "bpf_helpers.h"
-
-struct hmap_elem {
- volatile int cnt;
- struct bpf_spin_lock lock;
- int test_padding;
-};
-
-struct bpf_map_def SEC("maps") hmap = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(int),
- .value_size = sizeof(struct hmap_elem),
- .max_entries = 1,
-};
-
-BPF_ANNOTATE_KV_PAIR(hmap, int, struct hmap_elem);
-
-
-struct cls_elem {
- struct bpf_spin_lock lock;
- volatile int cnt;
-};
-
-struct bpf_map_def SEC("maps") cls_map = {
- .type = BPF_MAP_TYPE_CGROUP_STORAGE,
- .key_size = sizeof(struct bpf_cgroup_storage_key),
- .value_size = sizeof(struct cls_elem),
-};
-
-BPF_ANNOTATE_KV_PAIR(cls_map, struct bpf_cgroup_storage_key,
- struct cls_elem);
-
-struct bpf_vqueue {
- struct bpf_spin_lock lock;
- /* 4 byte hole */
- unsigned long long lasttime;
- int credit;
- unsigned int rate;
-};
-
-struct bpf_map_def SEC("maps") vqueue = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(struct bpf_vqueue),
- .max_entries = 1,
-};
-
-BPF_ANNOTATE_KV_PAIR(vqueue, int, struct bpf_vqueue);
-#define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20)
-
-SEC("spin_lock_demo")
-int bpf_sping_lock_test(struct __sk_buff *skb)
-{
- volatile int credit = 0, max_credit = 100, pkt_len = 64;
- struct hmap_elem zero = {}, *val;
- unsigned long long curtime;
- struct bpf_vqueue *q;
- struct cls_elem *cls;
- int key = 0;
- int err = 0;
-
- val = bpf_map_lookup_elem(&hmap, &key);
- if (!val) {
- bpf_map_update_elem(&hmap, &key, &zero, 0);
- val = bpf_map_lookup_elem(&hmap, &key);
- if (!val) {
- err = 1;
- goto err;
- }
- }
- /* spin_lock in hash map run time test */
- bpf_spin_lock(&val->lock);
- if (val->cnt)
- val->cnt--;
- else
- val->cnt++;
- if (val->cnt != 0 && val->cnt != 1)
- err = 1;
- bpf_spin_unlock(&val->lock);
-
- /* spin_lock in array. virtual queue demo */
- q = bpf_map_lookup_elem(&vqueue, &key);
- if (!q)
- goto err;
- curtime = bpf_ktime_get_ns();
- bpf_spin_lock(&q->lock);
- q->credit += CREDIT_PER_NS(curtime - q->lasttime, q->rate);
- q->lasttime = curtime;
- if (q->credit > max_credit)
- q->credit = max_credit;
- q->credit -= pkt_len;
- credit = q->credit;
- bpf_spin_unlock(&q->lock);
-
- /* spin_lock in cgroup local storage */
- cls = bpf_get_local_storage(&cls_map, 0);
- bpf_spin_lock(&cls->lock);
- cls->cnt++;
- bpf_spin_unlock(&cls->lock);
-
-err:
- return err;
-}
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Politecnico di Torino
-#define MAP_TYPE BPF_MAP_TYPE_STACK
-#include "test_queue_stack_map.h"
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#include <linux/bpf.h>
-#include "bpf_helpers.h"
-
-#ifndef PERF_MAX_STACK_DEPTH
-#define PERF_MAX_STACK_DEPTH 127
-#endif
-
-struct bpf_map_def SEC("maps") control_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = 1,
-};
-
-struct bpf_map_def SEC("maps") stackid_hmap = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = 16384,
-};
-
-struct bpf_map_def SEC("maps") stackmap = {
- .type = BPF_MAP_TYPE_STACK_TRACE,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct bpf_stack_build_id)
- * PERF_MAX_STACK_DEPTH,
- .max_entries = 128,
- .map_flags = BPF_F_STACK_BUILD_ID,
-};
-
-struct bpf_map_def SEC("maps") stack_amap = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct bpf_stack_build_id)
- * PERF_MAX_STACK_DEPTH,
- .max_entries = 128,
-};
-
-/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
-struct random_urandom_args {
- unsigned long long pad;
- int got_bits;
- int pool_left;
- int input_left;
-};
-
-SEC("tracepoint/random/urandom_read")
-int oncpu(struct random_urandom_args *args)
-{
- __u32 max_len = sizeof(struct bpf_stack_build_id)
- * PERF_MAX_STACK_DEPTH;
- __u32 key = 0, val = 0, *value_p;
- void *stack_p;
-
- value_p = bpf_map_lookup_elem(&control_map, &key);
- if (value_p && *value_p)
- return 0; /* skip if non-zero *value_p */
-
- /* The size of stackmap and stackid_hmap should be the same */
- key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK);
- if ((int)key >= 0) {
- bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
- stack_p = bpf_map_lookup_elem(&stack_amap, &key);
- if (stack_p)
- bpf_get_stack(args, stack_p, max_len,
- BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
- }
-
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#include <linux/bpf.h>
-#include "bpf_helpers.h"
-
-#ifndef PERF_MAX_STACK_DEPTH
-#define PERF_MAX_STACK_DEPTH 127
-#endif
-
-struct bpf_map_def SEC("maps") control_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = 1,
-};
-
-struct bpf_map_def SEC("maps") stackid_hmap = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = 16384,
-};
-
-struct bpf_map_def SEC("maps") stackmap = {
- .type = BPF_MAP_TYPE_STACK_TRACE,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
- .max_entries = 16384,
-};
-
-struct bpf_map_def SEC("maps") stack_amap = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
- .max_entries = 16384,
-};
-
-/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
-struct sched_switch_args {
- unsigned long long pad;
- char prev_comm[16];
- int prev_pid;
- int prev_prio;
- long long prev_state;
- char next_comm[16];
- int next_pid;
- int next_prio;
-};
-
-SEC("tracepoint/sched/sched_switch")
-int oncpu(struct sched_switch_args *ctx)
-{
- __u32 max_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
- __u32 key = 0, val = 0, *value_p;
- void *stack_p;
-
- value_p = bpf_map_lookup_elem(&control_map, &key);
- if (value_p && *value_p)
- return 0; /* skip if non-zero *value_p */
-
- /* The size of stackmap and stackid_hmap should be the same */
- key = bpf_get_stackid(ctx, &stackmap, 0);
- if ((int)key >= 0) {
- bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
- stack_p = bpf_map_lookup_elem(&stack_amap, &key);
- if (stack_p)
- bpf_get_stack(ctx, stack_p, max_len, 0);
- }
-
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
+++ /dev/null
-/* Copyright (c) 2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-
-/* This program shows clang/llvm is able to generate code pattern
- * like:
- * _tcp_send_active_reset:
- * 0: bf 16 00 00 00 00 00 00 r6 = r1
- * ......
- * 335: b7 01 00 00 0f 00 00 00 r1 = 15
- * 336: 05 00 48 00 00 00 00 00 goto 72
- *
- * LBB0_3:
- * 337: b7 01 00 00 01 00 00 00 r1 = 1
- * 338: 63 1a d0 ff 00 00 00 00 *(u32 *)(r10 - 48) = r1
- * 408: b7 01 00 00 03 00 00 00 r1 = 3
- *
- * LBB0_4:
- * 409: 71 a2 fe ff 00 00 00 00 r2 = *(u8 *)(r10 - 2)
- * 410: bf a7 00 00 00 00 00 00 r7 = r10
- * 411: 07 07 00 00 b8 ff ff ff r7 += -72
- * 412: bf 73 00 00 00 00 00 00 r3 = r7
- * 413: 0f 13 00 00 00 00 00 00 r3 += r1
- * 414: 73 23 2d 00 00 00 00 00 *(u8 *)(r3 + 45) = r2
- *
- * From the above code snippet, the code generated by the compiler
- * is reasonable. The "r1" is assigned to different values in basic
- * blocks "_tcp_send_active_reset" and "LBB0_3", and used in "LBB0_4".
- * The verifier should be able to handle such code patterns.
- */
-#include <string.h>
-#include <linux/bpf.h>
-#include <linux/ipv6.h>
-#include <linux/version.h>
-#include <sys/socket.h>
-#include "bpf_helpers.h"
-
-#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
-#define TCP_ESTATS_MAGIC 0xBAADBEEF
-
-/* This test case needs "sock" and "pt_regs" data structure.
- * Recursively, "sock" needs "sock_common" and "inet_sock".
- * However, this is a unit test case only for
- * verifier purpose without bpf program execution.
- * We can safely mock much simpler data structures, basically
- * only taking the necessary fields from kernel headers.
- */
-typedef __u32 __bitwise __portpair;
-typedef __u64 __bitwise __addrpair;
-
-struct sock_common {
- unsigned short skc_family;
- union {
- __addrpair skc_addrpair;
- struct {
- __be32 skc_daddr;
- __be32 skc_rcv_saddr;
- };
- };
- union {
- __portpair skc_portpair;
- struct {
- __be16 skc_dport;
- __u16 skc_num;
- };
- };
- struct in6_addr skc_v6_daddr;
- struct in6_addr skc_v6_rcv_saddr;
-};
-
-struct sock {
- struct sock_common __sk_common;
-#define sk_family __sk_common.skc_family
-#define sk_v6_daddr __sk_common.skc_v6_daddr
-#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
-};
-
-struct inet_sock {
- struct sock sk;
-#define inet_daddr sk.__sk_common.skc_daddr
-#define inet_dport sk.__sk_common.skc_dport
- __be32 inet_saddr;
- __be16 inet_sport;
-};
-
-struct pt_regs {
- long di;
-};
-
-static inline struct inet_sock *inet_sk(const struct sock *sk)
-{
- return (struct inet_sock *)sk;
-}
-
-/* Define various data structures for state recording.
- * Some fields are not used due to test simplification.
- */
-enum tcp_estats_addrtype {
- TCP_ESTATS_ADDRTYPE_IPV4 = 1,
- TCP_ESTATS_ADDRTYPE_IPV6 = 2
-};
-
-enum tcp_estats_event_type {
- TCP_ESTATS_ESTABLISH,
- TCP_ESTATS_PERIODIC,
- TCP_ESTATS_TIMEOUT,
- TCP_ESTATS_RETRANSMIT_TIMEOUT,
- TCP_ESTATS_RETRANSMIT_OTHER,
- TCP_ESTATS_SYN_RETRANSMIT,
- TCP_ESTATS_SYNACK_RETRANSMIT,
- TCP_ESTATS_TERM,
- TCP_ESTATS_TX_RESET,
- TCP_ESTATS_RX_RESET,
- TCP_ESTATS_WRITE_TIMEOUT,
- TCP_ESTATS_CONN_TIMEOUT,
- TCP_ESTATS_ACK_LATENCY,
- TCP_ESTATS_NEVENTS,
-};
-
-struct tcp_estats_event {
- int pid;
- int cpu;
- unsigned long ts;
- unsigned int magic;
- enum tcp_estats_event_type event_type;
-};
-
-/* The below data structure is packed in order for
- * llvm compiler to generate expected code.
- */
-struct tcp_estats_conn_id {
- unsigned int localaddressType;
- struct {
- unsigned char data[16];
- } localaddress;
- struct {
- unsigned char data[16];
- } remaddress;
- unsigned short localport;
- unsigned short remport;
-} __attribute__((__packed__));
-
-struct tcp_estats_basic_event {
- struct tcp_estats_event event;
- struct tcp_estats_conn_id conn_id;
-};
-
-struct bpf_map_def SEC("maps") ev_record_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct tcp_estats_basic_event),
- .max_entries = 1024,
-};
-
-struct dummy_tracepoint_args {
- unsigned long long pad;
- struct sock *sock;
-};
-
-static __always_inline void tcp_estats_ev_init(struct tcp_estats_event *event,
- enum tcp_estats_event_type type)
-{
- event->magic = TCP_ESTATS_MAGIC;
- event->ts = bpf_ktime_get_ns();
- event->event_type = type;
-}
-
-static __always_inline void unaligned_u32_set(unsigned char *to, __u8 *from)
-{
- to[0] = _(from[0]);
- to[1] = _(from[1]);
- to[2] = _(from[2]);
- to[3] = _(from[3]);
-}
-
-static __always_inline void conn_id_ipv4_init(struct tcp_estats_conn_id *conn_id,
- __be32 *saddr, __be32 *daddr)
-{
- conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV4;
-
- unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr);
- unaligned_u32_set(conn_id->remaddress.data, (__u8 *)daddr);
-}
-
-static __always_inline void conn_id_ipv6_init(struct tcp_estats_conn_id *conn_id,
- __be32 *saddr, __be32 *daddr)
-{
- conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV6;
-
- unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr);
- unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32),
- (__u8 *)(saddr + 1));
- unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 2,
- (__u8 *)(saddr + 2));
- unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 3,
- (__u8 *)(saddr + 3));
-
- unaligned_u32_set(conn_id->remaddress.data,
- (__u8 *)(daddr));
- unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32),
- (__u8 *)(daddr + 1));
- unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 2,
- (__u8 *)(daddr + 2));
- unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 3,
- (__u8 *)(daddr + 3));
-}
-
-static __always_inline void tcp_estats_conn_id_init(struct tcp_estats_conn_id *conn_id,
- struct sock *sk)
-{
- conn_id->localport = _(inet_sk(sk)->inet_sport);
- conn_id->remport = _(inet_sk(sk)->inet_dport);
-
- if (_(sk->sk_family) == AF_INET6)
- conn_id_ipv6_init(conn_id,
- sk->sk_v6_rcv_saddr.s6_addr32,
- sk->sk_v6_daddr.s6_addr32);
- else
- conn_id_ipv4_init(conn_id,
- &inet_sk(sk)->inet_saddr,
- &inet_sk(sk)->inet_daddr);
-}
-
-static __always_inline void tcp_estats_init(struct sock *sk,
- struct tcp_estats_event *event,
- struct tcp_estats_conn_id *conn_id,
- enum tcp_estats_event_type type)
-{
- tcp_estats_ev_init(event, type);
- tcp_estats_conn_id_init(conn_id, sk);
-}
-
-static __always_inline void send_basic_event(struct sock *sk,
- enum tcp_estats_event_type type)
-{
- struct tcp_estats_basic_event ev;
- __u32 key = bpf_get_prandom_u32();
-
- memset(&ev, 0, sizeof(ev));
- tcp_estats_init(sk, &ev.event, &ev.conn_id, type);
- bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY);
-}
-
-SEC("dummy_tracepoint")
-int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
-{
- if (!arg->sock)
- return 0;
-
- send_basic_event(arg->sock, TCP_ESTATS_TX_RESET);
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-#include <stddef.h>
-#include <string.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/tcp.h>
-#include <netinet/in.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
-#include "test_tcpbpf.h"
-
-struct bpf_map_def SEC("maps") global_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct tcpbpf_globals),
- .max_entries = 4,
-};
-
-struct bpf_map_def SEC("maps") sockopt_results = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(int),
- .max_entries = 2,
-};
-
-static inline void update_event_map(int event)
-{
- __u32 key = 0;
- struct tcpbpf_globals g, *gp;
-
- gp = bpf_map_lookup_elem(&global_map, &key);
- if (gp == NULL) {
- struct tcpbpf_globals g = {0};
-
- g.event_map |= (1 << event);
- bpf_map_update_elem(&global_map, &key, &g,
- BPF_ANY);
- } else {
- g = *gp;
- g.event_map |= (1 << event);
- bpf_map_update_elem(&global_map, &key, &g,
- BPF_ANY);
- }
-}
-
-int _version SEC("version") = 1;
-
-SEC("sockops")
-int bpf_testcb(struct bpf_sock_ops *skops)
-{
- char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)];
- struct tcphdr *thdr;
- int good_call_rv = 0;
- int bad_call_rv = 0;
- int save_syn = 1;
- int rv = -1;
- int v = 0;
- int op;
-
- op = (int) skops->op;
-
- update_event_map(op);
-
- switch (op) {
- case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
- /* Test failure to set largest cb flag (assumes not defined) */
- bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
- /* Set callback */
- good_call_rv = bpf_sock_ops_cb_flags_set(skops,
- BPF_SOCK_OPS_STATE_CB_FLAG);
- /* Update results */
- {
- __u32 key = 0;
- struct tcpbpf_globals g, *gp;
-
- gp = bpf_map_lookup_elem(&global_map, &key);
- if (!gp)
- break;
- g = *gp;
- g.bad_cb_test_rv = bad_call_rv;
- g.good_cb_test_rv = good_call_rv;
- bpf_map_update_elem(&global_map, &key, &g,
- BPF_ANY);
- }
- break;
- case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
- skops->sk_txhash = 0x12345f;
- v = 0xff;
- rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v,
- sizeof(v));
- if (skops->family == AF_INET6) {
- v = bpf_getsockopt(skops, IPPROTO_TCP, TCP_SAVED_SYN,
- header, (sizeof(struct ipv6hdr) +
- sizeof(struct tcphdr)));
- if (!v) {
- int offset = sizeof(struct ipv6hdr);
-
- thdr = (struct tcphdr *)(header + offset);
- v = thdr->syn;
- __u32 key = 1;
-
- bpf_map_update_elem(&sockopt_results, &key, &v,
- BPF_ANY);
- }
- }
- break;
- case BPF_SOCK_OPS_RTO_CB:
- break;
- case BPF_SOCK_OPS_RETRANS_CB:
- break;
- case BPF_SOCK_OPS_STATE_CB:
- if (skops->args[1] == BPF_TCP_CLOSE) {
- __u32 key = 0;
- struct tcpbpf_globals g, *gp;
-
- gp = bpf_map_lookup_elem(&global_map, &key);
- if (!gp)
- break;
- g = *gp;
- if (skops->args[0] == BPF_TCP_LISTEN) {
- g.num_listen++;
- } else {
- g.total_retrans = skops->total_retrans;
- g.data_segs_in = skops->data_segs_in;
- g.data_segs_out = skops->data_segs_out;
- g.bytes_received = skops->bytes_received;
- g.bytes_acked = skops->bytes_acked;
- }
- bpf_map_update_elem(&global_map, &key, &g,
- BPF_ANY);
- }
- break;
- case BPF_SOCK_OPS_TCP_LISTEN_CB:
- bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
- v = bpf_setsockopt(skops, IPPROTO_TCP, TCP_SAVE_SYN,
- &save_syn, sizeof(save_syn));
- /* Update global map w/ result of setsock opt */
- __u32 key = 0;
-
- bpf_map_update_elem(&sockopt_results, &key, &v, BPF_ANY);
- break;
- default:
- rv = -1;
- }
- skops->reply = rv;
- return 1;
-}
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-#include <stddef.h>
-#include <string.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/tcp.h>
-#include <netinet/in.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
-#include "test_tcpnotify.h"
-
-struct bpf_map_def SEC("maps") global_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct tcpnotify_globals),
- .max_entries = 4,
-};
-
-struct bpf_map_def SEC("maps") perf_event_map = {
- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(__u32),
- .max_entries = 2,
-};
-
-int _version SEC("version") = 1;
-
-SEC("sockops")
-int bpf_testcb(struct bpf_sock_ops *skops)
-{
- int rv = -1;
- int op;
-
- op = (int) skops->op;
-
- if (bpf_ntohl(skops->remote_port) != TESTPORT) {
- skops->reply = -1;
- return 0;
- }
-
- switch (op) {
- case BPF_SOCK_OPS_TIMEOUT_INIT:
- case BPF_SOCK_OPS_RWND_INIT:
- case BPF_SOCK_OPS_NEEDS_ECN:
- case BPF_SOCK_OPS_BASE_RTT:
- case BPF_SOCK_OPS_RTO_CB:
- rv = 1;
- break;
-
- case BPF_SOCK_OPS_TCP_CONNECT_CB:
- case BPF_SOCK_OPS_TCP_LISTEN_CB:
- case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
- case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
- bpf_sock_ops_cb_flags_set(skops, (BPF_SOCK_OPS_RETRANS_CB_FLAG|
- BPF_SOCK_OPS_RTO_CB_FLAG));
- rv = 1;
- break;
- case BPF_SOCK_OPS_RETRANS_CB: {
- __u32 key = 0;
- struct tcpnotify_globals g, *gp;
- struct tcp_notifier msg = {
- .type = 0xde,
- .subtype = 0xad,
- .source = 0xbe,
- .hash = 0xef,
- };
-
- rv = 1;
-
- /* Update results */
- gp = bpf_map_lookup_elem(&global_map, &key);
- if (!gp)
- break;
- g = *gp;
- g.total_retrans = skops->total_retrans;
- g.ncalls++;
- bpf_map_update_elem(&global_map, &key, &g,
- BPF_ANY);
- bpf_perf_event_output(skops, &perf_event_map,
- BPF_F_CURRENT_CPU,
- &msg, sizeof(msg));
- }
- break;
- default:
- rv = -1;
- }
- skops->reply = rv;
- return 1;
-}
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2017 Facebook
-
-#include <linux/bpf.h>
-#include "bpf_helpers.h"
-
-/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
-struct sched_switch_args {
- unsigned long long pad;
- char prev_comm[16];
- int prev_pid;
- int prev_prio;
- long long prev_state;
- char next_comm[16];
- int next_pid;
- int next_prio;
-};
-
-SEC("tracepoint/sched/sched_switch")
-int oncpu(struct sched_switch_args *ctx)
-{
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2016 VMware
- * Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <stddef.h>
-#include <string.h>
-#include <arpa/inet.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/types.h>
-#include <linux/tcp.h>
-#include <linux/socket.h>
-#include <linux/pkt_cls.h>
-#include <linux/erspan.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
-
-#define ERROR(ret) do {\
- char fmt[] = "ERROR line:%d ret:%d\n";\
- bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
- } while (0)
-
-int _version SEC("version") = 1;
-
-struct geneve_opt {
- __be16 opt_class;
- __u8 type;
- __u8 length:5;
- __u8 r3:1;
- __u8 r2:1;
- __u8 r1:1;
- __u8 opt_data[8]; /* hard-coded to 8 byte */
-};
-
-struct vxlan_metadata {
- __u32 gbp;
-};
-
-SEC("gre_set_tunnel")
-int _gre_set_tunnel(struct __sk_buff *skb)
-{
- int ret;
- struct bpf_tunnel_key key;
-
- __builtin_memset(&key, 0x0, sizeof(key));
- key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
- key.tunnel_id = 2;
- key.tunnel_tos = 0;
- key.tunnel_ttl = 64;
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
- BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("gre_get_tunnel")
-int _gre_get_tunnel(struct __sk_buff *skb)
-{
- int ret;
- struct bpf_tunnel_key key;
- char fmt[] = "key %d remote ip 0x%x\n";
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4);
- return TC_ACT_OK;
-}
-
-SEC("ip6gretap_set_tunnel")
-int _ip6gretap_set_tunnel(struct __sk_buff *skb)
-{
- struct bpf_tunnel_key key;
- int ret;
-
- __builtin_memset(&key, 0x0, sizeof(key));
- key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
- key.tunnel_id = 2;
- key.tunnel_tos = 0;
- key.tunnel_ttl = 64;
- key.tunnel_label = 0xabcde;
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
- BPF_F_SEQ_NUMBER);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("ip6gretap_get_tunnel")
-int _ip6gretap_get_tunnel(struct __sk_buff *skb)
-{
- char fmt[] = "key %d remote ip6 ::%x label %x\n";
- struct bpf_tunnel_key key;
- int ret;
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
-
- return TC_ACT_OK;
-}
-
-SEC("erspan_set_tunnel")
-int _erspan_set_tunnel(struct __sk_buff *skb)
-{
- struct bpf_tunnel_key key;
- struct erspan_metadata md;
- int ret;
-
- __builtin_memset(&key, 0x0, sizeof(key));
- key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
- key.tunnel_id = 2;
- key.tunnel_tos = 0;
- key.tunnel_ttl = 64;
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
- BPF_F_ZERO_CSUM_TX);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- __builtin_memset(&md, 0, sizeof(md));
-#ifdef ERSPAN_V1
- md.version = 1;
- md.u.index = bpf_htonl(123);
-#else
- __u8 direction = 1;
- __u8 hwid = 7;
-
- md.version = 2;
- md.u.md2.dir = direction;
- md.u.md2.hwid = hwid & 0xf;
- md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
-#endif
-
- ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("erspan_get_tunnel")
-int _erspan_get_tunnel(struct __sk_buff *skb)
-{
- char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
- struct bpf_tunnel_key key;
- struct erspan_metadata md;
- __u32 index;
- int ret;
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, md.version);
-
-#ifdef ERSPAN_V1
- char fmt2[] = "\tindex %x\n";
-
- index = bpf_ntohl(md.u.index);
- bpf_trace_printk(fmt2, sizeof(fmt2), index);
-#else
- char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
-
- bpf_trace_printk(fmt2, sizeof(fmt2),
- md.u.md2.dir,
- (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
- bpf_ntohl(md.u.md2.timestamp));
-#endif
-
- return TC_ACT_OK;
-}
-
-SEC("ip4ip6erspan_set_tunnel")
-int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
-{
- struct bpf_tunnel_key key;
- struct erspan_metadata md;
- int ret;
-
- __builtin_memset(&key, 0x0, sizeof(key));
- key.remote_ipv6[3] = bpf_htonl(0x11);
- key.tunnel_id = 2;
- key.tunnel_tos = 0;
- key.tunnel_ttl = 64;
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- __builtin_memset(&md, 0, sizeof(md));
-
-#ifdef ERSPAN_V1
- md.u.index = bpf_htonl(123);
- md.version = 1;
-#else
- __u8 direction = 0;
- __u8 hwid = 17;
-
- md.version = 2;
- md.u.md2.dir = direction;
- md.u.md2.hwid = hwid & 0xf;
- md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
-#endif
-
- ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("ip4ip6erspan_get_tunnel")
-int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
-{
- char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
- struct bpf_tunnel_key key;
- struct erspan_metadata md;
- __u32 index;
- int ret;
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, md.version);
-
-#ifdef ERSPAN_V1
- char fmt2[] = "\tindex %x\n";
-
- index = bpf_ntohl(md.u.index);
- bpf_trace_printk(fmt2, sizeof(fmt2), index);
-#else
- char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
-
- bpf_trace_printk(fmt2, sizeof(fmt2),
- md.u.md2.dir,
- (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
- bpf_ntohl(md.u.md2.timestamp));
-#endif
-
- return TC_ACT_OK;
-}
-
-SEC("vxlan_set_tunnel")
-int _vxlan_set_tunnel(struct __sk_buff *skb)
-{
- int ret;
- struct bpf_tunnel_key key;
- struct vxlan_metadata md;
-
- __builtin_memset(&key, 0x0, sizeof(key));
- key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
- key.tunnel_id = 2;
- key.tunnel_tos = 0;
- key.tunnel_ttl = 64;
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
- BPF_F_ZERO_CSUM_TX);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
- ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("vxlan_get_tunnel")
-int _vxlan_get_tunnel(struct __sk_buff *skb)
-{
- int ret;
- struct bpf_tunnel_key key;
- struct vxlan_metadata md;
- char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, md.gbp);
-
- return TC_ACT_OK;
-}
-
-SEC("ip6vxlan_set_tunnel")
-int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
-{
- struct bpf_tunnel_key key;
- int ret;
-
- __builtin_memset(&key, 0x0, sizeof(key));
- key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
- key.tunnel_id = 22;
- key.tunnel_tos = 0;
- key.tunnel_ttl = 64;
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("ip6vxlan_get_tunnel")
-int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
-{
- char fmt[] = "key %d remote ip6 ::%x label %x\n";
- struct bpf_tunnel_key key;
- int ret;
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
-
- return TC_ACT_OK;
-}
-
-SEC("geneve_set_tunnel")
-int _geneve_set_tunnel(struct __sk_buff *skb)
-{
- int ret, ret2;
- struct bpf_tunnel_key key;
- struct geneve_opt gopt;
-
- __builtin_memset(&key, 0x0, sizeof(key));
- key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
- key.tunnel_id = 2;
- key.tunnel_tos = 0;
- key.tunnel_ttl = 64;
-
- __builtin_memset(&gopt, 0x0, sizeof(gopt));
- gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
- gopt.type = 0x08;
- gopt.r1 = 0;
- gopt.r2 = 0;
- gopt.r3 = 0;
- gopt.length = 2; /* 4-byte multiple */
- *(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef);
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
- BPF_F_ZERO_CSUM_TX);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("geneve_get_tunnel")
-int _geneve_get_tunnel(struct __sk_buff *skb)
-{
- int ret;
- struct bpf_tunnel_key key;
- struct geneve_opt gopt;
- char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, gopt.opt_class);
- return TC_ACT_OK;
-}
-
-SEC("ip6geneve_set_tunnel")
-int _ip6geneve_set_tunnel(struct __sk_buff *skb)
-{
- struct bpf_tunnel_key key;
- struct geneve_opt gopt;
- int ret;
-
- __builtin_memset(&key, 0x0, sizeof(key));
- key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
- key.tunnel_id = 22;
- key.tunnel_tos = 0;
- key.tunnel_ttl = 64;
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- __builtin_memset(&gopt, 0x0, sizeof(gopt));
- gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
- gopt.type = 0x08;
- gopt.r1 = 0;
- gopt.r2 = 0;
- gopt.r3 = 0;
- gopt.length = 2; /* 4-byte multiple */
- *(int *) &gopt.opt_data = bpf_htonl(0xfeedbeef);
-
- ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("ip6geneve_get_tunnel")
-int _ip6geneve_get_tunnel(struct __sk_buff *skb)
-{
- char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
- struct bpf_tunnel_key key;
- struct geneve_opt gopt;
- int ret;
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, gopt.opt_class);
-
- return TC_ACT_OK;
-}
-
-SEC("ipip_set_tunnel")
-int _ipip_set_tunnel(struct __sk_buff *skb)
-{
- struct bpf_tunnel_key key = {};
- void *data = (void *)(long)skb->data;
- struct iphdr *iph = data;
- struct tcphdr *tcp = data + sizeof(*iph);
- void *data_end = (void *)(long)skb->data_end;
- int ret;
-
- /* single length check */
- if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
- ERROR(1);
- return TC_ACT_SHOT;
- }
-
- key.tunnel_ttl = 64;
- if (iph->protocol == IPPROTO_ICMP) {
- key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
- } else {
- if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
- return TC_ACT_SHOT;
-
- if (tcp->dest == bpf_htons(5200))
- key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
- else if (tcp->dest == bpf_htons(5201))
- key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
- else
- return TC_ACT_SHOT;
- }
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("ipip_get_tunnel")
-int _ipip_get_tunnel(struct __sk_buff *skb)
-{
- int ret;
- struct bpf_tunnel_key key;
- char fmt[] = "remote ip 0x%x\n";
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4);
- return TC_ACT_OK;
-}
-
-SEC("ipip6_set_tunnel")
-int _ipip6_set_tunnel(struct __sk_buff *skb)
-{
- struct bpf_tunnel_key key = {};
- void *data = (void *)(long)skb->data;
- struct iphdr *iph = data;
- struct tcphdr *tcp = data + sizeof(*iph);
- void *data_end = (void *)(long)skb->data_end;
- int ret;
-
- /* single length check */
- if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
- ERROR(1);
- return TC_ACT_SHOT;
- }
-
- __builtin_memset(&key, 0x0, sizeof(key));
- key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
- key.tunnel_ttl = 64;
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("ipip6_get_tunnel")
-int _ipip6_get_tunnel(struct __sk_buff *skb)
-{
- int ret;
- struct bpf_tunnel_key key;
- char fmt[] = "remote ip6 %x::%x\n";
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
- bpf_htonl(key.remote_ipv6[3]));
- return TC_ACT_OK;
-}
-
-SEC("ip6ip6_set_tunnel")
-int _ip6ip6_set_tunnel(struct __sk_buff *skb)
-{
- struct bpf_tunnel_key key = {};
- void *data = (void *)(long)skb->data;
- struct ipv6hdr *iph = data;
- struct tcphdr *tcp = data + sizeof(*iph);
- void *data_end = (void *)(long)skb->data_end;
- int ret;
-
- /* single length check */
- if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
- ERROR(1);
- return TC_ACT_SHOT;
- }
-
- key.remote_ipv6[0] = bpf_htonl(0x2401db00);
- key.tunnel_ttl = 64;
-
- if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) {
- key.remote_ipv6[3] = bpf_htonl(1);
- } else {
- if (iph->nexthdr != 6 /* NEXTHDR_TCP */) {
- ERROR(iph->nexthdr);
- return TC_ACT_SHOT;
- }
-
- if (tcp->dest == bpf_htons(5200)) {
- key.remote_ipv6[3] = bpf_htonl(1);
- } else if (tcp->dest == bpf_htons(5201)) {
- key.remote_ipv6[3] = bpf_htonl(2);
- } else {
- ERROR(tcp->dest);
- return TC_ACT_SHOT;
- }
- }
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("ip6ip6_get_tunnel")
-int _ip6ip6_get_tunnel(struct __sk_buff *skb)
-{
- int ret;
- struct bpf_tunnel_key key;
- char fmt[] = "remote ip6 %x::%x\n";
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
- bpf_htonl(key.remote_ipv6[3]));
- return TC_ACT_OK;
-}
-
-SEC("xfrm_get_state")
-int _xfrm_get_state(struct __sk_buff *skb)
-{
- struct bpf_xfrm_state x;
- char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n";
- int ret;
-
- ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0);
- if (ret < 0)
- return TC_ACT_OK;
-
- bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi),
- bpf_ntohl(x.remote_ipv4));
- return TC_ACT_OK;
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-/* Copyright (c) 2016,2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <stddef.h>
-#include <string.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/in.h>
-#include <linux/udp.h>
-#include <linux/tcp.h>
-#include <linux/pkt_cls.h>
-#include <sys/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
-#include "test_iptunnel_common.h"
-
-int _version SEC("version") = 1;
-
-struct bpf_map_def SEC("maps") rxcnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u64),
- .max_entries = 256,
-};
-
-struct bpf_map_def SEC("maps") vip2tnl = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct vip),
- .value_size = sizeof(struct iptnl_info),
- .max_entries = MAX_IPTNL_ENTRIES,
-};
-
-static __always_inline void count_tx(__u32 protocol)
-{
- __u64 *rxcnt_count;
-
- rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
- if (rxcnt_count)
- *rxcnt_count += 1;
-}
-
-static __always_inline int get_dport(void *trans_data, void *data_end,
- __u8 protocol)
-{
- struct tcphdr *th;
- struct udphdr *uh;
-
- switch (protocol) {
- case IPPROTO_TCP:
- th = (struct tcphdr *)trans_data;
- if (th + 1 > data_end)
- return -1;
- return th->dest;
- case IPPROTO_UDP:
- uh = (struct udphdr *)trans_data;
- if (uh + 1 > data_end)
- return -1;
- return uh->dest;
- default:
- return 0;
- }
-}
-
-static __always_inline void set_ethhdr(struct ethhdr *new_eth,
- const struct ethhdr *old_eth,
- const struct iptnl_info *tnl,
- __be16 h_proto)
-{
- memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
- memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
- new_eth->h_proto = h_proto;
-}
-
-static __always_inline int handle_ipv4(struct xdp_md *xdp)
-{
- void *data_end = (void *)(long)xdp->data_end;
- void *data = (void *)(long)xdp->data;
- struct iptnl_info *tnl;
- struct ethhdr *new_eth;
- struct ethhdr *old_eth;
- struct iphdr *iph = data + sizeof(struct ethhdr);
- __u16 *next_iph;
- __u16 payload_len;
- struct vip vip = {};
- int dport;
- __u32 csum = 0;
- int i;
-
- if (iph + 1 > data_end)
- return XDP_DROP;
-
- dport = get_dport(iph + 1, data_end, iph->protocol);
- if (dport == -1)
- return XDP_DROP;
-
- vip.protocol = iph->protocol;
- vip.family = AF_INET;
- vip.daddr.v4 = iph->daddr;
- vip.dport = dport;
- payload_len = bpf_ntohs(iph->tot_len);
-
- tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
- /* It only does v4-in-v4 */
- if (!tnl || tnl->family != AF_INET)
- return XDP_PASS;
-
- if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
- return XDP_DROP;
-
- data = (void *)(long)xdp->data;
- data_end = (void *)(long)xdp->data_end;
-
- new_eth = data;
- iph = data + sizeof(*new_eth);
- old_eth = data + sizeof(*iph);
-
- if (new_eth + 1 > data_end ||
- old_eth + 1 > data_end ||
- iph + 1 > data_end)
- return XDP_DROP;
-
- set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP));
-
- iph->version = 4;
- iph->ihl = sizeof(*iph) >> 2;
- iph->frag_off = 0;
- iph->protocol = IPPROTO_IPIP;
- iph->check = 0;
- iph->tos = 0;
- iph->tot_len = bpf_htons(payload_len + sizeof(*iph));
- iph->daddr = tnl->daddr.v4;
- iph->saddr = tnl->saddr.v4;
- iph->ttl = 8;
-
- next_iph = (__u16 *)iph;
-#pragma clang loop unroll(full)
- for (i = 0; i < sizeof(*iph) >> 1; i++)
- csum += *next_iph++;
-
- iph->check = ~((csum & 0xffff) + (csum >> 16));
-
- count_tx(vip.protocol);
-
- return XDP_TX;
-}
-
-static __always_inline int handle_ipv6(struct xdp_md *xdp)
-{
- void *data_end = (void *)(long)xdp->data_end;
- void *data = (void *)(long)xdp->data;
- struct iptnl_info *tnl;
- struct ethhdr *new_eth;
- struct ethhdr *old_eth;
- struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
- __u16 payload_len;
- struct vip vip = {};
- int dport;
-
- if (ip6h + 1 > data_end)
- return XDP_DROP;
-
- dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
- if (dport == -1)
- return XDP_DROP;
-
- vip.protocol = ip6h->nexthdr;
- vip.family = AF_INET6;
- memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
- vip.dport = dport;
- payload_len = ip6h->payload_len;
-
- tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
- /* It only does v6-in-v6 */
- if (!tnl || tnl->family != AF_INET6)
- return XDP_PASS;
-
- if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
- return XDP_DROP;
-
- data = (void *)(long)xdp->data;
- data_end = (void *)(long)xdp->data_end;
-
- new_eth = data;
- ip6h = data + sizeof(*new_eth);
- old_eth = data + sizeof(*ip6h);
-
- if (new_eth + 1 > data_end || old_eth + 1 > data_end ||
- ip6h + 1 > data_end)
- return XDP_DROP;
-
- set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6));
-
- ip6h->version = 6;
- ip6h->priority = 0;
- memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
- ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + sizeof(*ip6h));
- ip6h->nexthdr = IPPROTO_IPV6;
- ip6h->hop_limit = 8;
- memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
- memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
-
- count_tx(vip.protocol);
-
- return XDP_TX;
-}
-
-SEC("xdp_tx_iptunnel")
-int _xdp_tx_iptunnel(struct xdp_md *xdp)
-{
- void *data_end = (void *)(long)xdp->data_end;
- void *data = (void *)(long)xdp->data;
- struct ethhdr *eth = data;
- __u16 h_proto;
-
- if (eth + 1 > data_end)
- return XDP_DROP;
-
- h_proto = eth->h_proto;
-
- if (h_proto == bpf_htons(ETH_P_IP))
- return handle_ipv4(xdp);
- else if (h_proto == bpf_htons(ETH_P_IPV6))
-
- return handle_ipv6(xdp);
- else
- return XDP_DROP;
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/pkt_cls.h>
-
-#include "bpf_helpers.h"
-
-#define __round_mask(x, y) ((__typeof__(x))((y) - 1))
-#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
-#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
-
-SEC("t")
-int ing_cls(struct __sk_buff *ctx)
-{
- __u8 *data, *data_meta, *data_end;
- __u32 diff = 0;
-
- data_meta = ctx_ptr(ctx, data_meta);
- data_end = ctx_ptr(ctx, data_end);
- data = ctx_ptr(ctx, data);
-
- if (data + ETH_ALEN > data_end ||
- data_meta + round_up(ETH_ALEN, 4) > data)
- return TC_ACT_SHOT;
-
- diff |= ((__u32 *)data_meta)[0] ^ ((__u32 *)data)[0];
- diff |= ((__u16 *)data_meta)[2] ^ ((__u16 *)data)[2];
-
- return diff ? TC_ACT_SHOT : TC_ACT_OK;
-}
-
-SEC("x")
-int ing_xdp(struct xdp_md *ctx)
-{
- __u8 *data, *data_meta, *data_end;
- int ret;
-
- ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4));
- if (ret < 0)
- return XDP_DROP;
-
- data_meta = ctx_ptr(ctx, data_meta);
- data_end = ctx_ptr(ctx, data_end);
- data = ctx_ptr(ctx, data);
-
- if (data + ETH_ALEN > data_end ||
- data_meta + round_up(ETH_ALEN, 4) > data)
- return XDP_DROP;
-
- __builtin_memcpy(data_meta, data, ETH_ALEN);
- return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2017 Facebook
-#include <stddef.h>
-#include <stdbool.h>
-#include <string.h>
-#include <linux/pkt_cls.h>
-#include <linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/icmp.h>
-#include <linux/icmpv6.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include "bpf_helpers.h"
-
-#define bpf_printk(fmt, ...) \
-({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
-})
-
-static __u32 rol32(__u32 word, unsigned int shift)
-{
- return (word << shift) | (word >> ((-shift) & 31));
-}
-
-/* copy paste of jhash from kernel sources to make sure llvm
- * can compile it into valid sequence of bpf instructions
- */
-#define __jhash_mix(a, b, c) \
-{ \
- a -= c; a ^= rol32(c, 4); c += b; \
- b -= a; b ^= rol32(a, 6); a += c; \
- c -= b; c ^= rol32(b, 8); b += a; \
- a -= c; a ^= rol32(c, 16); c += b; \
- b -= a; b ^= rol32(a, 19); a += c; \
- c -= b; c ^= rol32(b, 4); b += a; \
-}
-
-#define __jhash_final(a, b, c) \
-{ \
- c ^= b; c -= rol32(b, 14); \
- a ^= c; a -= rol32(c, 11); \
- b ^= a; b -= rol32(a, 25); \
- c ^= b; c -= rol32(b, 16); \
- a ^= c; a -= rol32(c, 4); \
- b ^= a; b -= rol32(a, 14); \
- c ^= b; c -= rol32(b, 24); \
-}
-
-#define JHASH_INITVAL 0xdeadbeef
-
-typedef unsigned int u32;
-
-static __attribute__ ((noinline))
-u32 jhash(const void *key, u32 length, u32 initval)
-{
- u32 a, b, c;
- const unsigned char *k = key;
-
- a = b = c = JHASH_INITVAL + length + initval;
-
- while (length > 12) {
- a += *(u32 *)(k);
- b += *(u32 *)(k + 4);
- c += *(u32 *)(k + 8);
- __jhash_mix(a, b, c);
- length -= 12;
- k += 12;
- }
- switch (length) {
- case 12: c += (u32)k[11]<<24;
- case 11: c += (u32)k[10]<<16;
- case 10: c += (u32)k[9]<<8;
- case 9: c += k[8];
- case 8: b += (u32)k[7]<<24;
- case 7: b += (u32)k[6]<<16;
- case 6: b += (u32)k[5]<<8;
- case 5: b += k[4];
- case 4: a += (u32)k[3]<<24;
- case 3: a += (u32)k[2]<<16;
- case 2: a += (u32)k[1]<<8;
- case 1: a += k[0];
- __jhash_final(a, b, c);
- case 0: /* Nothing left to add */
- break;
- }
-
- return c;
-}
-
-static __attribute__ ((noinline))
-u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
-{
- a += initval;
- b += initval;
- c += initval;
- __jhash_final(a, b, c);
- return c;
-}
-
-static __attribute__ ((noinline))
-u32 jhash_2words(u32 a, u32 b, u32 initval)
-{
- return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
-}
-
-struct flow_key {
- union {
- __be32 src;
- __be32 srcv6[4];
- };
- union {
- __be32 dst;
- __be32 dstv6[4];
- };
- union {
- __u32 ports;
- __u16 port16[2];
- };
- __u8 proto;
-};
-
-struct packet_description {
- struct flow_key flow;
- __u8 flags;
-};
-
-struct ctl_value {
- union {
- __u64 value;
- __u32 ifindex;
- __u8 mac[6];
- };
-};
-
-struct vip_definition {
- union {
- __be32 vip;
- __be32 vipv6[4];
- };
- __u16 port;
- __u16 family;
- __u8 proto;
-};
-
-struct vip_meta {
- __u32 flags;
- __u32 vip_num;
-};
-
-struct real_pos_lru {
- __u32 pos;
- __u64 atime;
-};
-
-struct real_definition {
- union {
- __be32 dst;
- __be32 dstv6[4];
- };
- __u8 flags;
-};
-
-struct lb_stats {
- __u64 v2;
- __u64 v1;
-};
-
-struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct vip_definition),
- .value_size = sizeof(struct vip_meta),
- .max_entries = 512,
- .map_flags = 0,
-};
-
-struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = {
- .type = BPF_MAP_TYPE_LRU_HASH,
- .key_size = sizeof(struct flow_key),
- .value_size = sizeof(struct real_pos_lru),
- .max_entries = 300,
- .map_flags = 1U << 1,
-};
-
-struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = 12 * 655,
- .map_flags = 0,
-};
-
-struct bpf_map_def __attribute__ ((section("maps"), used)) reals = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct real_definition),
- .max_entries = 40,
- .map_flags = 0,
-};
-
-struct bpf_map_def __attribute__ ((section("maps"), used)) stats = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct lb_stats),
- .max_entries = 515,
- .map_flags = 0,
-};
-
-struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct ctl_value),
- .max_entries = 16,
- .map_flags = 0,
-};
-
-struct eth_hdr {
- unsigned char eth_dest[6];
- unsigned char eth_source[6];
- unsigned short eth_proto;
-};
-
-static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
-{
- __u64 off = sizeof(struct eth_hdr);
- if (is_ipv6) {
- off += sizeof(struct ipv6hdr);
- if (is_icmp)
- off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr);
- } else {
- off += sizeof(struct iphdr);
- if (is_icmp)
- off += sizeof(struct icmphdr) + sizeof(struct iphdr);
- }
- return off;
-}
-
-static __attribute__ ((noinline))
-bool parse_udp(void *data, void *data_end,
- bool is_ipv6, struct packet_description *pckt)
-{
-
- bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
- __u64 off = calc_offset(is_ipv6, is_icmp);
- struct udphdr *udp;
- udp = data + off;
-
- if (udp + 1 > data_end)
- return 0;
- if (!is_icmp) {
- pckt->flow.port16[0] = udp->source;
- pckt->flow.port16[1] = udp->dest;
- } else {
- pckt->flow.port16[0] = udp->dest;
- pckt->flow.port16[1] = udp->source;
- }
- return 1;
-}
-
-static __attribute__ ((noinline))
-bool parse_tcp(void *data, void *data_end,
- bool is_ipv6, struct packet_description *pckt)
-{
-
- bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
- __u64 off = calc_offset(is_ipv6, is_icmp);
- struct tcphdr *tcp;
-
- tcp = data + off;
- if (tcp + 1 > data_end)
- return 0;
- if (tcp->syn)
- pckt->flags |= (1 << 1);
- if (!is_icmp) {
- pckt->flow.port16[0] = tcp->source;
- pckt->flow.port16[1] = tcp->dest;
- } else {
- pckt->flow.port16[0] = tcp->dest;
- pckt->flow.port16[1] = tcp->source;
- }
- return 1;
-}
-
-static __attribute__ ((noinline))
-bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
- struct packet_description *pckt,
- struct real_definition *dst, __u32 pkt_bytes)
-{
- struct eth_hdr *new_eth;
- struct eth_hdr *old_eth;
- struct ipv6hdr *ip6h;
- __u32 ip_suffix;
- void *data_end;
- void *data;
-
- if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
- return 0;
- data = (void *)(long)xdp->data;
- data_end = (void *)(long)xdp->data_end;
- new_eth = data;
- ip6h = data + sizeof(struct eth_hdr);
- old_eth = data + sizeof(struct ipv6hdr);
- if (new_eth + 1 > data_end ||
- old_eth + 1 > data_end || ip6h + 1 > data_end)
- return 0;
- memcpy(new_eth->eth_dest, cval->mac, 6);
- memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
- new_eth->eth_proto = 56710;
- ip6h->version = 6;
- ip6h->priority = 0;
- memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
-
- ip6h->nexthdr = IPPROTO_IPV6;
- ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0];
- ip6h->payload_len =
- __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr));
- ip6h->hop_limit = 4;
-
- ip6h->saddr.in6_u.u6_addr32[0] = 1;
- ip6h->saddr.in6_u.u6_addr32[1] = 2;
- ip6h->saddr.in6_u.u6_addr32[2] = 3;
- ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
- memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
- return 1;
-}
-
-static __attribute__ ((noinline))
-bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
- struct packet_description *pckt,
- struct real_definition *dst, __u32 pkt_bytes)
-{
-
- __u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]);
- struct eth_hdr *new_eth;
- struct eth_hdr *old_eth;
- __u16 *next_iph_u16;
- struct iphdr *iph;
- __u32 csum = 0;
- void *data_end;
- void *data;
-
- ip_suffix <<= 15;
- ip_suffix ^= pckt->flow.src;
- if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
- return 0;
- data = (void *)(long)xdp->data;
- data_end = (void *)(long)xdp->data_end;
- new_eth = data;
- iph = data + sizeof(struct eth_hdr);
- old_eth = data + sizeof(struct iphdr);
- if (new_eth + 1 > data_end ||
- old_eth + 1 > data_end || iph + 1 > data_end)
- return 0;
- memcpy(new_eth->eth_dest, cval->mac, 6);
- memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
- new_eth->eth_proto = 8;
- iph->version = 4;
- iph->ihl = 5;
- iph->frag_off = 0;
- iph->protocol = IPPROTO_IPIP;
- iph->check = 0;
- iph->tos = 1;
- iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr));
- /* don't update iph->daddr, since it will overwrite old eth_proto
- * and multiple iterations of bpf_prog_run() will fail
- */
-
- iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst;
- iph->ttl = 4;
-
- next_iph_u16 = (__u16 *) iph;
-#pragma clang loop unroll(full)
- for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
- csum += *next_iph_u16++;
- iph->check = ~((csum & 0xffff) + (csum >> 16));
- if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
- return 0;
- return 1;
-}
-
-static __attribute__ ((noinline))
-bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
-{
- struct eth_hdr *new_eth;
- struct eth_hdr *old_eth;
-
- old_eth = *data;
- new_eth = *data + sizeof(struct ipv6hdr);
- memcpy(new_eth->eth_source, old_eth->eth_source, 6);
- memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
- if (inner_v4)
- new_eth->eth_proto = 8;
- else
- new_eth->eth_proto = 56710;
- if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
- return 0;
- *data = (void *)(long)xdp->data;
- *data_end = (void *)(long)xdp->data_end;
- return 1;
-}
-
-static __attribute__ ((noinline))
-bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
-{
- struct eth_hdr *new_eth;
- struct eth_hdr *old_eth;
-
- old_eth = *data;
- new_eth = *data + sizeof(struct iphdr);
- memcpy(new_eth->eth_source, old_eth->eth_source, 6);
- memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
- new_eth->eth_proto = 8;
- if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
- return 0;
- *data = (void *)(long)xdp->data;
- *data_end = (void *)(long)xdp->data_end;
- return 1;
-}
-
-static __attribute__ ((noinline))
-int swap_mac_and_send(void *data, void *data_end)
-{
- unsigned char tmp_mac[6];
- struct eth_hdr *eth;
-
- eth = data;
- memcpy(tmp_mac, eth->eth_source, 6);
- memcpy(eth->eth_source, eth->eth_dest, 6);
- memcpy(eth->eth_dest, tmp_mac, 6);
- return XDP_TX;
-}
-
-static __attribute__ ((noinline))
-int send_icmp_reply(void *data, void *data_end)
-{
- struct icmphdr *icmp_hdr;
- __u16 *next_iph_u16;
- __u32 tmp_addr = 0;
- struct iphdr *iph;
- __u32 csum1 = 0;
- __u32 csum = 0;
- __u64 off = 0;
-
- if (data + sizeof(struct eth_hdr)
- + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end)
- return XDP_DROP;
- off += sizeof(struct eth_hdr);
- iph = data + off;
- off += sizeof(struct iphdr);
- icmp_hdr = data + off;
- icmp_hdr->type = 0;
- icmp_hdr->checksum += 0x0007;
- iph->ttl = 4;
- tmp_addr = iph->daddr;
- iph->daddr = iph->saddr;
- iph->saddr = tmp_addr;
- iph->check = 0;
- next_iph_u16 = (__u16 *) iph;
-#pragma clang loop unroll(full)
- for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
- csum += *next_iph_u16++;
- iph->check = ~((csum & 0xffff) + (csum >> 16));
- return swap_mac_and_send(data, data_end);
-}
-
-static __attribute__ ((noinline))
-int send_icmp6_reply(void *data, void *data_end)
-{
- struct icmp6hdr *icmp_hdr;
- struct ipv6hdr *ip6h;
- __be32 tmp_addr[4];
- __u64 off = 0;
-
- if (data + sizeof(struct eth_hdr)
- + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end)
- return XDP_DROP;
- off += sizeof(struct eth_hdr);
- ip6h = data + off;
- off += sizeof(struct ipv6hdr);
- icmp_hdr = data + off;
- icmp_hdr->icmp6_type = 129;
- icmp_hdr->icmp6_cksum -= 0x0001;
- ip6h->hop_limit = 4;
- memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16);
- memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16);
- memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16);
- return swap_mac_and_send(data, data_end);
-}
-
-static __attribute__ ((noinline))
-int parse_icmpv6(void *data, void *data_end, __u64 off,
- struct packet_description *pckt)
-{
- struct icmp6hdr *icmp_hdr;
- struct ipv6hdr *ip6h;
-
- icmp_hdr = data + off;
- if (icmp_hdr + 1 > data_end)
- return XDP_DROP;
- if (icmp_hdr->icmp6_type == 128)
- return send_icmp6_reply(data, data_end);
- if (icmp_hdr->icmp6_type != 3)
- return XDP_PASS;
- off += sizeof(struct icmp6hdr);
- ip6h = data + off;
- if (ip6h + 1 > data_end)
- return XDP_DROP;
- pckt->flow.proto = ip6h->nexthdr;
- pckt->flags |= (1 << 0);
- memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16);
- memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16);
- return -1;
-}
-
-static __attribute__ ((noinline))
-int parse_icmp(void *data, void *data_end, __u64 off,
- struct packet_description *pckt)
-{
- struct icmphdr *icmp_hdr;
- struct iphdr *iph;
-
- icmp_hdr = data + off;
- if (icmp_hdr + 1 > data_end)
- return XDP_DROP;
- if (icmp_hdr->type == 8)
- return send_icmp_reply(data, data_end);
- if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4))
- return XDP_PASS;
- off += sizeof(struct icmphdr);
- iph = data + off;
- if (iph + 1 > data_end)
- return XDP_DROP;
- if (iph->ihl != 5)
- return XDP_DROP;
- pckt->flow.proto = iph->protocol;
- pckt->flags |= (1 << 0);
- pckt->flow.src = iph->daddr;
- pckt->flow.dst = iph->saddr;
- return -1;
-}
-
-static __attribute__ ((noinline))
-__u32 get_packet_hash(struct packet_description *pckt,
- bool hash_16bytes)
-{
- if (hash_16bytes)
- return jhash_2words(jhash(pckt->flow.srcv6, 16, 12),
- pckt->flow.ports, 24);
- else
- return jhash_2words(pckt->flow.src, pckt->flow.ports,
- 24);
-}
-
-__attribute__ ((noinline))
-static bool get_packet_dst(struct real_definition **real,
- struct packet_description *pckt,
- struct vip_meta *vip_info,
- bool is_ipv6, void *lru_map)
-{
- struct real_pos_lru new_dst_lru = { };
- bool hash_16bytes = is_ipv6;
- __u32 *real_pos, hash, key;
- __u64 cur_time;
-
- if (vip_info->flags & (1 << 2))
- hash_16bytes = 1;
- if (vip_info->flags & (1 << 3)) {
- pckt->flow.port16[0] = pckt->flow.port16[1];
- memset(pckt->flow.srcv6, 0, 16);
- }
- hash = get_packet_hash(pckt, hash_16bytes);
- if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
- hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
- return 0;
- key = 2 * vip_info->vip_num + hash % 2;
- real_pos = bpf_map_lookup_elem(&ch_rings, &key);
- if (!real_pos)
- return 0;
- key = *real_pos;
- *real = bpf_map_lookup_elem(&reals, &key);
- if (!(*real))
- return 0;
- if (!(vip_info->flags & (1 << 1))) {
- __u32 conn_rate_key = 512 + 2;
- struct lb_stats *conn_rate_stats =
- bpf_map_lookup_elem(&stats, &conn_rate_key);
-
- if (!conn_rate_stats)
- return 1;
- cur_time = bpf_ktime_get_ns();
- if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
- conn_rate_stats->v1 = 1;
- conn_rate_stats->v2 = cur_time;
- } else {
- conn_rate_stats->v1 += 1;
- if (conn_rate_stats->v1 >= 1)
- return 1;
- }
- if (pckt->flow.proto == IPPROTO_UDP)
- new_dst_lru.atime = cur_time;
- new_dst_lru.pos = key;
- bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
- }
- return 1;
-}
-
-__attribute__ ((noinline))
-static void connection_table_lookup(struct real_definition **real,
- struct packet_description *pckt,
- void *lru_map)
-{
-
- struct real_pos_lru *dst_lru;
- __u64 cur_time;
- __u32 key;
-
- dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow);
- if (!dst_lru)
- return;
- if (pckt->flow.proto == IPPROTO_UDP) {
- cur_time = bpf_ktime_get_ns();
- if (cur_time - dst_lru->atime > 300000)
- return;
- dst_lru->atime = cur_time;
- }
- key = dst_lru->pos;
- *real = bpf_map_lookup_elem(&reals, &key);
-}
-
-/* don't believe your eyes!
- * below function has 6 arguments whereas bpf and llvm allow maximum of 5
- * but since it's _static_ llvm can optimize one argument away
- */
-__attribute__ ((noinline))
-static int process_l3_headers_v6(struct packet_description *pckt,
- __u8 *protocol, __u64 off,
- __u16 *pkt_bytes, void *data,
- void *data_end)
-{
- struct ipv6hdr *ip6h;
- __u64 iph_len;
- int action;
-
- ip6h = data + off;
- if (ip6h + 1 > data_end)
- return XDP_DROP;
- iph_len = sizeof(struct ipv6hdr);
- *protocol = ip6h->nexthdr;
- pckt->flow.proto = *protocol;
- *pkt_bytes = __builtin_bswap16(ip6h->payload_len);
- off += iph_len;
- if (*protocol == 45) {
- return XDP_DROP;
- } else if (*protocol == 59) {
- action = parse_icmpv6(data, data_end, off, pckt);
- if (action >= 0)
- return action;
- } else {
- memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16);
- memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16);
- }
- return -1;
-}
-
-__attribute__ ((noinline))
-static int process_l3_headers_v4(struct packet_description *pckt,
- __u8 *protocol, __u64 off,
- __u16 *pkt_bytes, void *data,
- void *data_end)
-{
- struct iphdr *iph;
- __u64 iph_len;
- int action;
-
- iph = data + off;
- if (iph + 1 > data_end)
- return XDP_DROP;
- if (iph->ihl != 5)
- return XDP_DROP;
- *protocol = iph->protocol;
- pckt->flow.proto = *protocol;
- *pkt_bytes = __builtin_bswap16(iph->tot_len);
- off += 20;
- if (iph->frag_off & 65343)
- return XDP_DROP;
- if (*protocol == IPPROTO_ICMP) {
- action = parse_icmp(data, data_end, off, pckt);
- if (action >= 0)
- return action;
- } else {
- pckt->flow.src = iph->saddr;
- pckt->flow.dst = iph->daddr;
- }
- return -1;
-}
-
-__attribute__ ((noinline))
-static int process_packet(void *data, __u64 off, void *data_end,
- bool is_ipv6, struct xdp_md *xdp)
-{
-
- struct real_definition *dst = NULL;
- struct packet_description pckt = { };
- struct vip_definition vip = { };
- struct lb_stats *data_stats;
- struct eth_hdr *eth = data;
- void *lru_map = &lru_cache;
- struct vip_meta *vip_info;
- __u32 lru_stats_key = 513;
- __u32 mac_addr_pos = 0;
- __u32 stats_key = 512;
- struct ctl_value *cval;
- __u16 pkt_bytes;
- __u64 iph_len;
- __u8 protocol;
- __u32 vip_num;
- int action;
-
- if (is_ipv6)
- action = process_l3_headers_v6(&pckt, &protocol, off,
- &pkt_bytes, data, data_end);
- else
- action = process_l3_headers_v4(&pckt, &protocol, off,
- &pkt_bytes, data, data_end);
- if (action >= 0)
- return action;
- protocol = pckt.flow.proto;
- if (protocol == IPPROTO_TCP) {
- if (!parse_tcp(data, data_end, is_ipv6, &pckt))
- return XDP_DROP;
- } else if (protocol == IPPROTO_UDP) {
- if (!parse_udp(data, data_end, is_ipv6, &pckt))
- return XDP_DROP;
- } else {
- return XDP_TX;
- }
-
- if (is_ipv6)
- memcpy(vip.vipv6, pckt.flow.dstv6, 16);
- else
- vip.vip = pckt.flow.dst;
- vip.port = pckt.flow.port16[1];
- vip.proto = pckt.flow.proto;
- vip_info = bpf_map_lookup_elem(&vip_map, &vip);
- if (!vip_info) {
- vip.port = 0;
- vip_info = bpf_map_lookup_elem(&vip_map, &vip);
- if (!vip_info)
- return XDP_PASS;
- if (!(vip_info->flags & (1 << 4)))
- pckt.flow.port16[1] = 0;
- }
- if (data_end - data > 1400)
- return XDP_DROP;
- data_stats = bpf_map_lookup_elem(&stats, &stats_key);
- if (!data_stats)
- return XDP_DROP;
- data_stats->v1 += 1;
- if (!dst) {
- if (vip_info->flags & (1 << 0))
- pckt.flow.port16[0] = 0;
- if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1)))
- connection_table_lookup(&dst, &pckt, lru_map);
- if (dst)
- goto out;
- if (pckt.flow.proto == IPPROTO_TCP) {
- struct lb_stats *lru_stats =
- bpf_map_lookup_elem(&stats, &lru_stats_key);
-
- if (!lru_stats)
- return XDP_DROP;
- if (pckt.flags & (1 << 1))
- lru_stats->v1 += 1;
- else
- lru_stats->v2 += 1;
- }
- if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map))
- return XDP_DROP;
- data_stats->v2 += 1;
- }
-out:
- cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos);
- if (!cval)
- return XDP_DROP;
- if (dst->flags & (1 << 0)) {
- if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes))
- return XDP_DROP;
- } else {
- if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
- return XDP_DROP;
- }
- vip_num = vip_info->vip_num;
- data_stats = bpf_map_lookup_elem(&stats, &vip_num);
- if (!data_stats)
- return XDP_DROP;
- data_stats->v1 += 1;
- data_stats->v2 += pkt_bytes;
-
- data = (void *)(long)xdp->data;
- data_end = (void *)(long)xdp->data_end;
- if (data + 4 > data_end)
- return XDP_DROP;
- *(u32 *)data = dst->dst;
- return XDP_DROP;
-}
-
-__attribute__ ((section("xdp-test"), used))
-int balancer_ingress(struct xdp_md *ctx)
-{
- void *data = (void *)(long)ctx->data;
- void *data_end = (void *)(long)ctx->data_end;
- struct eth_hdr *eth = data;
- __u32 eth_proto;
- __u32 nh_off;
-
- nh_off = sizeof(struct eth_hdr);
- if (data + nh_off > data_end)
- return XDP_DROP;
- eth_proto = eth->eth_proto;
- if (eth_proto == 8)
- return process_packet(data, nh_off, data_end, 0, ctx);
- else if (eth_proto == 56710)
- return process_packet(data, nh_off, data_end, 1, ctx);
- else
- return XDP_DROP;
-}
-
-char _license[] __attribute__ ((section("license"), used)) = "GPL";
-int _version __attribute__ ((section("version"), used)) = 1;
+++ /dev/null
-/* Copyright (c) 2017 VMware
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#include <linux/bpf.h>
-#include "bpf_helpers.h"
-
-int _version SEC("version") = 1;
-
-SEC("redirect_to_111")
-int xdp_redirect_to_111(struct xdp_md *xdp)
-{
- return bpf_redirect(111, 0);
-}
-SEC("redirect_to_222")
-int xdp_redirect_to_222(struct xdp_md *xdp)
-{
- return bpf_redirect(222, 0);
-}
-
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright(c) 2018 Jesper Dangaard Brouer.
- *
- * XDP/TC VLAN manipulation example
- *
- * GOTCHA: Remember to disable NIC hardware offloading of VLANs,
- * else the VLAN tags are NOT inlined in the packet payload:
- *
- * # ethtool -K ixgbe2 rxvlan off
- *
- * Verify setting:
- * # ethtool -k ixgbe2 | grep rx-vlan-offload
- * rx-vlan-offload: off
- *
- */
-#include <stddef.h>
-#include <stdbool.h>
-#include <string.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/if_vlan.h>
-#include <linux/in.h>
-#include <linux/pkt_cls.h>
-
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
-
-/* linux/if_vlan.h have not exposed this as UAPI, thus mirror some here
- *
- * struct vlan_hdr - vlan header
- * @h_vlan_TCI: priority and VLAN ID
- * @h_vlan_encapsulated_proto: packet type ID or len
- */
-struct _vlan_hdr {
- __be16 h_vlan_TCI;
- __be16 h_vlan_encapsulated_proto;
-};
-#define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */
-#define VLAN_PRIO_SHIFT 13
-#define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */
-#define VLAN_TAG_PRESENT VLAN_CFI_MASK
-#define VLAN_VID_MASK 0x0fff /* VLAN Identifier */
-#define VLAN_N_VID 4096
-
-struct parse_pkt {
- __u16 l3_proto;
- __u16 l3_offset;
- __u16 vlan_outer;
- __u16 vlan_inner;
- __u8 vlan_outer_offset;
- __u8 vlan_inner_offset;
-};
-
-char _license[] SEC("license") = "GPL";
-
-static __always_inline
-bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt)
-{
- __u16 eth_type;
- __u8 offset;
-
- offset = sizeof(*eth);
- /* Make sure packet is large enough for parsing eth + 2 VLAN headers */
- if ((void *)eth + offset + (2*sizeof(struct _vlan_hdr)) > data_end)
- return false;
-
- eth_type = eth->h_proto;
-
- /* Handle outer VLAN tag */
- if (eth_type == bpf_htons(ETH_P_8021Q)
- || eth_type == bpf_htons(ETH_P_8021AD)) {
- struct _vlan_hdr *vlan_hdr;
-
- vlan_hdr = (void *)eth + offset;
- pkt->vlan_outer_offset = offset;
- pkt->vlan_outer = bpf_ntohs(vlan_hdr->h_vlan_TCI)
- & VLAN_VID_MASK;
- eth_type = vlan_hdr->h_vlan_encapsulated_proto;
- offset += sizeof(*vlan_hdr);
- }
-
- /* Handle inner (double) VLAN tag */
- if (eth_type == bpf_htons(ETH_P_8021Q)
- || eth_type == bpf_htons(ETH_P_8021AD)) {
- struct _vlan_hdr *vlan_hdr;
-
- vlan_hdr = (void *)eth + offset;
- pkt->vlan_inner_offset = offset;
- pkt->vlan_inner = bpf_ntohs(vlan_hdr->h_vlan_TCI)
- & VLAN_VID_MASK;
- eth_type = vlan_hdr->h_vlan_encapsulated_proto;
- offset += sizeof(*vlan_hdr);
- }
-
- pkt->l3_proto = bpf_ntohs(eth_type); /* Convert to host-byte-order */
- pkt->l3_offset = offset;
-
- return true;
-}
-
-/* Hint, VLANs are choosen to hit network-byte-order issues */
-#define TESTVLAN 4011 /* 0xFAB */
-// #define TO_VLAN 4000 /* 0xFA0 (hint 0xOA0 = 160) */
-
-SEC("xdp_drop_vlan_4011")
-int xdp_prognum0(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct parse_pkt pkt = { 0 };
-
- if (!parse_eth_frame(data, data_end, &pkt))
- return XDP_ABORTED;
-
- /* Drop specific VLAN ID example */
- if (pkt.vlan_outer == TESTVLAN)
- return XDP_ABORTED;
- /*
- * Using XDP_ABORTED makes it possible to record this event,
- * via tracepoint xdp:xdp_exception like:
- * # perf record -a -e xdp:xdp_exception
- * # perf script
- */
- return XDP_PASS;
-}
-/*
-Commands to setup VLAN on Linux to test packets gets dropped:
-
- export ROOTDEV=ixgbe2
- export VLANID=4011
- ip link add link $ROOTDEV name $ROOTDEV.$VLANID type vlan id $VLANID
- ip link set dev $ROOTDEV.$VLANID up
-
- ip link set dev $ROOTDEV mtu 1508
- ip addr add 100.64.40.11/24 dev $ROOTDEV.$VLANID
-
-Load prog with ip tool:
-
- ip link set $ROOTDEV xdp off
- ip link set $ROOTDEV xdp object xdp_vlan01_kern.o section xdp_drop_vlan_4011
-
-*/
-
-/* Changing VLAN to zero, have same practical effect as removing the VLAN. */
-#define TO_VLAN 0
-
-SEC("xdp_vlan_change")
-int xdp_prognum1(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct parse_pkt pkt = { 0 };
-
- if (!parse_eth_frame(data, data_end, &pkt))
- return XDP_ABORTED;
-
- /* Change specific VLAN ID */
- if (pkt.vlan_outer == TESTVLAN) {
- struct _vlan_hdr *vlan_hdr = data + pkt.vlan_outer_offset;
-
- /* Modifying VLAN, preserve top 4 bits */
- vlan_hdr->h_vlan_TCI =
- bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000)
- | TO_VLAN);
- }
-
- return XDP_PASS;
-}
-
-/*
- * Show XDP+TC can cooperate, on creating a VLAN rewriter.
- * 1. Create a XDP prog that can "pop"/remove a VLAN header.
- * 2. Create a TC-bpf prog that egress can add a VLAN header.
- */
-
-#ifndef ETH_ALEN /* Ethernet MAC address length */
-#define ETH_ALEN 6 /* bytes */
-#endif
-#define VLAN_HDR_SZ 4 /* bytes */
-
-SEC("xdp_vlan_remove_outer")
-int xdp_prognum2(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct parse_pkt pkt = { 0 };
- char *dest;
-
- if (!parse_eth_frame(data, data_end, &pkt))
- return XDP_ABORTED;
-
- /* Skip packet if no outer VLAN was detected */
- if (pkt.vlan_outer_offset == 0)
- return XDP_PASS;
-
- /* Moving Ethernet header, dest overlap with src, memmove handle this */
- dest = data;
- dest+= VLAN_HDR_SZ;
- /*
- * Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by
- * only moving two MAC addrs (12 bytes), not overwriting last 2 bytes
- */
- __builtin_memmove(dest, data, ETH_ALEN * 2);
- /* Note: LLVM built-in memmove inlining require size to be constant */
-
- /* Move start of packet header seen by Linux kernel stack */
- bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ);
-
- return XDP_PASS;
-}
-
-static __always_inline
-void shift_mac_4bytes_16bit(void *data)
-{
- __u16 *p = data;
-
- p[7] = p[5]; /* delete p[7] was vlan_hdr->h_vlan_TCI */
- p[6] = p[4]; /* delete p[6] was ethhdr->h_proto */
- p[5] = p[3];
- p[4] = p[2];
- p[3] = p[1];
- p[2] = p[0];
-}
-
-static __always_inline
-void shift_mac_4bytes_32bit(void *data)
-{
- __u32 *p = data;
-
- /* Assuming VLAN hdr present. The 4 bytes in p[3] that gets
- * overwritten, is ethhdr->h_proto and vlan_hdr->h_vlan_TCI.
- * The vlan_hdr->h_vlan_encapsulated_proto take over role as
- * ethhdr->h_proto.
- */
- p[3] = p[2];
- p[2] = p[1];
- p[1] = p[0];
-}
-
-SEC("xdp_vlan_remove_outer2")
-int xdp_prognum3(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct ethhdr *orig_eth = data;
- struct parse_pkt pkt = { 0 };
-
- if (!parse_eth_frame(orig_eth, data_end, &pkt))
- return XDP_ABORTED;
-
- /* Skip packet if no outer VLAN was detected */
- if (pkt.vlan_outer_offset == 0)
- return XDP_PASS;
-
- /* Simply shift down MAC addrs 4 bytes, overwrite h_proto + TCI */
- shift_mac_4bytes_32bit(data);
-
- /* Move start of packet header seen by Linux kernel stack */
- bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ);
-
- return XDP_PASS;
-}
-
-/*=====================================
- * BELOW: TC-hook based ebpf programs
- * ====================================
- * The TC-clsact eBPF programs (currently) need to be attach via TC commands
- */
-
-SEC("tc_vlan_push")
-int _tc_progA(struct __sk_buff *ctx)
-{
- bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN);
-
- return TC_ACT_OK;
-}
-/*
-Commands to setup TC to use above bpf prog:
-
-export ROOTDEV=ixgbe2
-export FILE=xdp_vlan01_kern.o
-
-# Re-attach clsact to clear/flush existing role
-tc qdisc del dev $ROOTDEV clsact 2> /dev/null ;\
-tc qdisc add dev $ROOTDEV clsact
-
-# Attach BPF prog EGRESS
-tc filter add dev $ROOTDEV egress \
- prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push
-
-tc filter show dev $ROOTDEV egress
-*/
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-#define KBUILD_MODNAME "xdp_dummy"
-#include <linux/bpf.h>
-#include "bpf_helpers.h"
-
-SEC("xdp_dummy")
-int xdp_dummy_prog(struct xdp_md *ctx)
-{
- return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";