bpf: sockmap, support for msg_peek in sk_msg with redirect ingress
author: John Fastabend <john.fastabend@gmail.com>
Tue, 16 Oct 2018 18:08:04 +0000 (11:08 -0700)
committer: Daniel Borkmann <daniel@iogearbox.net>
Wed, 17 Oct 2018 00:30:32 +0000 (02:30 +0200)
This adds support for the MSG_PEEK flag when redirecting to ingress
and receiving on the sk_msg psock queue. Previously the flag was
ignored, which could confuse applications that expected it to work
as normal.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
include/net/tcp.h
net/ipv4/tcp_bpf.c
net/tls/tls_sw.c

index 3600ae0f25c3410450be21b7b7b67a5d355551ed..14fdd7ce9992b97b602dfa4965cdec9fc27beb9a 100644 (file)
@@ -2089,7 +2089,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
 int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                    int nonblock, int flags, int *addr_len);
 int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
-                     struct msghdr *msg, int len);
+                     struct msghdr *msg, int len, int flags);
 
 /* Call BPF_SOCK_OPS program that returns an int. If the return value
  * is < 0, then the BPF op failed (for example if the loaded BPF
index f9d3cf18582782d72e28bd0324a317cee96b05f6..b7918d4caa300a15bec2858065b8f73d71cf6eb0 100644 (file)
@@ -39,17 +39,19 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
 }
 
 int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
-                     struct msghdr *msg, int len)
+                     struct msghdr *msg, int len, int flags)
 {
        struct iov_iter *iter = &msg->msg_iter;
+       int peek = flags & MSG_PEEK;
        int i, ret, copied = 0;
+       struct sk_msg *msg_rx;
+
+       msg_rx = list_first_entry_or_null(&psock->ingress_msg,
+                                         struct sk_msg, list);
 
        while (copied != len) {
                struct scatterlist *sge;
-               struct sk_msg *msg_rx;
 
-               msg_rx = list_first_entry_or_null(&psock->ingress_msg,
-                                                 struct sk_msg, list);
                if (unlikely(!msg_rx))
                        break;
 
@@ -70,22 +72,30 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
                        }
 
                        copied += copy;
-                       sge->offset += copy;
-                       sge->length -= copy;
-                       sk_mem_uncharge(sk, copy);
-                       msg_rx->sg.size -= copy;
-                       if (!sge->length) {
-                               i++;
-                               if (i == MAX_SKB_FRAGS)
-                                       i = 0;
-                               if (!msg_rx->skb)
-                                       put_page(page);
+                       if (likely(!peek)) {
+                               sge->offset += copy;
+                               sge->length -= copy;
+                               sk_mem_uncharge(sk, copy);
+                               msg_rx->sg.size -= copy;
+
+                               if (!sge->length) {
+                                       sk_msg_iter_var_next(i);
+                                       if (!msg_rx->skb)
+                                               put_page(page);
+                               }
+                       } else {
+                               sk_msg_iter_var_next(i);
                        }
 
                        if (copied == len)
                                break;
                } while (i != msg_rx->sg.end);
 
+               if (unlikely(peek)) {
+                       msg_rx = list_next_entry(msg_rx, list);
+                       continue;
+               }
+
                msg_rx->sg.start = i;
                if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
                        list_del(&msg_rx->list);
@@ -93,6 +103,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
                                consume_skb(msg_rx->skb);
                        kfree(msg_rx);
                }
+               msg_rx = list_first_entry_or_null(&psock->ingress_msg,
+                                                 struct sk_msg, list);
        }
 
        return copied;
@@ -115,7 +127,7 @@ int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
        lock_sock(sk);
 msg_bytes_ready:
-       copied = __tcp_bpf_recvmsg(sk, psock, msg, len);
+       copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags);
        if (!copied) {
                int data, err = 0;
                long timeo;
index a525fc4c2a4b9a12496ff4ef86fdf82e5137ebc5..5cd88ba8acd175bc013cc95917120d8eb6899b99 100644 (file)
@@ -1478,7 +1478,8 @@ int tls_sw_recvmsg(struct sock *sk,
                skb = tls_wait_data(sk, psock, flags, timeo, &err);
                if (!skb) {
                        if (psock) {
-                               int ret = __tcp_bpf_recvmsg(sk, psock, msg, len);
+                               int ret = __tcp_bpf_recvmsg(sk, psock,
+                                                           msg, len, flags);
 
                                if (ret > 0) {
                                        copied += ret;