bpf: sk_msg program helper bpf_sk_msg_pull_data

author John Fastabend <john.fastabend@gmail.com>

Sun, 18 Mar 2018 19:57:25 +0000 (12:57 -0700)

committer Daniel Borkmann <daniel@iogearbox.net>

Mon, 19 Mar 2018 20:14:39 +0000 (21:14 +0100)
author John Fastabend <john.fastabend@gmail.com>
Sun, 18 Mar 2018 19:57:25 +0000 (12:57 -0700)
committer Daniel Borkmann <daniel@iogearbox.net>
Mon, 19 Mar 2018 20:14:39 +0000 (21:14 +0100)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index 1765cfb16c998d9cbfa824dd3f108478a02d516b..18b7c510c511df9247a82e0bf40c199c14b229b4 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -793,7 +793,8 @@ union bpf_attr {
         FN(sock_ops_cb_flags_set),      \
         FN(msg_redirect_map),           \
         FN(msg_apply_bytes),            \
-       FN(msg_cork_bytes),
+       FN(msg_cork_bytes),             \
+       FN(msg_pull_data),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c

index 0c9daf6ee555449e8a3219ee5e4d87984ef1dd01..c86f03fd9ea5cd43d78297842ecbc75a6dd426cb 100644 (file)
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1956,6 +1956,136 @@ static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
         .arg2_type      = ARG_ANYTHING,
  };
  
+/* Make bytes [start, end) of @msg directly readable via msg->data/data_end.
+ * Returns 0, -EINVAL (flags set, empty or out-of-range window) or -ENOMEM.
+ */
+BPF_CALL_4(bpf_msg_pull_data,
+          struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
+{
+       unsigned int len = 0, offset = 0, copy = 0, poffset = 0;
+       int first_sg, last_sg, i, shift, bytes = end - start;
+       struct scatterlist *sg = msg->sg_data;
+       unsigned char *p, *to, *from;
+       unsigned int bytes_sg_total;
+       struct page *page;
+
+       if (unlikely(flags || end <= start))
+               return -EINVAL;
+
+       /* Find the starting element; offset counts only bytes before sg[i] */
+       i = msg->sg_start;
+       do {
+               offset += len;
+               len = sg[i].length;
+               if (start < offset + len)
+                       break;
+               if (++i == MAX_SKB_FRAGS)
+                       i = 0;
+       } while (i != msg->sg_end);
+
+       if (unlikely(start >= offset + len))
+               return -EINVAL;
+
+       first_sg = i;
+       /* start may point into the element, so count its headroom too */
+       bytes_sg_total = start - offset + bytes;
+       if (!msg->sg_copy[i] && bytes_sg_total <= len)
+               goto out;
+
+       /* At this point we need to linearize multiple scatterlist
+        * elements or a single shared page. Either way we copy into
+        * a linear buffer exclusively owned by BPF, place it in the
+        * ring at first_sg, then drop the copied entries and shift
+        * the rest. Entries are only ever copied whole so we cannot
+        * run out of sg slots; reading one byte copies its sg entry.
+        */
+       do {
+               copy += sg[i].length;
+               if (++i == MAX_SKB_FRAGS)
+                       i = 0;
+               if (bytes_sg_total <= copy)
+                       break;
+       } while (i != msg->sg_end);
+       last_sg = i;
+
+       if (unlikely(bytes_sg_total > copy))
+               return -EINVAL;
+
+       /* __GFP_COMP so a multi-page buffer is released by one put_page() */
+       page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
+                          get_order(copy));
+       if (unlikely(!page))
+               return -ENOMEM;
+       p = page_address(page);
+
+       i = first_sg;
+       do {
+               from = sg_virt(&sg[i]);
+               len = sg[i].length;
+               to = p + poffset;
+
+               memcpy(to, from, len);
+               poffset += len;
+               sg[i].length = 0;
+               put_page(sg_page(&sg[i]));
+
+               if (++i == MAX_SKB_FRAGS)
+                       i = 0;
+       } while (i != last_sg);
+
+       sg[first_sg].length = copy;
+       sg_set_page(&sg[first_sg], page, copy, 0);
+
+       /* To repair the sg ring we need to shift entries, unless only a
+        * single entry was replaced. last_sg may have wrapped around to
+        * an index at or below first_sg, so count the gap modulo the ring.
+        */
+       shift = last_sg > first_sg ?
+               last_sg - first_sg - 1 :
+               MAX_SKB_FRAGS - first_sg + last_sg - 1;
+       if (!shift)
+               goto out;
+
+       i = first_sg + 1;
+       if (i == MAX_SKB_FRAGS)
+               i = 0;
+       do {
+               int move_from = i + shift;
+
+               if (move_from >= MAX_SKB_FRAGS)
+                       move_from -= MAX_SKB_FRAGS;
+
+               if (move_from == msg->sg_end)
+                       break;
+
+               sg[i] = sg[move_from];
+               sg[move_from].length = 0;
+               sg[move_from].page_link = 0;
+               sg[move_from].offset = 0;
+
+               if (++i == MAX_SKB_FRAGS)
+                       i = 0;
+       } while (1);
+       msg->sg_end -= shift;
+       if (msg->sg_end < 0)
+               msg->sg_end += MAX_SKB_FRAGS;
+out:
+       msg->data = sg_virt(&sg[first_sg]) + start - offset;
+       msg->data_end = msg->data + bytes;
+
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_pull_data_proto = {
+       .func           = bpf_msg_pull_data,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,          /* 0 or negative errno */
+       .arg1_type      = ARG_PTR_TO_CTX,       /* msg */
+       .arg2_type      = ARG_ANYTHING,         /* start */
+       .arg3_type      = ARG_ANYTHING,         /* end */
+       .arg4_type      = ARG_ANYTHING,         /* flags, rejected unless zero */
+};
+
  BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
  {
         return task_get_classid(skb);
@@ -2897,7 +3027,8 @@ bool bpf_helper_changes_pkt_data(void *func)
             func == bpf_l3_csum_replace ||
             func == bpf_l4_csum_replace ||
             func == bpf_xdp_adjust_head ||
-           func == bpf_xdp_adjust_meta)
+           func == bpf_xdp_adjust_meta ||
+           func == bpf_msg_pull_data)
                 return true;
  
         return false;
@@ -3666,6 +3797,8 @@ static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
                 return &bpf_msg_apply_bytes_proto;
         case BPF_FUNC_msg_cork_bytes:
                 return &bpf_msg_cork_bytes_proto;
+       case BPF_FUNC_msg_pull_data:
+               return &bpf_msg_pull_data_proto;
         default:
                 return bpf_base_func_proto(func_id);
         }
author	John Fastabend <john.fastabend@gmail.com>
	Sun, 18 Mar 2018 19:57:25 +0000 (12:57 -0700)
committer	Daniel Borkmann <daniel@iogearbox.net>
	Mon, 19 Mar 2018 20:14:39 +0000 (21:14 +0100)
include/uapi/linux/bpf.h		patch \| blob \| history
net/core/filter.c		patch \| blob \| history