bpf: extend cgroup bpf core to allow multiple cgroup storage types
author Roman Gushchin <guro@fb.com>
Fri, 28 Sep 2018 14:45:36 +0000 (14:45 +0000)
committer Daniel Borkmann <daniel@iogearbox.net>
Mon, 1 Oct 2018 14:18:32 +0000 (16:18 +0200)
In order to introduce per-cpu cgroup storage, let's generalize
bpf cgroup core to support multiple cgroup storage types.
Potentially, per-node cgroup storage can be added later.

This commit is mostly a formal change that replaces the
cgroup_storage pointer with an array of cgroup_storage pointers.
It doesn't actually introduce a new storage type;
that will be done later.

Each bpf program is now able to have one cgroup storage of each type.

Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
include/linux/bpf-cgroup.h
include/linux/bpf.h
kernel/bpf/cgroup.c
kernel/bpf/helpers.c
kernel/bpf/local_storage.c
kernel/bpf/syscall.c
kernel/bpf/verifier.c
net/bpf/test_run.c

index f91b0f8ff3a927d3d8a9c950447278dea9adea28..e9871b012dac8011285362b3b18e4f7a2b2066c4 100644 (file)
@@ -2,6 +2,7 @@
 #ifndef _BPF_CGROUP_H
 #define _BPF_CGROUP_H
 
+#include <linux/bpf.h>
 #include <linux/errno.h>
 #include <linux/jump_label.h>
 #include <linux/percpu.h>
@@ -22,7 +23,10 @@ struct bpf_cgroup_storage;
 extern struct static_key_false cgroup_bpf_enabled_key;
 #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
 
-DECLARE_PER_CPU(void*, bpf_cgroup_storage);
+DECLARE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+
+#define for_each_cgroup_storage_type(stype) \
+       for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
 
 struct bpf_cgroup_storage_map;
 
@@ -43,7 +47,7 @@ struct bpf_cgroup_storage {
 struct bpf_prog_list {
        struct list_head node;
        struct bpf_prog *prog;
-       struct bpf_cgroup_storage *storage;
+       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 };
 
 struct bpf_prog_array;
@@ -101,18 +105,29 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
                                      short access, enum bpf_attach_type type);
 
-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage)
+static inline enum bpf_cgroup_storage_type cgroup_storage_type(
+       struct bpf_map *map)
 {
+       return BPF_CGROUP_STORAGE_SHARED;
+}
+
+static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
+                                         *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
+{
+       enum bpf_cgroup_storage_type stype;
        struct bpf_storage_buffer *buf;
 
-       if (!storage)
-               return;
+       for_each_cgroup_storage_type(stype) {
+               if (!storage[stype])
+                       continue;
 
-       buf = READ_ONCE(storage->buf);
-       this_cpu_write(bpf_cgroup_storage, &buf->data[0]);
+               buf = READ_ONCE(storage[stype]->buf);
+               this_cpu_write(bpf_cgroup_storage[stype], &buf->data[0]);
+       }
 }
 
-struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog);
+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
+                                       enum bpf_cgroup_storage_type stype);
 void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
 void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
                             struct cgroup *cgroup,
@@ -265,13 +280,14 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
        return -EINVAL;
 }
 
-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {}
+static inline void bpf_cgroup_storage_set(
+       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
 static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
                                            struct bpf_map *map) { return 0; }
 static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
                                              struct bpf_map *map) {}
 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
-       struct bpf_prog *prog) { return 0; }
+       struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; }
 static inline void bpf_cgroup_storage_free(
        struct bpf_cgroup_storage *storage) {}
 
@@ -293,6 +309,8 @@ static inline void bpf_cgroup_storage_free(
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
 
+#define for_each_cgroup_storage_type(stype) for (; false; )
+
 #endif /* CONFIG_CGROUP_BPF */
 
 #endif /* _BPF_CGROUP_H */
index 988a00797bcd7dfed289fdf1975d5573559cfb18..b457fbe7b70bc910bf8144bb2aa209c9da26e3bc 100644 (file)
@@ -272,6 +272,13 @@ struct bpf_prog_offload {
        u32                     jited_len;
 };
 
+enum bpf_cgroup_storage_type {
+       BPF_CGROUP_STORAGE_SHARED,
+       __BPF_CGROUP_STORAGE_MAX
+};
+
+#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
+
 struct bpf_prog_aux {
        atomic_t refcnt;
        u32 used_map_cnt;
@@ -289,7 +296,7 @@ struct bpf_prog_aux {
        struct bpf_prog *prog;
        struct user_struct *user;
        u64 load_time; /* ns since boottime */
-       struct bpf_map *cgroup_storage;
+       struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
        char name[BPF_OBJ_NAME_LEN];
 #ifdef CONFIG_SECURITY
        void *security;
@@ -358,7 +365,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
  */
 struct bpf_prog_array_item {
        struct bpf_prog *prog;
-       struct bpf_cgroup_storage *cgroup_storage;
+       struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 };
 
 struct bpf_prog_array {
index 549f6fbcc4612c58ce773c53d183fea5a8219883..00f6ed2e4f9a82403cd3c6b6bdc80cbb43a9da0b 100644 (file)
@@ -25,6 +25,7 @@ EXPORT_SYMBOL(cgroup_bpf_enabled_key);
  */
 void cgroup_bpf_put(struct cgroup *cgrp)
 {
+       enum bpf_cgroup_storage_type stype;
        unsigned int type;
 
        for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
@@ -34,8 +35,10 @@ void cgroup_bpf_put(struct cgroup *cgrp)
                list_for_each_entry_safe(pl, tmp, progs, node) {
                        list_del(&pl->node);
                        bpf_prog_put(pl->prog);
-                       bpf_cgroup_storage_unlink(pl->storage);
-                       bpf_cgroup_storage_free(pl->storage);
+                       for_each_cgroup_storage_type(stype) {
+                               bpf_cgroup_storage_unlink(pl->storage[stype]);
+                               bpf_cgroup_storage_free(pl->storage[stype]);
+                       }
                        kfree(pl);
                        static_branch_dec(&cgroup_bpf_enabled_key);
                }
@@ -97,6 +100,7 @@ static int compute_effective_progs(struct cgroup *cgrp,
                                   enum bpf_attach_type type,
                                   struct bpf_prog_array __rcu **array)
 {
+       enum bpf_cgroup_storage_type stype;
        struct bpf_prog_array *progs;
        struct bpf_prog_list *pl;
        struct cgroup *p = cgrp;
@@ -125,7 +129,9 @@ static int compute_effective_progs(struct cgroup *cgrp,
                                continue;
 
                        progs->items[cnt].prog = pl->prog;
-                       progs->items[cnt].cgroup_storage = pl->storage;
+                       for_each_cgroup_storage_type(stype)
+                               progs->items[cnt].cgroup_storage[stype] =
+                                       pl->storage[stype];
                        cnt++;
                }
        } while ((p = cgroup_parent(p)));
@@ -232,7 +238,9 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 {
        struct list_head *progs = &cgrp->bpf.progs[type];
        struct bpf_prog *old_prog = NULL;
-       struct bpf_cgroup_storage *storage, *old_storage = NULL;
+       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
+               *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
+       enum bpf_cgroup_storage_type stype;
        struct bpf_prog_list *pl;
        bool pl_was_allocated;
        int err;
@@ -254,34 +262,44 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
        if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
                return -E2BIG;
 
-       storage = bpf_cgroup_storage_alloc(prog);
-       if (IS_ERR(storage))
-               return -ENOMEM;
+       for_each_cgroup_storage_type(stype) {
+               storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+               if (IS_ERR(storage[stype])) {
+                       storage[stype] = NULL;
+                       for_each_cgroup_storage_type(stype)
+                               bpf_cgroup_storage_free(storage[stype]);
+                       return -ENOMEM;
+               }
+       }
 
        if (flags & BPF_F_ALLOW_MULTI) {
                list_for_each_entry(pl, progs, node) {
                        if (pl->prog == prog) {
                                /* disallow attaching the same prog twice */
-                               bpf_cgroup_storage_free(storage);
+                               for_each_cgroup_storage_type(stype)
+                                       bpf_cgroup_storage_free(storage[stype]);
                                return -EINVAL;
                        }
                }
 
                pl = kmalloc(sizeof(*pl), GFP_KERNEL);
                if (!pl) {
-                       bpf_cgroup_storage_free(storage);
+                       for_each_cgroup_storage_type(stype)
+                               bpf_cgroup_storage_free(storage[stype]);
                        return -ENOMEM;
                }
 
                pl_was_allocated = true;
                pl->prog = prog;
-               pl->storage = storage;
+               for_each_cgroup_storage_type(stype)
+                       pl->storage[stype] = storage[stype];
                list_add_tail(&pl->node, progs);
        } else {
                if (list_empty(progs)) {
                        pl = kmalloc(sizeof(*pl), GFP_KERNEL);
                        if (!pl) {
-                               bpf_cgroup_storage_free(storage);
+                               for_each_cgroup_storage_type(stype)
+                                       bpf_cgroup_storage_free(storage[stype]);
                                return -ENOMEM;
                        }
                        pl_was_allocated = true;
@@ -289,12 +307,15 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
                } else {
                        pl = list_first_entry(progs, typeof(*pl), node);
                        old_prog = pl->prog;
-                       old_storage = pl->storage;
-                       bpf_cgroup_storage_unlink(old_storage);
+                       for_each_cgroup_storage_type(stype) {
+                               old_storage[stype] = pl->storage[stype];
+                               bpf_cgroup_storage_unlink(old_storage[stype]);
+                       }
                        pl_was_allocated = false;
                }
                pl->prog = prog;
-               pl->storage = storage;
+               for_each_cgroup_storage_type(stype)
+                       pl->storage[stype] = storage[stype];
        }
 
        cgrp->bpf.flags[type] = flags;
@@ -304,21 +325,27 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
                goto cleanup;
 
        static_branch_inc(&cgroup_bpf_enabled_key);
-       if (old_storage)
-               bpf_cgroup_storage_free(old_storage);
+       for_each_cgroup_storage_type(stype) {
+               if (!old_storage[stype])
+                       continue;
+               bpf_cgroup_storage_free(old_storage[stype]);
+       }
        if (old_prog) {
                bpf_prog_put(old_prog);
                static_branch_dec(&cgroup_bpf_enabled_key);
        }
-       bpf_cgroup_storage_link(storage, cgrp, type);
+       for_each_cgroup_storage_type(stype)
+               bpf_cgroup_storage_link(storage[stype], cgrp, type);
        return 0;
 
 cleanup:
        /* and cleanup the prog list */
        pl->prog = old_prog;
-       bpf_cgroup_storage_free(pl->storage);
-       pl->storage = old_storage;
-       bpf_cgroup_storage_link(old_storage, cgrp, type);
+       for_each_cgroup_storage_type(stype) {
+               bpf_cgroup_storage_free(pl->storage[stype]);
+               pl->storage[stype] = old_storage[stype];
+               bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
+       }
        if (pl_was_allocated) {
                list_del(&pl->node);
                kfree(pl);
@@ -339,6 +366,7 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
                        enum bpf_attach_type type, u32 unused_flags)
 {
        struct list_head *progs = &cgrp->bpf.progs[type];
+       enum bpf_cgroup_storage_type stype;
        u32 flags = cgrp->bpf.flags[type];
        struct bpf_prog *old_prog = NULL;
        struct bpf_prog_list *pl;
@@ -385,8 +413,10 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 
        /* now can actually delete it from this cgroup list */
        list_del(&pl->node);
-       bpf_cgroup_storage_unlink(pl->storage);
-       bpf_cgroup_storage_free(pl->storage);
+       for_each_cgroup_storage_type(stype) {
+               bpf_cgroup_storage_unlink(pl->storage[stype]);
+               bpf_cgroup_storage_free(pl->storage[stype]);
+       }
        kfree(pl);
        if (list_empty(progs))
                /* last program was detached, reset flags to zero */
index 1991466b8327cf51b32c89ed3c10de4a23083b11..9070b2ace6aaf7fe80e91398681fbf1964982ecb 100644 (file)
@@ -194,16 +194,18 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
        .ret_type       = RET_INTEGER,
 };
 
-DECLARE_PER_CPU(void*, bpf_cgroup_storage);
+#ifdef CONFIG_CGROUP_BPF
+DECLARE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
 
 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 {
-       /* map and flags arguments are not used now,
-        * but provide an ability to extend the API
-        * for other types of local storages.
-        * verifier checks that their values are correct.
+       /* flags argument is not used now,
+        * but provides an ability to extend the API.
+        * verifier checks that its value is correct.
         */
-       return (unsigned long) this_cpu_read(bpf_cgroup_storage);
+       enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
+
+       return (unsigned long) this_cpu_read(bpf_cgroup_storage[stype]);
 }
 
 const struct bpf_func_proto bpf_get_local_storage_proto = {
@@ -214,3 +216,4 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 #endif
+#endif
index 22ad967d1e5f14b9bafd41a99117b97700e041f6..0bd9f19fc55754be6ff339df25a6beeede8f718d 100644 (file)
@@ -7,7 +7,7 @@
 #include <linux/rbtree.h>
 #include <linux/slab.h>
 
-DEFINE_PER_CPU(void*, bpf_cgroup_storage);
+DEFINE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
 
 #ifdef CONFIG_CGROUP_BPF
 
@@ -251,6 +251,7 @@ const struct bpf_map_ops cgroup_storage_map_ops = {
 
 int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
 {
+       enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);
        int ret = -EBUSY;
 
@@ -258,11 +259,12 @@ int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
 
        if (map->prog && map->prog != prog)
                goto unlock;
-       if (prog->aux->cgroup_storage && prog->aux->cgroup_storage != _map)
+       if (prog->aux->cgroup_storage[stype] &&
+           prog->aux->cgroup_storage[stype] != _map)
                goto unlock;
 
        map->prog = prog;
-       prog->aux->cgroup_storage = _map;
+       prog->aux->cgroup_storage[stype] = _map;
        ret = 0;
 unlock:
        spin_unlock_bh(&map->lock);
@@ -272,24 +274,26 @@ unlock:
 
 void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
 {
+       enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
        struct bpf_cgroup_storage_map *map = map_to_storage(_map);
 
        spin_lock_bh(&map->lock);
        if (map->prog == prog) {
-               WARN_ON(prog->aux->cgroup_storage != _map);
+               WARN_ON(prog->aux->cgroup_storage[stype] != _map);
                map->prog = NULL;
-               prog->aux->cgroup_storage = NULL;
+               prog->aux->cgroup_storage[stype] = NULL;
        }
        spin_unlock_bh(&map->lock);
 }
 
-struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog)
+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
+                                       enum bpf_cgroup_storage_type stype)
 {
        struct bpf_cgroup_storage *storage;
        struct bpf_map *map;
        u32 pages;
 
-       map = prog->aux->cgroup_storage;
+       map = prog->aux->cgroup_storage[stype];
        if (!map)
                return NULL;
 
index b3c2d09bcf7a8e0ce162806e858cdb8fea336154..8c91d2b41b1e67526faecbefab4d9687e60a3015 100644 (file)
@@ -988,10 +988,15 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
 /* drop refcnt on maps used by eBPF program and free auxilary data */
 static void free_used_maps(struct bpf_prog_aux *aux)
 {
+       enum bpf_cgroup_storage_type stype;
        int i;
 
-       if (aux->cgroup_storage)
-               bpf_cgroup_storage_release(aux->prog, aux->cgroup_storage);
+       for_each_cgroup_storage_type(stype) {
+               if (!aux->cgroup_storage[stype])
+                       continue;
+               bpf_cgroup_storage_release(aux->prog,
+                                          aux->cgroup_storage[stype]);
+       }
 
        for (i = 0; i < aux->used_map_cnt; i++)
                bpf_map_put(aux->used_maps[i]);
index e986518d7bc37b91e15b43c38cb493ba03a754e6..e90899df585d2065a1e4f8e839ffbd5988c02cbb 100644 (file)
@@ -5171,11 +5171,15 @@ next_insn:
 /* drop refcnt of maps used by the rejected program */
 static void release_maps(struct bpf_verifier_env *env)
 {
+       enum bpf_cgroup_storage_type stype;
        int i;
 
-       if (env->prog->aux->cgroup_storage)
+       for_each_cgroup_storage_type(stype) {
+               if (!env->prog->aux->cgroup_storage[stype])
+                       continue;
                bpf_cgroup_storage_release(env->prog,
-                                          env->prog->aux->cgroup_storage);
+                       env->prog->aux->cgroup_storage[stype]);
+       }
 
        for (i = 0; i < env->used_map_cnt; i++)
                bpf_map_put(env->used_maps[i]);
index f4078830ea505ee955d4bc83010bd2fec59583fe..0c423b8cd75cce9ada273d545216112c829964e0 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/sched/signal.h>
 
 static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
-                                           struct bpf_cgroup_storage *storage)
+               struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
 {
        u32 ret;
 
@@ -28,13 +28,20 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
 
 static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
 {
-       struct bpf_cgroup_storage *storage = NULL;
+       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
+       enum bpf_cgroup_storage_type stype;
        u64 time_start, time_spent = 0;
        u32 ret = 0, i;
 
-       storage = bpf_cgroup_storage_alloc(prog);
-       if (IS_ERR(storage))
-               return PTR_ERR(storage);
+       for_each_cgroup_storage_type(stype) {
+               storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+               if (IS_ERR(storage[stype])) {
+                       storage[stype] = NULL;
+                       for_each_cgroup_storage_type(stype)
+                               bpf_cgroup_storage_free(storage[stype]);
+                       return -ENOMEM;
+               }
+       }
 
        if (!repeat)
                repeat = 1;
@@ -53,7 +60,8 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
        do_div(time_spent, repeat);
        *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
 
-       bpf_cgroup_storage_free(storage);
+       for_each_cgroup_storage_type(stype)
+               bpf_cgroup_storage_free(storage[stype]);
 
        return ret;
 }