From 8fe45924387be6b5c1be59a7eb330790c61d5d10 Mon Sep 17 00:00:00 2001 From: Teng Qin Date: Mon, 24 Apr 2017 19:00:37 -0700 Subject: [PATCH] bpf: map_get_next_key to return first key on NULL When iterating through a map, we need to find a key that does not exist in the map so map_get_next_key will give us the first key of the map. This often requires a lot of guessing in production systems. This patch makes map_get_next_key return the first key when the key pointer in the parameter is NULL. Signed-off-by: Teng Qin Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/trace/events/bpf.h | 10 ++++++--- kernel/bpf/arraymap.c | 2 +- kernel/bpf/hashtab.c | 9 ++++---- kernel/bpf/syscall.c | 20 ++++++++++------- tools/testing/selftests/bpf/test_maps.c | 29 +++++++++++++++++++++---- 5 files changed, 50 insertions(+), 20 deletions(-) diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h index c3a53fd47ff1..52c8425d144b 100644 --- a/include/trace/events/bpf.h +++ b/include/trace/events/bpf.h @@ -321,11 +321,14 @@ TRACE_EVENT(bpf_map_next_key, __dynamic_array(u8, key, map->key_size) __dynamic_array(u8, nxt, map->key_size) __field(bool, key_trunc) + __field(bool, key_null) __field(int, ufd) ), TP_fast_assign( - memcpy(__get_dynamic_array(key), key, map->key_size); + if (key) + memcpy(__get_dynamic_array(key), key, map->key_size); + __entry->key_null = !key; memcpy(__get_dynamic_array(nxt), key_next, map->key_size); __entry->type = map->map_type; __entry->key_len = min(map->key_size, 16U); @@ -336,8 +339,9 @@ TRACE_EVENT(bpf_map_next_key, TP_printk("map type=%s ufd=%d key=[%s%s] next=[%s%s]", __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), __entry->ufd, - __print_hex(__get_dynamic_array(key), __entry->key_len), - __entry->key_trunc ? " ..." : "", + __entry->key_null ? "NULL" : __print_hex(__get_dynamic_array(key), + __entry->key_len), + __entry->key_trunc && !__entry->key_null ? " ..." : "", __print_hex(__get_dynamic_array(nxt), __entry->key_len), __entry->key_trunc ? " ..." : "") ); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index ec621df5a97a..5e00b2333c26 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -182,7 +182,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_array *array = container_of(map, struct bpf_array, map); - u32 index = *(u32 *)key; + u32 index = key ? *(u32 *)key : U32_MAX; u32 *next = (u32 *)next_key; if (index >= array->map.max_entries) { diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index bc80c038e430..004334ea13ba 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -540,12 +540,15 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) struct hlist_nulls_head *head; struct htab_elem *l, *next_l; u32 hash, key_size; - int i; + int i = 0; WARN_ON_ONCE(!rcu_read_lock_held()); key_size = map->key_size; + if (!key) + goto find_first_elem; + hash = htab_map_hash(key, key_size); head = select_bucket(htab, hash); @@ -553,10 +556,8 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) /* lookup the key */ l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); - if (!l) { - i = 0; + if (!l) goto find_first_elem; - } /* key was found, get next key in the same bucket */ next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)), diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b89288e2b589..13642c73dca0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -536,14 +536,18 @@ static int map_get_next_key(union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); - err = -ENOMEM; - key = kmalloc(map->key_size, GFP_USER); - if (!key) - goto err_put; - - err = -EFAULT; - if (copy_from_user(key, ukey, map->key_size) != 0) - goto free_key; + if (ukey) { + err = -ENOMEM; + key = kmalloc(map->key_size, GFP_USER); + if (!key) + goto err_put; + + err = -EFAULT; + if (copy_from_user(key, ukey, map->key_size) != 0) + goto free_key; + } else { + key = NULL; + } err = -ENOMEM; next_key = kmalloc(map->key_size, GFP_USER); diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 20f1871874df..a977c4f7b0ce 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -28,7 +28,7 @@ static int map_flags; static void test_hashmap(int task, void *data) { - long long key, next_key, value; + long long key, next_key, first_key, value; int fd; fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), @@ -89,10 +89,13 @@ static void test_hashmap(int task, void *data) assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT); /* Iterate over two elements. */ + assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 && + (first_key == 1 || first_key == 2)); assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 && - (next_key == 1 || next_key == 2)); + (next_key == first_key)); assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 && - (next_key == 1 || next_key == 2)); + (next_key == 1 || next_key == 2) && + (next_key != first_key)); assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 && errno == ENOENT); @@ -105,6 +108,8 @@ static void test_hashmap(int task, void *data) key = 0; /* Check that map is empty. */ + assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 && + errno == ENOENT); assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 && errno == ENOENT); @@ -133,7 +138,7 @@ static void test_hashmap_percpu(int task, void *data) { unsigned int nr_cpus = bpf_num_possible_cpus(); long long value[nr_cpus]; - long long key, next_key; + long long key, next_key, first_key; int expected_key_mask = 0; int fd, i; @@ -193,7 +198,13 @@ static void test_hashmap_percpu(int task, void *data) assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT); /* Iterate over two elements. */ + assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 && + ((expected_key_mask & first_key) == first_key)); while (!bpf_map_get_next_key(fd, &key, &next_key)) { + if (first_key) { + assert(next_key == first_key); + first_key = 0; + } assert((expected_key_mask & next_key) == next_key); expected_key_mask &= ~next_key; @@ -219,6 +230,8 @@ static void test_hashmap_percpu(int task, void *data) key = 0; /* Check that map is empty. */ + assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 && + errno == ENOENT); assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 && errno == ENOENT); @@ -264,6 +277,8 @@ static void test_arraymap(int task, void *data) assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT); /* Iterate over two elements. */ + assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 && + next_key == 0); assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 && next_key == 0); assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 && @@ -319,6 +334,8 @@ static void test_arraymap_percpu(int task, void *data) assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT); /* Iterate over two elements. */ + assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 && + next_key == 0); assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 && next_key == 0); assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 && @@ -400,6 +417,8 @@ static void test_map_large(void) errno == E2BIG); /* Iterate through all elements. */ + assert(bpf_map_get_next_key(fd, NULL, &key) == 0); + key.c = -1; for (i = 0; i < MAP_SIZE; i++) assert(bpf_map_get_next_key(fd, &key, &key) == 0); assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT); @@ -499,6 +518,7 @@ static void test_map_parallel(void) errno == EEXIST); /* Check that all elements were inserted. */ + assert(bpf_map_get_next_key(fd, NULL, &key) == 0); key = -1; for (i = 0; i < MAP_SIZE; i++) assert(bpf_map_get_next_key(fd, &key, &key) == 0); @@ -518,6 +538,7 @@ static void test_map_parallel(void) /* Nothing should be left. */ key = -1; + assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT); assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT); } -- 2.30.2