bpf: enable program stats

author Alexei Starovoitov <ast@kernel.org>

Mon, 25 Feb 2019 22:28:39 +0000 (14:28 -0800)

committer Daniel Borkmann <daniel@iogearbox.net>

Wed, 27 Feb 2019 16:22:50 +0000 (17:22 +0100)
author Alexei Starovoitov <ast@kernel.org>
Mon, 25 Feb 2019 22:28:39 +0000 (14:28 -0800)
committer Daniel Borkmann <daniel@iogearbox.net>
Wed, 27 Feb 2019 16:22:50 +0000 (17:22 +0100)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h

index de18227b3d95941c800bfbaaffb68b976d2e9b62..a2132e09dc1c422731e9533a2a2efa2f5b0132dd 100644 (file)
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -16,6 +16,7 @@
  #include <linux/rbtree_latch.h>
  #include <linux/numa.h>
  #include <linux/wait.h>
+#include <linux/u64_stats_sync.h>
  
  struct bpf_verifier_env;
  struct perf_event;
@@ -340,6 +341,12 @@ enum bpf_cgroup_storage_type {
  
  #define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
  
+struct bpf_prog_stats {
+       u64 cnt;        /* bumped once per BPF_PROG_RUN while stats are enabled */
+       u64 nsecs;      /* sum of sched_clock() deltas measured around those runs */
+       struct u64_stats_sync syncp;    /* brackets writer updates and reader snapshots of cnt/nsecs */
+};
+
  struct bpf_prog_aux {
         atomic_t refcnt;
         u32 used_map_cnt;
@@ -389,6 +396,7 @@ struct bpf_prog_aux {
          * main prog always has linfo_idx == 0
          */
         u32 linfo_idx;
+       struct bpf_prog_stats __percpu *stats;
         union {
                 struct work_struct work;
                 struct rcu_head rcu;
@@ -559,6 +567,7 @@ void bpf_map_area_free(void *base);
  void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
  
  extern int sysctl_unprivileged_bpf_disabled;
+extern int sysctl_bpf_stats_enabled;
  
  int bpf_map_new_fd(struct bpf_map *map, int flags);
  int bpf_prog_new_fd(struct bpf_prog *prog);
diff --git a/include/linux/filter.h b/include/linux/filter.h

index f32b3eca5a0465e0dd4fbc1f0cd4728891d0d312..7e5e3db11106f719b8736ff5a81ab1da71a23993 100644 (file)
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -533,7 +533,24 @@ struct sk_filter {
         struct bpf_prog *prog;
  };
  
-#define BPF_PROG_RUN(filter, ctx)  ({ cant_sleep(); (*(filter)->bpf_func)(ctx, (filter)->insnsi); })
+DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
+
+#define BPF_PROG_RUN(prog, ctx)        ({                              \
+       u32 ret;                                                \
+       cant_sleep();                                           \
+       if (static_branch_unlikely(&bpf_stats_enabled_key)) {   \
+               struct bpf_prog_stats *stats;                   \
+               u64 start = sched_clock();                      \
+               ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \
+               stats = this_cpu_ptr(prog->aux->stats);         \
+               u64_stats_update_begin(&stats->syncp);          \
+               stats->cnt++;                                   \
+               stats->nsecs += sched_clock() - start;          \
+               u64_stats_update_end(&stats->syncp);            \
+       } else {                                                \
+               ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \
+       }                                                       \
+       ret; })
  
  #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
  
@@ -764,6 +781,7 @@ void bpf_prog_free_jited_linfo(struct bpf_prog *prog);
  void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog);
  
  struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
+struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags);
  struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
                                   gfp_t gfp_extra_flags);
  void __bpf_prog_free(struct bpf_prog *fp);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c

index ef88b167959d64fe2bb8da87a3300e65825e21f2..1c14c347f3cfe1f7c0cf8a7eccff8135b16df81f 100644 (file)
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -78,7 +78,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
         return NULL;
  }
  
-struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
+struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
  {
         gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
         struct bpf_prog_aux *aux;
@@ -104,6 +104,26 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
  
         return fp;
  }
+
+/*
+ * Allocate a BPF program together with its per-CPU run statistics.
+ *
+ * Builds on bpf_prog_alloc_no_stats() and attaches a per-CPU
+ * struct bpf_prog_stats to prog->aux->stats. Returns NULL if either
+ * allocation fails; when the stats allocation fails, the freshly
+ * allocated program and its aux are released before returning.
+ */
+struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
+{
+       gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
+       struct bpf_prog *prog;
+       int cpu;
+
+       prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags);
+       if (!prog)
+               return NULL;
+
+       prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
+       if (!prog->aux->stats) {
+               kfree(prog->aux);
+               vfree(prog);
+               return NULL;
+       }
+
+       /* stats is a __percpu cookie, not a dereferenceable address:
+        * resolve each CPU's instance before initializing its syncp.
+        */
+       for_each_possible_cpu(cpu) {
+               struct bpf_prog_stats *pstats;
+
+               pstats = per_cpu_ptr(prog->aux->stats, cpu);
+               u64_stats_init(&pstats->syncp);
+       }
+       return prog;
+}
  EXPORT_SYMBOL_GPL(bpf_prog_alloc);
  
  int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
@@ -231,7 +251,10 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
  
  void __bpf_prog_free(struct bpf_prog *fp)
  {
-       kfree(fp->aux);
+       if (fp->aux) {
+               free_percpu(fp->aux->stats);
+               kfree(fp->aux);
+       }
         vfree(fp);
  }
  
@@ -2069,6 +2092,10 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
         return -EFAULT;
  }
  
+DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);        /* defaults to off; tested by BPF_PROG_RUN before collecting stats */
+EXPORT_SYMBOL(bpf_stats_enabled_key);
+int sysctl_bpf_stats_enabled __read_mostly;    /* backing value of the "bpf_stats_enabled" sysctl entry */
+
  /* All definitions of tracepoints related to BPF. */
  #define CREATE_TRACE_POINTS
  #include <linux/bpf_trace.h>
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c

index ec7c552af76bccd24082a10401f43bd5d83146cf..31cf66fc3f5cfab93ea26265da363f6936ba6cc4 100644 (file)
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1283,24 +1283,54 @@ static int bpf_prog_release(struct inode *inode, struct file *filp)
         return 0;
  }
  
+/*
+ * Sum a program's per-CPU run statistics into @stats.
+ *
+ * Each possible CPU's counters are sampled inside a u64_stats fetch/retry
+ * loop, and the totals are stored in @stats->cnt and @stats->nsecs.
+ * @stats->syncp is left untouched.
+ */
+static void bpf_prog_get_stats(const struct bpf_prog *prog,
+                              struct bpf_prog_stats *stats)
+{
+       u64 total_nsecs = 0, total_cnt = 0;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               const struct bpf_prog_stats *pcpu =
+                       per_cpu_ptr(prog->aux->stats, cpu);
+               u64 snap_nsecs, snap_cnt;
+               unsigned int seq;
+
+               /* retry until a consistent pair of counters was read */
+               do {
+                       seq = u64_stats_fetch_begin_irq(&pcpu->syncp);
+                       snap_cnt = pcpu->cnt;
+                       snap_nsecs = pcpu->nsecs;
+               } while (u64_stats_fetch_retry_irq(&pcpu->syncp, seq));
+
+               total_cnt += snap_cnt;
+               total_nsecs += snap_nsecs;
+       }
+
+       stats->cnt = total_cnt;
+       stats->nsecs = total_nsecs;
+}
+
  #ifdef CONFIG_PROC_FS
  static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
  {
         const struct bpf_prog *prog = filp->private_data;
         char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
+       struct bpf_prog_stats stats;
  
+       bpf_prog_get_stats(prog, &stats);
         bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
         seq_printf(m,
                    "prog_type:\t%u\n"
                    "prog_jited:\t%u\n"
                    "prog_tag:\t%s\n"
                    "memlock:\t%llu\n"
-                  "prog_id:\t%u\n",
+                  "prog_id:\t%u\n"
+                  "run_time_ns:\t%llu\n"
+                  "run_cnt:\t%llu\n",
                    prog->type,
                    prog->jited,
                    prog_tag,
                    prog->pages * 1ULL << PAGE_SHIFT,
-                  prog->aux->id);
+                  prog->aux->id,
+                  stats.nsecs,
+                  stats.cnt);
  }
  #endif
  
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c

index 1b9496c413833ad1f88c98eeee387f349cf241e0..0e4edd7e3c5f1888f393910029d6c5fa0bfc8915 100644 (file)
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7320,7 +7320,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
                 subprog_end = env->subprog_info[i + 1].start;
  
                 len = subprog_end - subprog_start;
-               func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
+               /* BPF_PROG_RUN doesn't call subprogs directly,
+                * hence main prog stats include the runtime of subprogs.
+                * subprogs don't have IDs and not reachable via prog_get_next_id
+                * func[i]->aux->stats will never be accessed and stays NULL
+                */
+               func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
                 if (!func[i])
                         goto out_free;
                 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
diff --git a/kernel/sysctl.c b/kernel/sysctl.c

index ba4d9e85feb8ca69fb533049ab8a3a4a556bec9d..86e0771352f2452d68b87ef27ac77a7846cf152d 100644 (file)
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -224,6 +224,9 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
  #endif
  static int proc_dopipe_max_size(struct ctl_table *table, int write,
                 void __user *buffer, size_t *lenp, loff_t *ppos);
+static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
+                                         void __user *buffer, size_t *lenp,
+                                         loff_t *ppos);
  
  #ifdef CONFIG_MAGIC_SYSRQ
  /* Note: sysrq code uses its own private copy */
@@ -1230,6 +1233,15 @@ static struct ctl_table kern_table[] = {
                 .extra2         = &one,
         },
  #endif
+       {
+               .procname       = "bpf_stats_enabled",
+               .data           = &sysctl_bpf_stats_enabled,
+               .maxlen         = sizeof(sysctl_bpf_stats_enabled),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax_bpf_stats,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
  #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
         {
                 .procname       = "panic_on_rcu_stall",
@@ -3260,6 +3272,28 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
  
  #endif /* CONFIG_PROC_SYSCTL */
  
+/*
+ * sysctl handler for "bpf_stats_enabled".
+ *
+ * Writes require CAP_SYS_ADMIN (-EPERM otherwise). The value is parsed
+ * into a local copy through proc_dointvec_minmax(); only after a
+ * successful write is it stored back into table->data and the
+ * bpf_stats_enabled_key static branch switched to match. Reads and
+ * failed writes return proc_dointvec_minmax()'s result unchanged.
+ */
+static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
+                                         void __user *buffer, size_t *lenp,
+                                         loff_t *ppos)
+{
+       struct ctl_table shadow = *table;
+       int *knob = table->data;
+       int val = *knob;
+       int err;
+
+       if (write && !capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       /* let the generic helper operate on the local copy only */
+       shadow.data = &val;
+       err = proc_dointvec_minmax(&shadow, write, buffer, lenp, ppos);
+       if (err || !write)
+               return err;
+
+       *knob = val;
+       if (val)
+               static_branch_enable(&bpf_stats_enabled_key);
+       else
+               static_branch_disable(&bpf_stats_enabled_key);
+
+       return 0;
+}
+
  /*
   * No sense putting this after each symbol definition, twice,
   * exception granted :-)
author	Alexei Starovoitov <ast@kernel.org>
	Mon, 25 Feb 2019 22:28:39 +0000 (14:28 -0800)
committer	Daniel Borkmann <daniel@iogearbox.net>
	Wed, 27 Feb 2019 16:22:50 +0000 (17:22 +0100)
include/linux/bpf.h		patch \| blob \| history
include/linux/filter.h		patch \| blob \| history
kernel/bpf/core.c		patch \| blob \| history
kernel/bpf/syscall.c		patch \| blob \| history
kernel/bpf/verifier.c		patch \| blob \| history
kernel/sysctl.c		patch \| blob \| history