#include <linux/rbtree_latch.h>
#include <linux/numa.h>
#include <linux/wait.h>
+#include <linux/u64_stats_sync.h>
struct bpf_verifier_env;
struct perf_event;
#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
+struct bpf_prog_stats {
+ u64 cnt;
+ u64 nsecs;
+ struct u64_stats_sync syncp;
+};
+
struct bpf_prog_aux {
atomic_t refcnt;
u32 used_map_cnt;
* main prog always has linfo_idx == 0
*/
u32 linfo_idx;
+ struct bpf_prog_stats __percpu *stats;
union {
struct work_struct work;
struct rcu_head rcu;
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
extern int sysctl_unprivileged_bpf_disabled;
+extern int sysctl_bpf_stats_enabled;
int bpf_map_new_fd(struct bpf_map *map, int flags);
int bpf_prog_new_fd(struct bpf_prog *prog);
struct bpf_prog *prog;
};
-#define BPF_PROG_RUN(filter, ctx) ({ cant_sleep(); (*(filter)->bpf_func)(ctx, (filter)->insnsi); })
+DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
+
+#define BPF_PROG_RUN(prog, ctx) ({ \
+ u32 ret; \
+ cant_sleep(); \
+ if (static_branch_unlikely(&bpf_stats_enabled_key)) { \
+ struct bpf_prog_stats *stats; \
+ u64 start = sched_clock(); \
+ ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \
+ stats = this_cpu_ptr(prog->aux->stats); \
+ u64_stats_update_begin(&stats->syncp); \
+ stats->cnt++; \
+ stats->nsecs += sched_clock() - start; \
+ u64_stats_update_end(&stats->syncp); \
+ } else { \
+ ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \
+ } \
+ ret; })
#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog);
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
+struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags);
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags);
void __bpf_prog_free(struct bpf_prog *fp);
return NULL;
}
-struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
+struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
{
gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog_aux *aux;
return fp;
}
+
+struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
+{
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
+ struct bpf_prog *prog;
+
+ prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags);
+ if (!prog)
+ return NULL;
+
+ prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
+ if (!prog->aux->stats) {
+ kfree(prog->aux);
+ vfree(prog);
+ return NULL;
+ }
+
+ u64_stats_init(&prog->aux->stats->syncp);
+ return prog;
+}
EXPORT_SYMBOL_GPL(bpf_prog_alloc);
int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
void __bpf_prog_free(struct bpf_prog *fp)
{
- kfree(fp->aux);
+ if (fp->aux) {
+ free_percpu(fp->aux->stats);
+ kfree(fp->aux);
+ }
vfree(fp);
}
return -EFAULT;
}
+DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
+EXPORT_SYMBOL(bpf_stats_enabled_key);
+int sysctl_bpf_stats_enabled __read_mostly;
+
/* All definitions of tracepoints related to BPF. */
#define CREATE_TRACE_POINTS
#include <linux/bpf_trace.h>
return 0;
}
+static void bpf_prog_get_stats(const struct bpf_prog *prog,
+ struct bpf_prog_stats *stats)
+{
+ u64 nsecs = 0, cnt = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ const struct bpf_prog_stats *st;
+ unsigned int start;
+ u64 tnsecs, tcnt;
+
+ st = per_cpu_ptr(prog->aux->stats, cpu);
+ do {
+ start = u64_stats_fetch_begin_irq(&st->syncp);
+ tnsecs = st->nsecs;
+ tcnt = st->cnt;
+ } while (u64_stats_fetch_retry_irq(&st->syncp, start));
+ nsecs += tnsecs;
+ cnt += tcnt;
+ }
+ stats->nsecs = nsecs;
+ stats->cnt = cnt;
+}
+
#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
const struct bpf_prog *prog = filp->private_data;
char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
+ struct bpf_prog_stats stats;
+ bpf_prog_get_stats(prog, &stats);
bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
seq_printf(m,
"prog_type:\t%u\n"
"prog_jited:\t%u\n"
"prog_tag:\t%s\n"
"memlock:\t%llu\n"
- "prog_id:\t%u\n",
+ "prog_id:\t%u\n"
+ "run_time_ns:\t%llu\n"
+ "run_cnt:\t%llu\n",
prog->type,
prog->jited,
prog_tag,
prog->pages * 1ULL << PAGE_SHIFT,
- prog->aux->id);
+ prog->aux->id,
+ stats.nsecs,
+ stats.cnt);
}
#endif
subprog_end = env->subprog_info[i + 1].start;
len = subprog_end - subprog_start;
- func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
+ /* BPF_PROG_RUN doesn't call subprogs directly,
+ * hence main prog stats include the runtime of subprogs.
+ * subprogs don't have IDs and not reachable via prog_get_next_id
+ * func[i]->aux->stats will never be accessed and stays NULL
+ */
+ func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
if (!func[i])
goto out_free;
memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
#endif
static int proc_dopipe_max_size(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
+static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
#ifdef CONFIG_MAGIC_SYSRQ
/* Note: sysrq code uses its own private copy */
.extra2 = &one,
},
#endif
+ {
+ .procname = "bpf_stats_enabled",
+ .data = &sysctl_bpf_stats_enabled,
+ .maxlen = sizeof(sysctl_bpf_stats_enabled),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_bpf_stats,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
{
.procname = "panic_on_rcu_stall",
#endif /* CONFIG_PROC_SYSCTL */
+static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret, bpf_stats = *(int *)table->data;
+ struct ctl_table tmp = *table;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ tmp.data = &bpf_stats;
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret) {
+ *(int *)table->data = bpf_stats;
+ if (bpf_stats)
+ static_branch_enable(&bpf_stats_enabled_key);
+ else
+ static_branch_disable(&bpf_stats_enabled_key);
+ }
+ return ret;
+}
+
/*
* No sense putting this after each symbol definition, twice,
* exception granted :-)