2a5025855a5515b83f927ad492fb2e2ab61f1b32
[openwrt/openwrt.git] /
1 From: Pablo Neira Ayuso <pablo@netfilter.org>
2 Date: Sun, 7 Jan 2018 01:04:07 +0100
3 Subject: [PATCH] netfilter: nf_tables: add flow table netlink frontend
4
5 This patch introduces a netlink control plane to create, delete and dump
6 flow tables. Flow tables are identified by name, this name is used from
7 rules to refer to a specific flow table. Flow tables use the rhashtable
8 class and a generic garbage collector to remove expired entries.
9
10 This also adds the infrastructure to add different flow table types, so
11 we can add one for each layer 3 protocol family.
12
13 Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
14 ---
15 create mode 100644 include/net/netfilter/nf_flow_table.h
16
17 --- /dev/null
18 +++ b/include/net/netfilter/nf_flow_table.h
19 @@ -0,0 +1,23 @@
20 +#ifndef _NF_FLOW_TABLE_H
21 +#define _NF_FLOW_TABLE_H
22 +
23 +#include <linux/rhashtable.h>
24 +
25 +struct nf_flowtable;
26 +
27 +struct nf_flowtable_type {
28 + struct list_head list;
29 + int family;
30 + void (*gc)(struct work_struct *work);
31 + const struct rhashtable_params *params;
32 + nf_hookfn *hook;
33 + struct module *owner;
34 +};
35 +
36 +struct nf_flowtable {
37 + struct rhashtable rhashtable;
38 + const struct nf_flowtable_type *type;
39 + struct delayed_work gc_work;
40 +};
41 +
42 +#endif /* _NF_FLOW_TABLE_H */
43 --- a/include/net/netfilter/nf_tables.h
44 +++ b/include/net/netfilter/nf_tables.h
45 @@ -9,6 +9,7 @@
46 #include <linux/netfilter/x_tables.h>
47 #include <linux/netfilter/nf_tables.h>
48 #include <linux/u64_stats_sync.h>
49 +#include <net/netfilter/nf_flow_table.h>
50 #include <net/netlink.h>
51
52 #define NFT_JUMP_STACK_SIZE 16
53 @@ -933,6 +934,7 @@ unsigned int nft_do_chain(struct nft_pkt
54 * @chains: chains in the table
55 * @sets: sets in the table
56 * @objects: stateful objects in the table
57 + * @flowtables: flow tables in the table
58 * @hgenerator: handle generator state
59 * @use: number of chain references to this table
60 * @flags: table flag (see enum nft_table_flags)
61 @@ -944,6 +946,7 @@ struct nft_table {
62 struct list_head chains;
63 struct list_head sets;
64 struct list_head objects;
65 + struct list_head flowtables;
66 u64 hgenerator;
67 u32 use;
68 u16 flags:14,
69 @@ -1075,6 +1078,44 @@ int nft_register_obj(struct nft_object_t
70 void nft_unregister_obj(struct nft_object_type *obj_type);
71
72 /**
73 + * struct nft_flowtable - nf_tables flow table
74 + *
75 + * @list: flow table list node in table list
76 + * @table: the table the flow table is contained in
77 + * @name: name of this flow table
78 + * @hooknum: hook number
79 + * @priority: hook priority
80 + * @ops_len: number of hooks in array
81 + * @genmask: generation mask
82 + * @use: number of references to this flow table
83 + * @data: rhashtable and garbage collector
84 + * @ops: array of hooks
85 + */
86 +struct nft_flowtable {
87 + struct list_head list;
88 + struct nft_table *table;
89 + char *name;
90 + int hooknum;
91 + int priority;
92 + int ops_len;
93 + u32 genmask:2,
94 + use:30;
95 + /* runtime data below here */
96 + struct nf_hook_ops *ops ____cacheline_aligned;
97 + struct nf_flowtable data;
98 +};
99 +
100 +struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
101 + const struct nlattr *nla,
102 + u8 genmask);
103 +void nft_flow_table_iterate(struct net *net,
104 + void (*iter)(struct nf_flowtable *flowtable, void *data),
105 + void *data);
106 +
107 +void nft_register_flowtable_type(struct nf_flowtable_type *type);
108 +void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
109 +
110 +/**
111 * struct nft_traceinfo - nft tracing information and state
112 *
113 * @pkt: pktinfo currently processed
114 @@ -1310,4 +1351,11 @@ struct nft_trans_obj {
115 #define nft_trans_obj(trans) \
116 (((struct nft_trans_obj *)trans->data)->obj)
117
118 +struct nft_trans_flowtable {
119 + struct nft_flowtable *flowtable;
120 +};
121 +
122 +#define nft_trans_flowtable(trans) \
123 + (((struct nft_trans_flowtable *)trans->data)->flowtable)
124 +
125 #endif /* _NET_NF_TABLES_H */
126 --- a/include/uapi/linux/netfilter/nf_tables.h
127 +++ b/include/uapi/linux/netfilter/nf_tables.h
128 @@ -92,6 +92,9 @@ enum nft_verdicts {
129 * @NFT_MSG_GETOBJ: get a stateful object (enum nft_obj_attributes)
130 * @NFT_MSG_DELOBJ: delete a stateful object (enum nft_obj_attributes)
131 * @NFT_MSG_GETOBJ_RESET: get and reset a stateful object (enum nft_obj_attributes)
132 + * @NFT_MSG_NEWFLOWTABLE: add new flow table (enum nft_flowtable_attributes)
133 + * @NFT_MSG_GETFLOWTABLE: get flow table (enum nft_flowtable_attributes)
134 + * @NFT_MSG_DELFLOWTABLE: delete flow table (enum nft_flowtable_attributes)
135 */
136 enum nf_tables_msg_types {
137 NFT_MSG_NEWTABLE,
138 @@ -116,6 +119,9 @@ enum nf_tables_msg_types {
139 NFT_MSG_GETOBJ,
140 NFT_MSG_DELOBJ,
141 NFT_MSG_GETOBJ_RESET,
142 + NFT_MSG_NEWFLOWTABLE,
143 + NFT_MSG_GETFLOWTABLE,
144 + NFT_MSG_DELFLOWTABLE,
145 NFT_MSG_MAX,
146 };
147
148 @@ -1310,6 +1316,53 @@ enum nft_object_attributes {
149 #define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1)
150
151 /**
152 + * enum nft_flowtable_attributes - nf_tables flow table netlink attributes
153 + *
154 + * @NFTA_FLOWTABLE_TABLE: name of the table containing the flow table (NLA_STRING)
155 + * @NFTA_FLOWTABLE_NAME: name of this flow table (NLA_STRING)
156 + * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration (NLA_NESTED)
157 + * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32)
158 + */
159 +enum nft_flowtable_attributes {
160 + NFTA_FLOWTABLE_UNSPEC,
161 + NFTA_FLOWTABLE_TABLE,
162 + NFTA_FLOWTABLE_NAME,
163 + NFTA_FLOWTABLE_HOOK,
164 + NFTA_FLOWTABLE_USE,
165 + __NFTA_FLOWTABLE_MAX
166 +};
167 +#define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1)
168 +
169 +/**
170 + * enum nft_flowtable_hook_attributes - nf_tables flow table hook netlink attributes
171 + *
172 + * @NFTA_FLOWTABLE_HOOK_NUM: netfilter hook number (NLA_U32)
173 + * @NFTA_FLOWTABLE_HOOK_PRIORITY: netfilter hook priority (NLA_U32)
174 + * @NFTA_FLOWTABLE_HOOK_DEVS: input devices this flow table is bound to (NLA_NESTED)
175 + */
176 +enum nft_flowtable_hook_attributes {
177 + NFTA_FLOWTABLE_HOOK_UNSPEC,
178 + NFTA_FLOWTABLE_HOOK_NUM,
179 + NFTA_FLOWTABLE_HOOK_PRIORITY,
180 + NFTA_FLOWTABLE_HOOK_DEVS,
181 + __NFTA_FLOWTABLE_HOOK_MAX
182 +};
183 +#define NFTA_FLOWTABLE_HOOK_MAX (__NFTA_FLOWTABLE_HOOK_MAX - 1)
184 +
185 +/**
186 + * enum nft_devices_attributes - nf_tables device netlink attributes
187 + *
188 + * @NFTA_DEVICE_NAME: name of this device (NLA_STRING)
189 + */
190 +enum nft_devices_attributes {
191 + NFTA_DEVICE_UNSPEC,
192 + NFTA_DEVICE_NAME,
193 + __NFTA_DEVICE_MAX
194 +};
195 +#define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1)
196 +
197 +
198 +/**
199 * enum nft_trace_attributes - nf_tables trace netlink attributes
200 *
201 * @NFTA_TRACE_TABLE: name of the table (NLA_STRING)
202 --- a/net/netfilter/nf_tables_api.c
203 +++ b/net/netfilter/nf_tables_api.c
204 @@ -17,6 +17,7 @@
205 #include <linux/netfilter.h>
206 #include <linux/netfilter/nfnetlink.h>
207 #include <linux/netfilter/nf_tables.h>
208 +#include <net/netfilter/nf_flow_table.h>
209 #include <net/netfilter/nf_tables_core.h>
210 #include <net/netfilter/nf_tables.h>
211 #include <net/net_namespace.h>
212 @@ -24,6 +25,7 @@
213
214 static LIST_HEAD(nf_tables_expressions);
215 static LIST_HEAD(nf_tables_objects);
216 +static LIST_HEAD(nf_tables_flowtables);
217
218 /**
219 * nft_register_afinfo - register nf_tables address family info
220 @@ -345,6 +347,40 @@ static int nft_delobj(struct nft_ctx *ct
221 return err;
222 }
223
224 +static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
225 + struct nft_flowtable *flowtable)
226 +{
227 + struct nft_trans *trans;
228 +
229 + trans = nft_trans_alloc(ctx, msg_type,
230 + sizeof(struct nft_trans_flowtable));
231 + if (trans == NULL)
232 + return -ENOMEM;
233 +
234 + if (msg_type == NFT_MSG_NEWFLOWTABLE)
235 + nft_activate_next(ctx->net, flowtable);
236 +
237 + nft_trans_flowtable(trans) = flowtable;
238 + list_add_tail(&trans->list, &ctx->net->nft.commit_list);
239 +
240 + return 0;
241 +}
242 +
243 +static int nft_delflowtable(struct nft_ctx *ctx,
244 + struct nft_flowtable *flowtable)
245 +{
246 + int err;
247 +
248 + err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
249 + if (err < 0)
250 + return err;
251 +
252 + nft_deactivate_next(ctx->net, flowtable);
253 + ctx->table->use--;
254 +
255 + return err;
256 +}
257 +
258 /*
259 * Tables
260 */
261 @@ -728,6 +764,7 @@ static int nf_tables_newtable(struct net
262 INIT_LIST_HEAD(&table->chains);
263 INIT_LIST_HEAD(&table->sets);
264 INIT_LIST_HEAD(&table->objects);
265 + INIT_LIST_HEAD(&table->flowtables);
266 table->flags = flags;
267
268 nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
269 @@ -749,10 +786,11 @@ err1:
270
271 static int nft_flush_table(struct nft_ctx *ctx)
272 {
273 - int err;
274 + struct nft_flowtable *flowtable, *nft;
275 struct nft_chain *chain, *nc;
276 struct nft_object *obj, *ne;
277 struct nft_set *set, *ns;
278 + int err;
279
280 list_for_each_entry(chain, &ctx->table->chains, list) {
281 if (!nft_is_active_next(ctx->net, chain))
282 @@ -778,6 +816,12 @@ static int nft_flush_table(struct nft_ct
283 goto out;
284 }
285
286 + list_for_each_entry_safe(flowtable, nft, &ctx->table->flowtables, list) {
287 + err = nft_delflowtable(ctx, flowtable);
288 + if (err < 0)
289 + goto out;
290 + }
291 +
292 list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) {
293 err = nft_delobj(ctx, obj);
294 if (err < 0)
295 @@ -4767,6 +4811,605 @@ static void nf_tables_obj_notify(const s
296 ctx->afi->family, ctx->report, GFP_KERNEL);
297 }
298
299 +/*
300 + * Flow tables
301 + */
302 +void nft_register_flowtable_type(struct nf_flowtable_type *type)
303 +{
304 + nfnl_lock(NFNL_SUBSYS_NFTABLES);
305 + list_add_tail_rcu(&type->list, &nf_tables_flowtables);
306 + nfnl_unlock(NFNL_SUBSYS_NFTABLES);
307 +}
308 +EXPORT_SYMBOL_GPL(nft_register_flowtable_type);
309 +
310 +void nft_unregister_flowtable_type(struct nf_flowtable_type *type)
311 +{
312 + nfnl_lock(NFNL_SUBSYS_NFTABLES);
313 + list_del_rcu(&type->list);
314 + nfnl_unlock(NFNL_SUBSYS_NFTABLES);
315 +}
316 +EXPORT_SYMBOL_GPL(nft_unregister_flowtable_type);
317 +
318 +static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
319 + [NFTA_FLOWTABLE_TABLE] = { .type = NLA_STRING,
320 + .len = NFT_NAME_MAXLEN - 1 },
321 + [NFTA_FLOWTABLE_NAME] = { .type = NLA_STRING,
322 + .len = NFT_NAME_MAXLEN - 1 },
323 + [NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED },
324 +};
325 +
326 +struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
327 + const struct nlattr *nla,
328 + u8 genmask)
329 +{
330 + struct nft_flowtable *flowtable;
331 +
332 + list_for_each_entry(flowtable, &table->flowtables, list) {
333 + if (!nla_strcmp(nla, flowtable->name) &&
334 + nft_active_genmask(flowtable, genmask))
335 + return flowtable;
336 + }
337 + return ERR_PTR(-ENOENT);
338 +}
339 +EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup);
340 +
341 +#define NFT_FLOWTABLE_DEVICE_MAX 8
342 +
343 +static int nf_tables_parse_devices(const struct nft_ctx *ctx,
344 + const struct nlattr *attr,
345 + struct net_device *dev_array[], int *len)
346 +{
347 + const struct nlattr *tmp;
348 + struct net_device *dev;
349 + char ifname[IFNAMSIZ];
350 + int rem, n = 0, err;
351 +
352 + nla_for_each_nested(tmp, attr, rem) {
353 + if (nla_type(tmp) != NFTA_DEVICE_NAME) {
354 + err = -EINVAL;
355 + goto err1;
356 + }
357 +
358 + nla_strlcpy(ifname, tmp, IFNAMSIZ);
359 + dev = dev_get_by_name(ctx->net, ifname);
360 + if (!dev) {
361 + err = -ENOENT;
362 + goto err1;
363 + }
364 +
365 + dev_array[n++] = dev;
366 + if (n == NFT_FLOWTABLE_DEVICE_MAX) {
367 + err = -EFBIG;
368 + goto err1;
369 + }
370 + }
371 + if (!len)
372 + return -EINVAL;
373 +
374 + err = 0;
375 +err1:
376 + *len = n;
377 + return err;
378 +}
379 +
380 +static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = {
381 + [NFTA_FLOWTABLE_HOOK_NUM] = { .type = NLA_U32 },
382 + [NFTA_FLOWTABLE_HOOK_PRIORITY] = { .type = NLA_U32 },
383 + [NFTA_FLOWTABLE_HOOK_DEVS] = { .type = NLA_NESTED },
384 +};
385 +
386 +static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
387 + const struct nlattr *attr,
388 + struct nft_flowtable *flowtable)
389 +{
390 + struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX];
391 + struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
392 + struct nf_hook_ops *ops;
393 + int hooknum, priority;
394 + int err, n = 0, i;
395 +
396 + err = nla_parse_nested(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
397 + nft_flowtable_hook_policy, NULL);
398 + if (err < 0)
399 + return err;
400 +
401 + if (!tb[NFTA_FLOWTABLE_HOOK_NUM] ||
402 + !tb[NFTA_FLOWTABLE_HOOK_PRIORITY] ||
403 + !tb[NFTA_FLOWTABLE_HOOK_DEVS])
404 + return -EINVAL;
405 +
406 + hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
407 + if (hooknum >= ctx->afi->nhooks)
408 + return -EINVAL;
409 +
410 + priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
411 +
412 + err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS],
413 + dev_array, &n);
414 + if (err < 0)
415 + goto err1;
416 +
417 + ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL);
418 + if (!ops) {
419 + err = -ENOMEM;
420 + goto err1;
421 + }
422 +
423 + flowtable->ops = ops;
424 + flowtable->ops_len = n;
425 +
426 + for (i = 0; i < n; i++) {
427 + flowtable->ops[i].pf = NFPROTO_NETDEV;
428 + flowtable->ops[i].hooknum = hooknum;
429 + flowtable->ops[i].priority = priority;
430 + flowtable->ops[i].priv = &flowtable->data.rhashtable;
431 + flowtable->ops[i].hook = flowtable->data.type->hook;
432 + flowtable->ops[i].dev = dev_array[i];
433 + }
434 +
435 + err = 0;
436 +err1:
437 + for (i = 0; i < n; i++)
438 + dev_put(dev_array[i]);
439 +
440 + return err;
441 +}
442 +
443 +static const struct nf_flowtable_type *
444 +__nft_flowtable_type_get(const struct nft_af_info *afi)
445 +{
446 + const struct nf_flowtable_type *type;
447 +
448 + list_for_each_entry(type, &nf_tables_flowtables, list) {
449 + if (afi->family == type->family)
450 + return type;
451 + }
452 + return NULL;
453 +}
454 +
455 +static const struct nf_flowtable_type *
456 +nft_flowtable_type_get(const struct nft_af_info *afi)
457 +{
458 + const struct nf_flowtable_type *type;
459 +
460 + type = __nft_flowtable_type_get(afi);
461 + if (type != NULL && try_module_get(type->owner))
462 + return type;
463 +
464 +#ifdef CONFIG_MODULES
465 + if (type == NULL) {
466 + nfnl_unlock(NFNL_SUBSYS_NFTABLES);
467 + request_module("nf-flowtable-%u", afi->family);
468 + nfnl_lock(NFNL_SUBSYS_NFTABLES);
469 + if (__nft_flowtable_type_get(afi))
470 + return ERR_PTR(-EAGAIN);
471 + }
472 +#endif
473 + return ERR_PTR(-ENOENT);
474 +}
475 +
476 +void nft_flow_table_iterate(struct net *net,
477 + void (*iter)(struct nf_flowtable *flowtable, void *data),
478 + void *data)
479 +{
480 + struct nft_flowtable *flowtable;
481 + const struct nft_af_info *afi;
482 + const struct nft_table *table;
483 +
484 + rcu_read_lock();
485 + list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
486 + list_for_each_entry_rcu(table, &afi->tables, list) {
487 + list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
488 + iter(&flowtable->data, data);
489 + }
490 + }
491 + }
492 + rcu_read_unlock();
493 +}
494 +EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
495 +
496 +static void nft_unregister_flowtable_net_hooks(struct net *net,
497 + struct nft_flowtable *flowtable)
498 +{
499 + int i;
500 +
501 + for (i = 0; i < flowtable->ops_len; i++) {
502 + if (!flowtable->ops[i].dev)
503 + continue;
504 +
505 + nf_unregister_net_hook(net, &flowtable->ops[i]);
506 + }
507 +}
508 +
509 +static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
510 + struct sk_buff *skb,
511 + const struct nlmsghdr *nlh,
512 + const struct nlattr * const nla[],
513 + struct netlink_ext_ack *extack)
514 +{
515 + const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
516 + const struct nf_flowtable_type *type;
517 + u8 genmask = nft_genmask_next(net);
518 + int family = nfmsg->nfgen_family;
519 + struct nft_flowtable *flowtable;
520 + struct nft_af_info *afi;
521 + struct nft_table *table;
522 + struct nft_ctx ctx;
523 + int err, i, k;
524 +
525 + if (!nla[NFTA_FLOWTABLE_TABLE] ||
526 + !nla[NFTA_FLOWTABLE_NAME] ||
527 + !nla[NFTA_FLOWTABLE_HOOK])
528 + return -EINVAL;
529 +
530 + afi = nf_tables_afinfo_lookup(net, family, true);
531 + if (IS_ERR(afi))
532 + return PTR_ERR(afi);
533 +
534 + table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
535 + if (IS_ERR(table))
536 + return PTR_ERR(table);
537 +
538 + flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
539 + genmask);
540 + if (IS_ERR(flowtable)) {
541 + err = PTR_ERR(flowtable);
542 + if (err != -ENOENT)
543 + return err;
544 + } else {
545 + if (nlh->nlmsg_flags & NLM_F_EXCL)
546 + return -EEXIST;
547 +
548 + return 0;
549 + }
550 +
551 + nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
552 +
553 + flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
554 + if (!flowtable)
555 + return -ENOMEM;
556 +
557 + flowtable->table = table;
558 + flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
559 + if (!flowtable->name) {
560 + err = -ENOMEM;
561 + goto err1;
562 + }
563 +
564 + type = nft_flowtable_type_get(afi);
565 + if (IS_ERR(type)) {
566 + err = PTR_ERR(type);
567 + goto err2;
568 + }
569 +
570 + flowtable->data.type = type;
571 + err = rhashtable_init(&flowtable->data.rhashtable, type->params);
572 + if (err < 0)
573 + goto err3;
574 +
575 + err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
576 + flowtable);
577 + if (err < 0)
578 + goto err3;
579 +
580 + for (i = 0; i < flowtable->ops_len; i++) {
581 + err = nf_register_net_hook(net, &flowtable->ops[i]);
582 + if (err < 0)
583 + goto err4;
584 + }
585 +
586 + err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
587 + if (err < 0)
588 + goto err5;
589 +
590 + INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc);
591 + queue_delayed_work(system_power_efficient_wq,
592 + &flowtable->data.gc_work, HZ);
593 +
594 + list_add_tail_rcu(&flowtable->list, &table->flowtables);
595 + table->use++;
596 +
597 + return 0;
598 +err5:
599 + i = flowtable->ops_len; /* every hook was registered */
600 +err4:
601 + for (k = i - 1; k >= 0; k--) /* unwind hooks [0, i) in reverse */
602 + nf_unregister_net_hook(net, &flowtable->ops[k]);
603 +
604 + kfree(flowtable->ops);
605 +err3:
606 + module_put(type->owner);
607 +err2:
608 + kfree(flowtable->name);
609 +err1:
610 + kfree(flowtable);
611 + return err;
612 +}
613 +
614 +static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
615 + struct sk_buff *skb,
616 + const struct nlmsghdr *nlh,
617 + const struct nlattr * const nla[],
618 + struct netlink_ext_ack *extack)
619 +{
620 + const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
621 + u8 genmask = nft_genmask_next(net);
622 + int family = nfmsg->nfgen_family;
623 + struct nft_flowtable *flowtable;
624 + struct nft_af_info *afi;
625 + struct nft_table *table;
626 + struct nft_ctx ctx;
627 +
628 + afi = nf_tables_afinfo_lookup(net, family, true);
629 + if (IS_ERR(afi))
630 + return PTR_ERR(afi);
631 +
632 + table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
633 + if (IS_ERR(table))
634 + return PTR_ERR(table);
635 +
636 + flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
637 + genmask);
638 + if (IS_ERR(flowtable))
639 + return PTR_ERR(flowtable);
640 + if (flowtable->use > 0)
641 + return -EBUSY;
642 +
643 + nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
644 +
645 + return nft_delflowtable(&ctx, flowtable);
646 +}
647 +
648 +static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
649 + u32 portid, u32 seq, int event,
650 + u32 flags, int family,
651 + struct nft_flowtable *flowtable)
652 +{
653 + struct nlattr *nest, *nest_devs;
654 + struct nfgenmsg *nfmsg;
655 + struct nlmsghdr *nlh;
656 + int i;
657 +
658 + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
659 + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
660 + if (nlh == NULL)
661 + goto nla_put_failure;
662 +
663 + nfmsg = nlmsg_data(nlh);
664 + nfmsg->nfgen_family = family;
665 + nfmsg->version = NFNETLINK_V0;
666 + nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
667 +
668 + if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) ||
669 + nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
670 + nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)))
671 + goto nla_put_failure;
672 +
673 + nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
674 + if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) ||
675 + nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority)))
676 + goto nla_put_failure;
677 +
678 + nest_devs = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK_DEVS);
679 + if (!nest_devs)
680 + goto nla_put_failure;
681 +
682 + for (i = 0; i < flowtable->ops_len; i++) {
683 + if (flowtable->ops[i].dev &&
684 + nla_put_string(skb, NFTA_DEVICE_NAME,
685 + flowtable->ops[i].dev->name))
686 + goto nla_put_failure;
687 + }
688 + nla_nest_end(skb, nest_devs);
689 + nla_nest_end(skb, nest);
690 +
691 + nlmsg_end(skb, nlh);
692 + return 0;
693 +
694 +nla_put_failure:
695 + nlmsg_trim(skb, nlh);
696 + return -1;
697 +}
698 +
699 +struct nft_flowtable_filter {
700 + char *table;
701 +};
702 +
703 +static int nf_tables_dump_flowtable(struct sk_buff *skb,
704 + struct netlink_callback *cb)
705 +{
706 + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
707 + struct nft_flowtable_filter *filter = cb->data;
708 + unsigned int idx = 0, s_idx = cb->args[0];
709 + struct net *net = sock_net(skb->sk);
710 + int family = nfmsg->nfgen_family;
711 + struct nft_flowtable *flowtable;
712 + const struct nft_af_info *afi;
713 + const struct nft_table *table;
714 +
715 + rcu_read_lock();
716 + cb->seq = net->nft.base_seq;
717 +
718 + list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
719 + if (family != NFPROTO_UNSPEC && family != afi->family)
720 + continue;
721 +
722 + list_for_each_entry_rcu(table, &afi->tables, list) {
723 + list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
724 + if (!nft_is_active(net, flowtable))
725 + goto cont;
726 + if (idx < s_idx)
727 + goto cont;
728 + if (idx > s_idx)
729 + memset(&cb->args[1], 0,
730 + sizeof(cb->args) - sizeof(cb->args[0]));
731 + if (filter && filter->table[0] &&
732 + strcmp(filter->table, table->name))
733 + goto cont;
734 +
735 + if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid,
736 + cb->nlh->nlmsg_seq,
737 + NFT_MSG_NEWFLOWTABLE,
738 + NLM_F_MULTI | NLM_F_APPEND,
739 + afi->family, flowtable) < 0)
740 + goto done;
741 +
742 + nl_dump_check_consistent(cb, nlmsg_hdr(skb));
743 +cont:
744 + idx++;
745 + }
746 + }
747 + }
748 +done:
749 + rcu_read_unlock();
750 +
751 + cb->args[0] = idx;
752 + return skb->len;
753 +}
754 +
755 +static int nf_tables_dump_flowtable_done(struct netlink_callback *cb)
756 +{
757 + struct nft_flowtable_filter *filter = cb->data;
758 +
759 + if (!filter)
760 + return 0;
761 +
762 + kfree(filter->table);
763 + kfree(filter);
764 +
765 + return 0;
766 +}
767 +
768 +static struct nft_flowtable_filter *
769 +nft_flowtable_filter_alloc(const struct nlattr * const nla[])
770 +{
771 + struct nft_flowtable_filter *filter;
772 +
773 + filter = kzalloc(sizeof(*filter), GFP_KERNEL);
774 + if (!filter)
775 + return ERR_PTR(-ENOMEM);
776 +
777 + if (nla[NFTA_FLOWTABLE_TABLE]) {
778 + filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE],
779 + GFP_KERNEL);
780 + if (!filter->table) {
781 + kfree(filter);
782 + return ERR_PTR(-ENOMEM);
783 + }
784 + }
785 + return filter;
786 +}
787 +
788 +static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
789 + struct sk_buff *skb,
790 + const struct nlmsghdr *nlh,
791 + const struct nlattr * const nla[],
792 + struct netlink_ext_ack *extack)
793 +{
794 + const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
795 + u8 genmask = nft_genmask_cur(net);
796 + int family = nfmsg->nfgen_family;
797 + struct nft_flowtable *flowtable;
798 + const struct nft_af_info *afi;
799 + const struct nft_table *table;
800 + struct sk_buff *skb2;
801 + int err;
802 +
803 + if (nlh->nlmsg_flags & NLM_F_DUMP) { /* dump request: use the dump callbacks */
804 + struct netlink_dump_control c = {
805 + .dump = nf_tables_dump_flowtable,
806 + .done = nf_tables_dump_flowtable_done,
807 + };
808 +
809 + if (nla[NFTA_FLOWTABLE_TABLE]) {
810 + struct nft_flowtable_filter *filter;
811 +
812 + filter = nft_flowtable_filter_alloc(nla);
813 + if (IS_ERR(filter))
814 + return -ENOMEM;
815 +
816 + c.data = filter;
817 + }
818 + return netlink_dump_start(nlsk, skb, nlh, &c);
819 + }
820 +
821 + if (!nla[NFTA_FLOWTABLE_NAME])
822 + return -EINVAL;
823 +
824 + afi = nf_tables_afinfo_lookup(net, family, false);
825 + if (IS_ERR(afi))
826 + return PTR_ERR(afi);
827 +
828 + table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
829 + if (IS_ERR(table))
830 + return PTR_ERR(table);
831 +
832 + flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
833 + genmask);
834 + if (IS_ERR(flowtable))
835 + return PTR_ERR(flowtable);
836 +
837 + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
838 + if (!skb2)
839 + return -ENOMEM;
840 +
841 + err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid,
842 + nlh->nlmsg_seq,
843 + NFT_MSG_NEWFLOWTABLE, 0, family,
844 + flowtable);
845 + if (err < 0)
846 + goto err;
847 +
848 + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
849 +err:
850 + kfree_skb(skb2);
851 + return err;
852 +}
853 +
854 +static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
855 + struct nft_flowtable *flowtable,
856 + int event)
857 +{
858 + struct sk_buff *skb;
859 + int err;
860 +
861 + if (ctx->report &&
862 + !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
863 + return;
864 +
865 + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
866 + if (skb == NULL)
867 + goto err;
868 +
869 + err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid,
870 + ctx->seq, event, 0,
871 + ctx->afi->family, flowtable);
872 + if (err < 0) {
873 + kfree_skb(skb);
874 + goto err;
875 + }
876 +
877 + nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
878 + ctx->report, GFP_KERNEL);
879 + return;
880 +err:
881 + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
882 +}
883 +
884 +static void nft_flowtable_destroy(void *ptr, void *arg)
885 +{
886 + kfree(ptr);
887 +}
888 +
889 +static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
890 +{
891 + cancel_delayed_work_sync(&flowtable->data.gc_work);
892 + kfree(flowtable->name);
893 + rhashtable_free_and_destroy(&flowtable->data.rhashtable,
894 + nft_flowtable_destroy, NULL);
895 + module_put(flowtable->data.type->owner);
896 +}
897 +
898 static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
899 u32 portid, u32 seq)
900 {
901 @@ -4797,6 +5440,49 @@ nla_put_failure:
902 return -EMSGSIZE;
903 }
904
905 +static void nft_flowtable_event(unsigned long event, struct net_device *dev,
906 + struct nft_flowtable *flowtable)
907 +{
908 + int i;
909 +
910 + for (i = 0; i < flowtable->ops_len; i++) {
911 + if (flowtable->ops[i].dev != dev)
912 + continue;
913 +
914 + nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
915 + flowtable->ops[i].dev = NULL;
916 + break;
917 + }
918 +}
919 +
920 +static int nf_tables_flowtable_event(struct notifier_block *this,
921 + unsigned long event, void *ptr)
922 +{
923 + struct net_device *dev = netdev_notifier_info_to_dev(ptr);
924 + struct nft_flowtable *flowtable;
925 + struct nft_table *table;
926 + struct nft_af_info *afi;
927 +
928 + if (event != NETDEV_UNREGISTER)
929 + return 0;
930 +
931 + nfnl_lock(NFNL_SUBSYS_NFTABLES);
932 + list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) {
933 + list_for_each_entry(table, &afi->tables, list) {
934 + list_for_each_entry(flowtable, &table->flowtables, list) {
935 + nft_flowtable_event(event, dev, flowtable);
936 + }
937 + }
938 + }
939 + nfnl_unlock(NFNL_SUBSYS_NFTABLES);
940 +
941 + return NOTIFY_DONE;
942 +}
943 +
944 +static struct notifier_block nf_tables_flowtable_notifier = {
945 + .notifier_call = nf_tables_flowtable_event,
946 +};
947 +
948 static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb,
949 int event)
950 {
951 @@ -4949,6 +5635,21 @@ static const struct nfnl_callback nf_tab
952 .attr_count = NFTA_OBJ_MAX,
953 .policy = nft_obj_policy,
954 },
955 + [NFT_MSG_NEWFLOWTABLE] = {
956 + .call_batch = nf_tables_newflowtable,
957 + .attr_count = NFTA_FLOWTABLE_MAX,
958 + .policy = nft_flowtable_policy,
959 + },
960 + [NFT_MSG_GETFLOWTABLE] = {
961 + .call = nf_tables_getflowtable,
962 + .attr_count = NFTA_FLOWTABLE_MAX,
963 + .policy = nft_flowtable_policy,
964 + },
965 + [NFT_MSG_DELFLOWTABLE] = {
966 + .call_batch = nf_tables_delflowtable,
967 + .attr_count = NFTA_FLOWTABLE_MAX,
968 + .policy = nft_flowtable_policy,
969 + },
970 };
971
972 static void nft_chain_commit_update(struct nft_trans *trans)
973 @@ -4994,6 +5695,9 @@ static void nf_tables_commit_release(str
974 case NFT_MSG_DELOBJ:
975 nft_obj_destroy(nft_trans_obj(trans));
976 break;
977 + case NFT_MSG_DELFLOWTABLE:
978 + nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
979 + break;
980 }
981 kfree(trans);
982 }
983 @@ -5111,6 +5815,21 @@ static int nf_tables_commit(struct net *
984 nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
985 NFT_MSG_DELOBJ);
986 break;
987 + case NFT_MSG_NEWFLOWTABLE:
988 + nft_clear(net, nft_trans_flowtable(trans));
989 + nf_tables_flowtable_notify(&trans->ctx,
990 + nft_trans_flowtable(trans),
991 + NFT_MSG_NEWFLOWTABLE);
992 + nft_trans_destroy(trans);
993 + break;
994 + case NFT_MSG_DELFLOWTABLE:
995 + list_del_rcu(&nft_trans_flowtable(trans)->list);
996 + nf_tables_flowtable_notify(&trans->ctx,
997 + nft_trans_flowtable(trans),
998 + NFT_MSG_DELFLOWTABLE);
999 + nft_unregister_flowtable_net_hooks(net,
1000 + nft_trans_flowtable(trans));
1001 + break;
1002 }
1003 }
1004
1005 @@ -5148,6 +5867,9 @@ static void nf_tables_abort_release(stru
1006 case NFT_MSG_NEWOBJ:
1007 nft_obj_destroy(nft_trans_obj(trans));
1008 break;
1009 + case NFT_MSG_NEWFLOWTABLE:
1010 + nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
1011 + break;
1012 }
1013 kfree(trans);
1014 }
1015 @@ -5237,6 +5959,17 @@ static int nf_tables_abort(struct net *n
1016 nft_clear(trans->ctx.net, nft_trans_obj(trans));
1017 nft_trans_destroy(trans);
1018 break;
1019 + case NFT_MSG_NEWFLOWTABLE:
1020 + trans->ctx.table->use--;
1021 + list_del_rcu(&nft_trans_flowtable(trans)->list);
1022 + nft_unregister_flowtable_net_hooks(net,
1023 + nft_trans_flowtable(trans));
1024 + break;
1025 + case NFT_MSG_DELFLOWTABLE:
1026 + trans->ctx.table->use++;
1027 + nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
1028 + nft_trans_destroy(trans);
1029 + break;
1030 }
1031 }
1032
1033 @@ -5787,6 +6520,7 @@ EXPORT_SYMBOL_GPL(__nft_release_basechai
1034 /* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */
1035 static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
1036 {
1037 + struct nft_flowtable *flowtable, *nf;
1038 struct nft_table *table, *nt;
1039 struct nft_chain *chain, *nc;
1040 struct nft_object *obj, *ne;
1041 @@ -5800,6 +6534,9 @@ static void __nft_release_afinfo(struct
1042 list_for_each_entry_safe(table, nt, &afi->tables, list) {
1043 list_for_each_entry(chain, &table->chains, list)
1044 nf_tables_unregister_hook(net, table, chain);
1045 + list_for_each_entry(flowtable, &table->flowtables, list)
1046 + nf_unregister_net_hooks(net, flowtable->ops,
1047 + flowtable->ops_len);
1048 /* No packets are walking on these chains anymore. */
1049 ctx.table = table;
1050 list_for_each_entry(chain, &table->chains, list) {
1051 @@ -5810,6 +6547,11 @@ static void __nft_release_afinfo(struct
1052 nf_tables_rule_destroy(&ctx, rule);
1053 }
1054 }
1055 + list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
1056 + list_del(&flowtable->list);
1057 + table->use--;
1058 + nf_tables_flowtable_destroy(flowtable);
1059 + }
1060 list_for_each_entry_safe(set, ns, &table->sets, list) {
1061 list_del(&set->list);
1062 table->use--;
1063 @@ -5853,6 +6595,8 @@ static int __init nf_tables_module_init(
1064 if (err < 0)
1065 goto err3;
1066
1067 + register_netdevice_notifier(&nf_tables_flowtable_notifier);
1068 +
1069 pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n");
1070 return register_pernet_subsys(&nf_tables_net_ops);
1071 err3:
1072 @@ -5867,6 +6611,7 @@ static void __exit nf_tables_module_exit
1073 {
1074 unregister_pernet_subsys(&nf_tables_net_ops);
1075 nfnetlink_subsys_unregister(&nf_tables_subsys);
1076 + unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
1077 rcu_barrier();
1078 nf_tables_core_module_exit();
1079 kfree(info);