5d6ddcb
[openwrt/staging/blogic.git] /
1 /*
2 * Linux NET3: GRE over IP protocol decoder.
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/slab.h>
18 #include <asm/uaccess.h>
19 #include <linux/skbuff.h>
20 #include <linux/netdevice.h>
21 #include <linux/in.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/if_arp.h>
25 #include <linux/mroute.h>
26 #include <linux/init.h>
27 #include <linux/in6.h>
28 #include <linux/inetdevice.h>
29 #include <linux/igmp.h>
30 #include <linux/netfilter_ipv4.h>
31 #include <linux/etherdevice.h>
32 #include <linux/if_ether.h>
33
34 #include <net/sock.h>
35 #include <net/ip.h>
36 #include <net/icmp.h>
37 #include <net/protocol.h>
38 #include <net/ipip.h>
39 #include <net/arp.h>
40 #include <net/checksum.h>
41 #include <net/dsfield.h>
42 #include <net/inet_ecn.h>
43 #include <net/xfrm.h>
44 #include <net/net_namespace.h>
45 #include <net/netns/generic.h>
46 #include <net/rtnetlink.h>
47 #include <net/gre.h>
48
49 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
50 #include <net/ipv6.h>
51 #include <net/ip6_fib.h>
52 #include <net/ip6_route.h>
53 #endif
54
55 /*
56 Problems & solutions
57 --------------------
58
59 1. The most important issue is detecting local dead loops.
60 They would cause complete host lockup in transmit, which
61 would be "resolved" by stack overflow or, if queueing is enabled,
62 with infinite looping in net_bh.
63
64 We cannot track such dead loops during route installation,
65 it is infeasible task. The most general solutions would be
66 to keep skb->encapsulation counter (sort of local ttl),
67 and silently drop packet when it expires. It is the best
68 solution, but it supposes maintaining a new variable in ALL
69 skb, even if no tunneling is used.
70
71 Current solution: HARD_TX_LOCK lock breaks dead loops.
72
73
74
75 2. Networking dead loops would not kill routers, but would really
76 kill network. IP hop limit plays role of "t->recursion" in this case,
77 if we copy it from packet being encapsulated to upper header.
78 It is very good solution, but it introduces two problems:
79
80 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
81 do not work over tunnels.
82 - traceroute does not work. I planned to relay ICMP from tunnel,
83 so that this problem would be solved and traceroute output
84 would even more informative. This idea appeared to be wrong:
85 only Linux complies to rfc1812 now (yes, guys, Linux is the only
86 true router now :-)), all routers (at least, in neighbourhood of mine)
87 return only 8 bytes of payload. It is the end.
88
89 Hence, if we want that OSPF worked or traceroute said something reasonable,
90 we should search for another solution.
91
92 One of them is to parse packet trying to detect inner encapsulation
93 made by our node. It is difficult or even impossible, especially,
94 taking into account fragmentation. To be short, it is not a solution at all.
95
96 Current solution: The solution was UNEXPECTEDLY SIMPLE.
97 We force DF flag on tunnels with preconfigured hop limit,
98 that is ALL. :-) Well, it does not remove the problem completely,
99 but exponential growth of network traffic is changed to linear
100 (branches, that exceed pmtu are pruned) and tunnel mtu
101 fastly degrades to value <68, where looping stops.
102 Yes, it is not good if there exists a router in the loop,
103 which does not force DF, even when encapsulating packets have DF set.
104 But it is not our problem! Nobody could accuse us, we made
105 all that we could make. Even if it is your gated who injected
106 fatal route to network, even if it were you who configured
107 fatal static route: you are innocent. :-)
108
109
110
111 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
112 practically identical code. It would be good to glue them
113 together, but it is not very evident, how to make them modular.
114 sit is integral part of IPv6, ipip and gre are naturally modular.
115 We could extract common parts (hash table, ioctl etc)
116 to a separate module (ip_tunnel.c).
117
118 Alexey Kuznetsov.
119 */
120
121 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
122 static int ipgre_tunnel_init(struct net_device *dev);
123 static void ipgre_tunnel_setup(struct net_device *dev);
124 static int ipgre_tunnel_bind_dev(struct net_device *dev);
125
126 /* Fallback tunnel: no source, no destination, no key, no options */
127
128 #define HASH_SIZE 16
129
130 static int ipgre_net_id __read_mostly;
131 struct ipgre_net {
132 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
133
134 struct net_device *fb_tunnel_dev;
135 };
136
137 /* Tunnel hash table */
138
139 /*
140 4 hash tables:
141
142 3: (remote,local)
143 2: (remote,*)
144 1: (*,local)
145 0: (*,*)
146
147 We require exact key match i.e. if a key is present in packet
148 it will match only tunnel with the same key; if it is not present,
149 it will match only keyless tunnel.
150
151 All keyless packets, if not matched by a configured keyless tunnel,
152 will match the fallback tunnel.
153 */
154
/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 *
 * The argument is parenthesised so that compound expressions such as
 * HASH(a ^ b) hash the whole expression, and the mask is derived from
 * HASH_SIZE (which must stay a power of two) instead of a literal 0xF.
 * Note: the argument is evaluated twice, so it must be side-effect free.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))

/* Short names for the four hash tables held in struct ipgre_net. */
#define tunnels_r_l	tunnels[3]	/* (remote, local) */
#define tunnels_r	tunnels[2]	/* (remote, *)     */
#define tunnels_l	tunnels[1]	/* (*, local)      */
#define tunnels_wc	tunnels[0]	/* (*, *)          */
161 /*
162 * Locking : hash tables are protected by RCU and RTNL
163 */
164
/*
 * Walk a tunnel chain starting at @start, following ->next through
 * rcu_dereference().  The cursor is the caller's local variable named 't',
 * which must be declared before the macro is used.
 */
#define for_each_ip_tunnel_rcu(start)			\
	for (t = rcu_dereference(start);		\
	     t;						\
	     t = rcu_dereference(t->next))
167
168 /* Given src, dst and key, find appropriate for input tunnel. */
169
170 static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
171 __be32 remote, __be32 local,
172 __be32 key, __be16 gre_proto)
173 {
174 struct net *net = dev_net(dev);
175 int link = dev->ifindex;
176 unsigned int h0 = HASH(remote);
177 unsigned int h1 = HASH(key);
178 struct ip_tunnel *t, *cand = NULL;
179 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
180 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
181 ARPHRD_ETHER : ARPHRD_IPGRE;
182 int score, cand_score = 4;
183
184 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
185 if (local != t->parms.iph.saddr ||
186 remote != t->parms.iph.daddr ||
187 key != t->parms.i_key ||
188 !(t->dev->flags & IFF_UP))
189 continue;
190
191 if (t->dev->type != ARPHRD_IPGRE &&
192 t->dev->type != dev_type)
193 continue;
194
195 score = 0;
196 if (t->parms.link != link)
197 score |= 1;
198 if (t->dev->type != dev_type)
199 score |= 2;
200 if (score == 0)
201 return t;
202
203 if (score < cand_score) {
204 cand = t;
205 cand_score = score;
206 }
207 }
208
209 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
210 if (remote != t->parms.iph.daddr ||
211 key != t->parms.i_key ||
212 !(t->dev->flags & IFF_UP))
213 continue;
214
215 if (t->dev->type != ARPHRD_IPGRE &&
216 t->dev->type != dev_type)
217 continue;
218
219 score = 0;
220 if (t->parms.link != link)
221 score |= 1;
222 if (t->dev->type != dev_type)
223 score |= 2;
224 if (score == 0)
225 return t;
226
227 if (score < cand_score) {
228 cand = t;
229 cand_score = score;
230 }
231 }
232
233 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
234 if ((local != t->parms.iph.saddr &&
235 (local != t->parms.iph.daddr ||
236 !ipv4_is_multicast(local))) ||
237 key != t->parms.i_key ||
238 !(t->dev->flags & IFF_UP))
239 continue;
240
241 if (t->dev->type != ARPHRD_IPGRE &&
242 t->dev->type != dev_type)
243 continue;
244
245 score = 0;
246 if (t->parms.link != link)
247 score |= 1;
248 if (t->dev->type != dev_type)
249 score |= 2;
250 if (score == 0)
251 return t;
252
253 if (score < cand_score) {
254 cand = t;
255 cand_score = score;
256 }
257 }
258
259 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
260 if (t->parms.i_key != key ||
261 !(t->dev->flags & IFF_UP))
262 continue;
263
264 if (t->dev->type != ARPHRD_IPGRE &&
265 t->dev->type != dev_type)
266 continue;
267
268 score = 0;
269 if (t->parms.link != link)
270 score |= 1;
271 if (t->dev->type != dev_type)
272 score |= 2;
273 if (score == 0)
274 return t;
275
276 if (score < cand_score) {
277 cand = t;
278 cand_score = score;
279 }
280 }
281
282 if (cand != NULL)
283 return cand;
284
285 dev = ign->fb_tunnel_dev;
286 if (dev->flags & IFF_UP)
287 return netdev_priv(dev);
288
289 return NULL;
290 }
291
292 static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
293 struct ip_tunnel_parm *parms)
294 {
295 __be32 remote = parms->iph.daddr;
296 __be32 local = parms->iph.saddr;
297 __be32 key = parms->i_key;
298 unsigned int h = HASH(key);
299 int prio = 0;
300
301 if (local)
302 prio |= 1;
303 if (remote && !ipv4_is_multicast(remote)) {
304 prio |= 2;
305 h ^= HASH(remote);
306 }
307
308 return &ign->tunnels[prio][h];
309 }
310
311 static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
312 struct ip_tunnel *t)
313 {
314 return __ipgre_bucket(ign, &t->parms);
315 }
316
317 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
318 {
319 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
320
321 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
322 rcu_assign_pointer(*tp, t);
323 }
324
325 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
326 {
327 struct ip_tunnel __rcu **tp;
328 struct ip_tunnel *iter;
329
330 for (tp = ipgre_bucket(ign, t);
331 (iter = rtnl_dereference(*tp)) != NULL;
332 tp = &iter->next) {
333 if (t == iter) {
334 rcu_assign_pointer(*tp, t->next);
335 break;
336 }
337 }
338 }
339
340 static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
341 struct ip_tunnel_parm *parms,
342 int type)
343 {
344 __be32 remote = parms->iph.daddr;
345 __be32 local = parms->iph.saddr;
346 __be32 key = parms->i_key;
347 int link = parms->link;
348 struct ip_tunnel *t;
349 struct ip_tunnel __rcu **tp;
350 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
351
352 for (tp = __ipgre_bucket(ign, parms);
353 (t = rtnl_dereference(*tp)) != NULL;
354 tp = &t->next)
355 if (local == t->parms.iph.saddr &&
356 remote == t->parms.iph.daddr &&
357 key == t->parms.i_key &&
358 link == t->parms.link &&
359 type == t->dev->type)
360 break;
361
362 return t;
363 }
364
365 static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
366 struct ip_tunnel_parm *parms, int create)
367 {
368 struct ip_tunnel *t, *nt;
369 struct net_device *dev;
370 char name[IFNAMSIZ];
371 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
372
373 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
374 if (t || !create)
375 return t;
376
377 if (parms->name[0])
378 strlcpy(name, parms->name, IFNAMSIZ);
379 else
380 sprintf(name, "gre%%d");
381
382 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
383 if (!dev)
384 return NULL;
385
386 dev_net_set(dev, net);
387
388 if (strchr(name, '%')) {
389 if (dev_alloc_name(dev, name) < 0)
390 goto failed_free;
391 }
392
393 nt = netdev_priv(dev);
394 nt->parms = *parms;
395 dev->rtnl_link_ops = &ipgre_link_ops;
396
397 dev->mtu = ipgre_tunnel_bind_dev(dev);
398
399 if (register_netdevice(dev) < 0)
400 goto failed_free;
401
402 dev_hold(dev);
403 ipgre_tunnel_link(ign, nt);
404 return nt;
405
406 failed_free:
407 free_netdev(dev);
408 return NULL;
409 }
410
411 static void ipgre_tunnel_uninit(struct net_device *dev)
412 {
413 struct net *net = dev_net(dev);
414 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
415
416 ipgre_tunnel_unlink(ign, netdev_priv(dev));
417 dev_put(dev);
418 }
419
420
421 static void ipgre_err(struct sk_buff *skb, u32 info)
422 {
423
424 /* All the routers (except for Linux) return only
425 8 bytes of packet payload. It means, that precise relaying of
426 ICMP in the real Internet is absolutely infeasible.
427
428 Moreover, Cisco "wise men" put GRE key to the third word
429 in GRE header. It makes impossible maintaining even soft state for keyed
430 GRE tunnels with enabled checksum. Tell them "thank you".
431
432 Well, I wonder, rfc1812 was written by Cisco employee,
433 what the hell these idiots break standrads established
434 by themself???
435 */
436
437 struct iphdr *iph = (struct iphdr *)skb->data;
438 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
439 int grehlen = (iph->ihl<<2) + 4;
440 const int type = icmp_hdr(skb)->type;
441 const int code = icmp_hdr(skb)->code;
442 struct ip_tunnel *t;
443 __be16 flags;
444
445 flags = p[0];
446 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
447 if (flags&(GRE_VERSION|GRE_ROUTING))
448 return;
449 if (flags&GRE_KEY) {
450 grehlen += 4;
451 if (flags&GRE_CSUM)
452 grehlen += 4;
453 }
454 }
455
456 /* If only 8 bytes returned, keyed message will be dropped here */
457 if (skb_headlen(skb) < grehlen)
458 return;
459
460 switch (type) {
461 default:
462 case ICMP_PARAMETERPROB:
463 return;
464
465 case ICMP_DEST_UNREACH:
466 switch (code) {
467 case ICMP_SR_FAILED:
468 case ICMP_PORT_UNREACH:
469 /* Impossible event. */
470 return;
471 case ICMP_FRAG_NEEDED:
472 /* Soft state for pmtu is maintained by IP core. */
473 return;
474 default:
475 /* All others are translated to HOST_UNREACH.
476 rfc2003 contains "deep thoughts" about NET_UNREACH,
477 I believe they are just ether pollution. --ANK
478 */
479 break;
480 }
481 break;
482 case ICMP_TIME_EXCEEDED:
483 if (code != ICMP_EXC_TTL)
484 return;
485 break;
486 }
487
488 rcu_read_lock();
489 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
490 flags & GRE_KEY ?
491 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
492 p[1]);
493 if (t == NULL || t->parms.iph.daddr == 0 ||
494 ipv4_is_multicast(t->parms.iph.daddr))
495 goto out;
496
497 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
498 goto out;
499
500 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
501 t->err_count++;
502 else
503 t->err_count = 1;
504 t->err_time = jiffies;
505 out:
506 rcu_read_unlock();
507 }
508
509 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
510 {
511 if (INET_ECN_is_ce(iph->tos)) {
512 if (skb->protocol == htons(ETH_P_IP)) {
513 IP_ECN_set_ce(ip_hdr(skb));
514 } else if (skb->protocol == htons(ETH_P_IPV6)) {
515 IP6_ECN_set_ce(ipv6_hdr(skb));
516 }
517 }
518 }
519
520 static inline u8
521 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
522 {
523 u8 inner = 0;
524 if (skb->protocol == htons(ETH_P_IP))
525 inner = old_iph->tos;
526 else if (skb->protocol == htons(ETH_P_IPV6))
527 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
528 return INET_ECN_encapsulate(tos, inner);
529 }
530
531 static int ipgre_rcv(struct sk_buff *skb)
532 {
533 struct iphdr *iph;
534 u8 *h;
535 __be16 flags;
536 __sum16 csum = 0;
537 __be32 key = 0;
538 u32 seqno = 0;
539 struct ip_tunnel *tunnel;
540 int offset = 4;
541 __be16 gre_proto;
542
543 if (!pskb_may_pull(skb, 16))
544 goto drop_nolock;
545
546 iph = ip_hdr(skb);
547 h = skb->data;
548 flags = *(__be16*)h;
549
550 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
551 /* - Version must be 0.
552 - We do not support routing headers.
553 */
554 if (flags&(GRE_VERSION|GRE_ROUTING))
555 goto drop_nolock;
556
557 if (flags&GRE_CSUM) {
558 switch (skb->ip_summed) {
559 case CHECKSUM_COMPLETE:
560 csum = csum_fold(skb->csum);
561 if (!csum)
562 break;
563 /* fall through */
564 case CHECKSUM_NONE:
565 skb->csum = 0;
566 csum = __skb_checksum_complete(skb);
567 skb->ip_summed = CHECKSUM_COMPLETE;
568 }
569 offset += 4;
570 }
571 if (flags&GRE_KEY) {
572 key = *(__be32*)(h + offset);
573 offset += 4;
574 }
575 if (flags&GRE_SEQ) {
576 seqno = ntohl(*(__be32*)(h + offset));
577 offset += 4;
578 }
579 }
580
581 gre_proto = *(__be16 *)(h + 2);
582
583 rcu_read_lock();
584 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
585 iph->saddr, iph->daddr, key,
586 gre_proto))) {
587 struct net_device_stats *stats = &tunnel->dev->stats;
588
589 secpath_reset(skb);
590
591 skb->protocol = gre_proto;
592 /* WCCP version 1 and 2 protocol decoding.
593 * - Change protocol to IP
594 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
595 */
596 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
597 skb->protocol = htons(ETH_P_IP);
598 if ((*(h + offset) & 0xF0) != 0x40)
599 offset += 4;
600 }
601
602 skb->mac_header = skb->network_header;
603 __pskb_pull(skb, offset);
604 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
605 skb->pkt_type = PACKET_HOST;
606 #ifdef CONFIG_NET_IPGRE_BROADCAST
607 if (ipv4_is_multicast(iph->daddr)) {
608 /* Looped back packet, drop it! */
609 if (skb_rtable(skb)->fl.iif == 0)
610 goto drop;
611 stats->multicast++;
612 skb->pkt_type = PACKET_BROADCAST;
613 }
614 #endif
615
616 if (((flags&GRE_CSUM) && csum) ||
617 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
618 stats->rx_crc_errors++;
619 stats->rx_errors++;
620 goto drop;
621 }
622 if (tunnel->parms.i_flags&GRE_SEQ) {
623 if (!(flags&GRE_SEQ) ||
624 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
625 stats->rx_fifo_errors++;
626 stats->rx_errors++;
627 goto drop;
628 }
629 tunnel->i_seqno = seqno + 1;
630 }
631
632 /* Warning: All skb pointers will be invalidated! */
633 if (tunnel->dev->type == ARPHRD_ETHER) {
634 if (!pskb_may_pull(skb, ETH_HLEN)) {
635 stats->rx_length_errors++;
636 stats->rx_errors++;
637 goto drop;
638 }
639
640 iph = ip_hdr(skb);
641 skb->protocol = eth_type_trans(skb, tunnel->dev);
642 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
643 }
644
645 skb_tunnel_rx(skb, tunnel->dev);
646
647 skb_reset_network_header(skb);
648 ipgre_ecn_decapsulate(iph, skb);
649
650 if (netif_rx(skb) == NET_RX_DROP)
651 stats->rx_dropped++;
652
653 rcu_read_unlock();
654 return 0;
655 }
656 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
657
658 drop:
659 rcu_read_unlock();
660 drop_nolock:
661 kfree_skb(skb);
662 return 0;
663 }
664
665 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
666 {
667 struct ip_tunnel *tunnel = netdev_priv(dev);
668 struct net_device_stats *stats = &dev->stats;
669 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
670 struct iphdr *old_iph = ip_hdr(skb);
671 struct iphdr *tiph;
672 u8 tos;
673 __be16 df;
674 struct rtable *rt; /* Route to the other host */
675 struct net_device *tdev; /* Device to other host */
676 struct iphdr *iph; /* Our new IP header */
677 unsigned int max_headroom; /* The extra header space needed */
678 int gre_hlen;
679 __be32 dst;
680 int mtu;
681
682 if (dev->type == ARPHRD_ETHER)
683 IPCB(skb)->flags = 0;
684
685 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
686 gre_hlen = 0;
687 tiph = (struct iphdr *)skb->data;
688 } else {
689 gre_hlen = tunnel->hlen;
690 tiph = &tunnel->parms.iph;
691 }
692
693 if ((dst = tiph->daddr) == 0) {
694 /* NBMA tunnel */
695
696 if (skb_dst(skb) == NULL) {
697 stats->tx_fifo_errors++;
698 goto tx_error;
699 }
700
701 if (skb->protocol == htons(ETH_P_IP)) {
702 rt = skb_rtable(skb);
703 if ((dst = rt->rt_gateway) == 0)
704 goto tx_error_icmp;
705 }
706 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
707 else if (skb->protocol == htons(ETH_P_IPV6)) {
708 struct in6_addr *addr6;
709 int addr_type;
710 struct neighbour *neigh = skb_dst(skb)->neighbour;
711
712 if (neigh == NULL)
713 goto tx_error;
714
715 addr6 = (struct in6_addr *)&neigh->primary_key;
716 addr_type = ipv6_addr_type(addr6);
717
718 if (addr_type == IPV6_ADDR_ANY) {
719 addr6 = &ipv6_hdr(skb)->daddr;
720 addr_type = ipv6_addr_type(addr6);
721 }
722
723 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
724 goto tx_error_icmp;
725
726 dst = addr6->s6_addr32[3];
727 }
728 #endif
729 else
730 goto tx_error;
731 }
732
733 tos = tiph->tos;
734 if (tos == 1) {
735 tos = 0;
736 if (skb->protocol == htons(ETH_P_IP))
737 tos = old_iph->tos;
738 else if (skb->protocol == htons(ETH_P_IPV6))
739 tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
740 }
741
742 {
743 struct flowi fl = { .oif = tunnel->parms.link,
744 .nl_u = { .ip4_u =
745 { .daddr = dst,
746 .saddr = tiph->saddr,
747 .tos = RT_TOS(tos) } },
748 .proto = IPPROTO_GRE };
749 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
750 stats->tx_carrier_errors++;
751 goto tx_error;
752 }
753 }
754 tdev = rt->dst.dev;
755
756 if (tdev == dev) {
757 ip_rt_put(rt);
758 stats->collisions++;
759 goto tx_error;
760 }
761
762 df = tiph->frag_off;
763 if (df)
764 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
765 else
766 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
767
768 if (skb_dst(skb))
769 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
770
771 if (skb->protocol == htons(ETH_P_IP)) {
772 df |= (old_iph->frag_off&htons(IP_DF));
773
774 if ((old_iph->frag_off&htons(IP_DF)) &&
775 mtu < ntohs(old_iph->tot_len)) {
776 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
777 ip_rt_put(rt);
778 goto tx_error;
779 }
780 }
781 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
782 else if (skb->protocol == htons(ETH_P_IPV6)) {
783 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
784
785 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
786 if ((tunnel->parms.iph.daddr &&
787 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
788 rt6->rt6i_dst.plen == 128) {
789 rt6->rt6i_flags |= RTF_MODIFIED;
790 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
791 }
792 }
793
794 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
795 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
796 ip_rt_put(rt);
797 goto tx_error;
798 }
799 }
800 #endif
801
802 if (tunnel->err_count > 0) {
803 if (time_before(jiffies,
804 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
805 tunnel->err_count--;
806
807 dst_link_failure(skb);
808 } else
809 tunnel->err_count = 0;
810 }
811
812 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
813
814 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
815 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
816 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
817 if (max_headroom > dev->needed_headroom)
818 dev->needed_headroom = max_headroom;
819 if (!new_skb) {
820 ip_rt_put(rt);
821 txq->tx_dropped++;
822 dev_kfree_skb(skb);
823 return NETDEV_TX_OK;
824 }
825 if (skb->sk)
826 skb_set_owner_w(new_skb, skb->sk);
827 dev_kfree_skb(skb);
828 skb = new_skb;
829 old_iph = ip_hdr(skb);
830 }
831
832 skb_reset_transport_header(skb);
833 skb_push(skb, gre_hlen);
834 skb_reset_network_header(skb);
835 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
836 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
837 IPSKB_REROUTED);
838 skb_dst_drop(skb);
839 skb_dst_set(skb, &rt->dst);
840
841 /*
842 * Push down and install the IPIP header.
843 */
844
845 iph = ip_hdr(skb);
846 iph->version = 4;
847 iph->ihl = sizeof(struct iphdr) >> 2;
848 iph->frag_off = df;
849 iph->protocol = IPPROTO_GRE;
850 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
851 iph->daddr = rt->rt_dst;
852 iph->saddr = rt->rt_src;
853
854 if ((iph->ttl = tiph->ttl) == 0) {
855 if (skb->protocol == htons(ETH_P_IP))
856 iph->ttl = old_iph->ttl;
857 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
858 else if (skb->protocol == htons(ETH_P_IPV6))
859 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
860 #endif
861 else
862 iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT);
863 }
864
865 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
866 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
867 htons(ETH_P_TEB) : skb->protocol;
868
869 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
870 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
871
872 if (tunnel->parms.o_flags&GRE_SEQ) {
873 ++tunnel->o_seqno;
874 *ptr = htonl(tunnel->o_seqno);
875 ptr--;
876 }
877 if (tunnel->parms.o_flags&GRE_KEY) {
878 *ptr = tunnel->parms.o_key;
879 ptr--;
880 }
881 if (tunnel->parms.o_flags&GRE_CSUM) {
882 *ptr = 0;
883 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
884 }
885 }
886
887 nf_reset(skb);
888
889 IPTUNNEL_XMIT();
890 return NETDEV_TX_OK;
891
892 tx_error_icmp:
893 dst_link_failure(skb);
894
895 tx_error:
896 stats->tx_errors++;
897 dev_kfree_skb(skb);
898 return NETDEV_TX_OK;
899 }
900
901 static int ipgre_tunnel_bind_dev(struct net_device *dev)
902 {
903 struct net_device *tdev = NULL;
904 struct ip_tunnel *tunnel;
905 struct iphdr *iph;
906 int hlen = LL_MAX_HEADER;
907 int mtu = ETH_DATA_LEN;
908 int addend = sizeof(struct iphdr) + 4;
909
910 tunnel = netdev_priv(dev);
911 iph = &tunnel->parms.iph;
912
913 /* Guess output device to choose reasonable mtu and needed_headroom */
914
915 if (iph->daddr) {
916 struct flowi fl = { .oif = tunnel->parms.link,
917 .nl_u = { .ip4_u =
918 { .daddr = iph->daddr,
919 .saddr = iph->saddr,
920 .tos = RT_TOS(iph->tos) } },
921 .proto = IPPROTO_GRE };
922 struct rtable *rt;
923 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
924 tdev = rt->dst.dev;
925 ip_rt_put(rt);
926 }
927
928 if (dev->type != ARPHRD_ETHER)
929 dev->flags |= IFF_POINTOPOINT;
930 }
931
932 if (!tdev && tunnel->parms.link)
933 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
934
935 if (tdev) {
936 hlen = tdev->hard_header_len + tdev->needed_headroom;
937 mtu = tdev->mtu;
938 }
939 dev->iflink = tunnel->parms.link;
940
941 /* Precalculate GRE options length */
942 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
943 if (tunnel->parms.o_flags&GRE_CSUM)
944 addend += 4;
945 if (tunnel->parms.o_flags&GRE_KEY)
946 addend += 4;
947 if (tunnel->parms.o_flags&GRE_SEQ)
948 addend += 4;
949 }
950 dev->needed_headroom = addend + hlen;
951 mtu -= dev->hard_header_len + addend;
952
953 if (mtu < 68)
954 mtu = 68;
955
956 tunnel->hlen = addend;
957
958 return mtu;
959 }
960
961 static int
962 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
963 {
964 int err = 0;
965 struct ip_tunnel_parm p;
966 struct ip_tunnel *t;
967 struct net *net = dev_net(dev);
968 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
969
970 switch (cmd) {
971 case SIOCGETTUNNEL:
972 t = NULL;
973 if (dev == ign->fb_tunnel_dev) {
974 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
975 err = -EFAULT;
976 break;
977 }
978 t = ipgre_tunnel_locate(net, &p, 0);
979 }
980 if (t == NULL)
981 t = netdev_priv(dev);
982 memcpy(&p, &t->parms, sizeof(p));
983 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
984 err = -EFAULT;
985 break;
986
987 case SIOCADDTUNNEL:
988 case SIOCCHGTUNNEL:
989 err = -EPERM;
990 if (!capable(CAP_NET_ADMIN))
991 goto done;
992
993 err = -EFAULT;
994 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
995 goto done;
996
997 err = -EINVAL;
998 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
999 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1000 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1001 goto done;
1002 if (p.iph.ttl)
1003 p.iph.frag_off |= htons(IP_DF);
1004
1005 if (!(p.i_flags&GRE_KEY))
1006 p.i_key = 0;
1007 if (!(p.o_flags&GRE_KEY))
1008 p.o_key = 0;
1009
1010 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1011
1012 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1013 if (t != NULL) {
1014 if (t->dev != dev) {
1015 err = -EEXIST;
1016 break;
1017 }
1018 } else {
1019 unsigned int nflags = 0;
1020
1021 t = netdev_priv(dev);
1022
1023 if (ipv4_is_multicast(p.iph.daddr))
1024 nflags = IFF_BROADCAST;
1025 else if (p.iph.daddr)
1026 nflags = IFF_POINTOPOINT;
1027
1028 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1029 err = -EINVAL;
1030 break;
1031 }
1032 ipgre_tunnel_unlink(ign, t);
1033 t->parms.iph.saddr = p.iph.saddr;
1034 t->parms.iph.daddr = p.iph.daddr;
1035 t->parms.i_key = p.i_key;
1036 t->parms.o_key = p.o_key;
1037 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1038 memcpy(dev->broadcast, &p.iph.daddr, 4);
1039 ipgre_tunnel_link(ign, t);
1040 netdev_state_change(dev);
1041 }
1042 }
1043
1044 if (t) {
1045 err = 0;
1046 if (cmd == SIOCCHGTUNNEL) {
1047 t->parms.iph.ttl = p.iph.ttl;
1048 t->parms.iph.tos = p.iph.tos;
1049 t->parms.iph.frag_off = p.iph.frag_off;
1050 if (t->parms.link != p.link) {
1051 t->parms.link = p.link;
1052 dev->mtu = ipgre_tunnel_bind_dev(dev);
1053 netdev_state_change(dev);
1054 }
1055 }
1056 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1057 err = -EFAULT;
1058 } else
1059 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1060 break;
1061
1062 case SIOCDELTUNNEL:
1063 err = -EPERM;
1064 if (!capable(CAP_NET_ADMIN))
1065 goto done;
1066
1067 if (dev == ign->fb_tunnel_dev) {
1068 err = -EFAULT;
1069 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1070 goto done;
1071 err = -ENOENT;
1072 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1073 goto done;
1074 err = -EPERM;
1075 if (t == netdev_priv(ign->fb_tunnel_dev))
1076 goto done;
1077 dev = t->dev;
1078 }
1079 unregister_netdevice(dev);
1080 err = 0;
1081 break;
1082
1083 default:
1084 err = -EINVAL;
1085 }
1086
1087 done:
1088 return err;
1089 }
1090
1091 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1092 {
1093 struct ip_tunnel *tunnel = netdev_priv(dev);
1094 if (new_mtu < 68 ||
1095 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1096 return -EINVAL;
1097 dev->mtu = new_mtu;
1098 return 0;
1099 }
1100
1101 /* Nice toy. Unfortunately, useless in real life :-)
1102 It allows to construct virtual multiprotocol broadcast "LAN"
1103 over the Internet, provided multicast routing is tuned.
1104
1105
1106 I have no idea was this bicycle invented before me,
1107 so that I had to set ARPHRD_IPGRE to a random value.
1108 I have an impression, that Cisco could make something similar,
1109 but this feature is apparently missing in IOS<=11.2(8).
1110
1111 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1112 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1113
1114 ping -t 255 224.66.66.66
1115
1116 If nobody answers, mbone does not work.
1117
1118 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1119 ip addr add 10.66.66.<somewhat>/24 dev Universe
1120 ifconfig Universe up
1121 ifconfig Universe add fe80::<Your_real_addr>/10
1122 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1123 ftp 10.66.66.66
1124 ...
1125 ftp fec0:6666:6666::193.233.7.65
1126 ...
1127
1128 */
1129
1130 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1131 unsigned short type,
1132 const void *daddr, const void *saddr, unsigned int len)
1133 {
1134 struct ip_tunnel *t = netdev_priv(dev);
1135 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1136 __be16 *p = (__be16*)(iph+1);
1137
1138 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1139 p[0] = t->parms.o_flags;
1140 p[1] = htons(type);
1141
1142 /*
1143 * Set the source hardware address.
1144 */
1145
1146 if (saddr)
1147 memcpy(&iph->saddr, saddr, 4);
1148 if (daddr)
1149 memcpy(&iph->daddr, daddr, 4);
1150 if (iph->daddr)
1151 return t->hlen;
1152
1153 return -t->hlen;
1154 }
1155
1156 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1157 {
1158 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
1159 memcpy(haddr, &iph->saddr, 4);
1160 return 4;
1161 }
1162
1163 static const struct header_ops ipgre_header_ops = {
1164 .create = ipgre_header,
1165 .parse = ipgre_header_parse,
1166 };
1167
1168 #ifdef CONFIG_NET_IPGRE_BROADCAST
1169 static int ipgre_open(struct net_device *dev)
1170 {
1171 struct ip_tunnel *t = netdev_priv(dev);
1172
1173 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1174 struct flowi fl = { .oif = t->parms.link,
1175 .nl_u = { .ip4_u =
1176 { .daddr = t->parms.iph.daddr,
1177 .saddr = t->parms.iph.saddr,
1178 .tos = RT_TOS(t->parms.iph.tos) } },
1179 .proto = IPPROTO_GRE };
1180 struct rtable *rt;
1181 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1182 return -EADDRNOTAVAIL;
1183 dev = rt->dst.dev;
1184 ip_rt_put(rt);
1185 if (__in_dev_get_rtnl(dev) == NULL)
1186 return -EADDRNOTAVAIL;
1187 t->mlink = dev->ifindex;
1188 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1189 }
1190 return 0;
1191 }
1192
1193 static int ipgre_close(struct net_device *dev)
1194 {
1195 struct ip_tunnel *t = netdev_priv(dev);
1196
1197 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1198 struct in_device *in_dev;
1199 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1200 if (in_dev) {
1201 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1202 in_dev_put(in_dev);
1203 }
1204 }
1205 return 0;
1206 }
1207
1208 #endif
1209
1210 static const struct net_device_ops ipgre_netdev_ops = {
1211 .ndo_init = ipgre_tunnel_init,
1212 .ndo_uninit = ipgre_tunnel_uninit,
1213 #ifdef CONFIG_NET_IPGRE_BROADCAST
1214 .ndo_open = ipgre_open,
1215 .ndo_stop = ipgre_close,
1216 #endif
1217 .ndo_start_xmit = ipgre_tunnel_xmit,
1218 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1219 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1220 };
1221
1222 static void ipgre_tunnel_setup(struct net_device *dev)
1223 {
1224 dev->netdev_ops = &ipgre_netdev_ops;
1225 dev->destructor = free_netdev;
1226
1227 dev->type = ARPHRD_IPGRE;
1228 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1229 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1230 dev->flags = IFF_NOARP;
1231 dev->iflink = 0;
1232 dev->addr_len = 4;
1233 dev->features |= NETIF_F_NETNS_LOCAL;
1234 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1235 }
1236
1237 static int ipgre_tunnel_init(struct net_device *dev)
1238 {
1239 struct ip_tunnel *tunnel;
1240 struct iphdr *iph;
1241
1242 tunnel = netdev_priv(dev);
1243 iph = &tunnel->parms.iph;
1244
1245 tunnel->dev = dev;
1246 strcpy(tunnel->parms.name, dev->name);
1247
1248 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1249 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1250
1251 if (iph->daddr) {
1252 #ifdef CONFIG_NET_IPGRE_BROADCAST
1253 if (ipv4_is_multicast(iph->daddr)) {
1254 if (!iph->saddr)
1255 return -EINVAL;
1256 dev->flags = IFF_BROADCAST;
1257 dev->header_ops = &ipgre_header_ops;
1258 }
1259 #endif
1260 } else
1261 dev->header_ops = &ipgre_header_ops;
1262
1263 return 0;
1264 }
1265
1266 static void ipgre_fb_tunnel_init(struct net_device *dev)
1267 {
1268 struct ip_tunnel *tunnel = netdev_priv(dev);
1269 struct iphdr *iph = &tunnel->parms.iph;
1270 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1271
1272 tunnel->dev = dev;
1273 strcpy(tunnel->parms.name, dev->name);
1274
1275 iph->version = 4;
1276 iph->protocol = IPPROTO_GRE;
1277 iph->ihl = 5;
1278 tunnel->hlen = sizeof(struct iphdr) + 4;
1279
1280 dev_hold(dev);
1281 rcu_assign_pointer(ign->tunnels_wc[0], tunnel);
1282 }
1283
1284
1285 static const struct gre_protocol ipgre_protocol = {
1286 .handler = ipgre_rcv,
1287 .err_handler = ipgre_err,
1288 };
1289
1290 static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1291 {
1292 int prio;
1293
1294 for (prio = 0; prio < 4; prio++) {
1295 int h;
1296 for (h = 0; h < HASH_SIZE; h++) {
1297 struct ip_tunnel *t;
1298
1299 t = rtnl_dereference(ign->tunnels[prio][h]);
1300
1301 while (t != NULL) {
1302 unregister_netdevice_queue(t->dev, head);
1303 t = rtnl_dereference(t->next);
1304 }
1305 }
1306 }
1307 }
1308
1309 static int __net_init ipgre_init_net(struct net *net)
1310 {
1311 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1312 int err;
1313
1314 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1315 ipgre_tunnel_setup);
1316 if (!ign->fb_tunnel_dev) {
1317 err = -ENOMEM;
1318 goto err_alloc_dev;
1319 }
1320 dev_net_set(ign->fb_tunnel_dev, net);
1321
1322 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1323 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1324
1325 if ((err = register_netdev(ign->fb_tunnel_dev)))
1326 goto err_reg_dev;
1327
1328 return 0;
1329
1330 err_reg_dev:
1331 free_netdev(ign->fb_tunnel_dev);
1332 err_alloc_dev:
1333 return err;
1334 }
1335
1336 static void __net_exit ipgre_exit_net(struct net *net)
1337 {
1338 struct ipgre_net *ign;
1339 LIST_HEAD(list);
1340
1341 ign = net_generic(net, ipgre_net_id);
1342 rtnl_lock();
1343 ipgre_destroy_tunnels(ign, &list);
1344 unregister_netdevice_many(&list);
1345 rtnl_unlock();
1346 }
1347
1348 static struct pernet_operations ipgre_net_ops = {
1349 .init = ipgre_init_net,
1350 .exit = ipgre_exit_net,
1351 .id = &ipgre_net_id,
1352 .size = sizeof(struct ipgre_net),
1353 };
1354
1355 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1356 {
1357 __be16 flags;
1358
1359 if (!data)
1360 return 0;
1361
1362 flags = 0;
1363 if (data[IFLA_GRE_IFLAGS])
1364 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1365 if (data[IFLA_GRE_OFLAGS])
1366 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1367 if (flags & (GRE_VERSION|GRE_ROUTING))
1368 return -EINVAL;
1369
1370 return 0;
1371 }
1372
1373 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1374 {
1375 __be32 daddr;
1376
1377 if (tb[IFLA_ADDRESS]) {
1378 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1379 return -EINVAL;
1380 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1381 return -EADDRNOTAVAIL;
1382 }
1383
1384 if (!data)
1385 goto out;
1386
1387 if (data[IFLA_GRE_REMOTE]) {
1388 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1389 if (!daddr)
1390 return -EINVAL;
1391 }
1392
1393 out:
1394 return ipgre_tunnel_validate(tb, data);
1395 }
1396
1397 static void ipgre_netlink_parms(struct nlattr *data[],
1398 struct ip_tunnel_parm *parms)
1399 {
1400 memset(parms, 0, sizeof(*parms));
1401
1402 parms->iph.protocol = IPPROTO_GRE;
1403
1404 if (!data)
1405 return;
1406
1407 if (data[IFLA_GRE_LINK])
1408 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1409
1410 if (data[IFLA_GRE_IFLAGS])
1411 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1412
1413 if (data[IFLA_GRE_OFLAGS])
1414 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1415
1416 if (data[IFLA_GRE_IKEY])
1417 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1418
1419 if (data[IFLA_GRE_OKEY])
1420 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1421
1422 if (data[IFLA_GRE_LOCAL])
1423 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1424
1425 if (data[IFLA_GRE_REMOTE])
1426 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1427
1428 if (data[IFLA_GRE_TTL])
1429 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1430
1431 if (data[IFLA_GRE_TOS])
1432 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1433
1434 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1435 parms->iph.frag_off = htons(IP_DF);
1436 }
1437
1438 static int ipgre_tap_init(struct net_device *dev)
1439 {
1440 struct ip_tunnel *tunnel;
1441
1442 tunnel = netdev_priv(dev);
1443
1444 tunnel->dev = dev;
1445 strcpy(tunnel->parms.name, dev->name);
1446
1447 ipgre_tunnel_bind_dev(dev);
1448
1449 return 0;
1450 }
1451
1452 static const struct net_device_ops ipgre_tap_netdev_ops = {
1453 .ndo_init = ipgre_tap_init,
1454 .ndo_uninit = ipgre_tunnel_uninit,
1455 .ndo_start_xmit = ipgre_tunnel_xmit,
1456 .ndo_set_mac_address = eth_mac_addr,
1457 .ndo_validate_addr = eth_validate_addr,
1458 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1459 };
1460
1461 static void ipgre_tap_setup(struct net_device *dev)
1462 {
1463
1464 ether_setup(dev);
1465
1466 dev->netdev_ops = &ipgre_tap_netdev_ops;
1467 dev->destructor = free_netdev;
1468
1469 dev->iflink = 0;
1470 dev->features |= NETIF_F_NETNS_LOCAL;
1471 }
1472
1473 static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
1474 struct nlattr *data[])
1475 {
1476 struct ip_tunnel *nt;
1477 struct net *net = dev_net(dev);
1478 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1479 int mtu;
1480 int err;
1481
1482 nt = netdev_priv(dev);
1483 ipgre_netlink_parms(data, &nt->parms);
1484
1485 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1486 return -EEXIST;
1487
1488 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1489 random_ether_addr(dev->dev_addr);
1490
1491 mtu = ipgre_tunnel_bind_dev(dev);
1492 if (!tb[IFLA_MTU])
1493 dev->mtu = mtu;
1494
1495 err = register_netdevice(dev);
1496 if (err)
1497 goto out;
1498
1499 dev_hold(dev);
1500 ipgre_tunnel_link(ign, nt);
1501
1502 out:
1503 return err;
1504 }
1505
1506 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1507 struct nlattr *data[])
1508 {
1509 struct ip_tunnel *t, *nt;
1510 struct net *net = dev_net(dev);
1511 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1512 struct ip_tunnel_parm p;
1513 int mtu;
1514
1515 if (dev == ign->fb_tunnel_dev)
1516 return -EINVAL;
1517
1518 nt = netdev_priv(dev);
1519 ipgre_netlink_parms(data, &p);
1520
1521 t = ipgre_tunnel_locate(net, &p, 0);
1522
1523 if (t) {
1524 if (t->dev != dev)
1525 return -EEXIST;
1526 } else {
1527 t = nt;
1528
1529 if (dev->type != ARPHRD_ETHER) {
1530 unsigned int nflags = 0;
1531
1532 if (ipv4_is_multicast(p.iph.daddr))
1533 nflags = IFF_BROADCAST;
1534 else if (p.iph.daddr)
1535 nflags = IFF_POINTOPOINT;
1536
1537 if ((dev->flags ^ nflags) &
1538 (IFF_POINTOPOINT | IFF_BROADCAST))
1539 return -EINVAL;
1540 }
1541
1542 ipgre_tunnel_unlink(ign, t);
1543 t->parms.iph.saddr = p.iph.saddr;
1544 t->parms.iph.daddr = p.iph.daddr;
1545 t->parms.i_key = p.i_key;
1546 if (dev->type != ARPHRD_ETHER) {
1547 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1548 memcpy(dev->broadcast, &p.iph.daddr, 4);
1549 }
1550 ipgre_tunnel_link(ign, t);
1551 netdev_state_change(dev);
1552 }
1553
1554 t->parms.o_key = p.o_key;
1555 t->parms.iph.ttl = p.iph.ttl;
1556 t->parms.iph.tos = p.iph.tos;
1557 t->parms.iph.frag_off = p.iph.frag_off;
1558
1559 if (t->parms.link != p.link) {
1560 t->parms.link = p.link;
1561 mtu = ipgre_tunnel_bind_dev(dev);
1562 if (!tb[IFLA_MTU])
1563 dev->mtu = mtu;
1564 netdev_state_change(dev);
1565 }
1566
1567 return 0;
1568 }
1569
1570 static size_t ipgre_get_size(const struct net_device *dev)
1571 {
1572 return
1573 /* IFLA_GRE_LINK */
1574 nla_total_size(4) +
1575 /* IFLA_GRE_IFLAGS */
1576 nla_total_size(2) +
1577 /* IFLA_GRE_OFLAGS */
1578 nla_total_size(2) +
1579 /* IFLA_GRE_IKEY */
1580 nla_total_size(4) +
1581 /* IFLA_GRE_OKEY */
1582 nla_total_size(4) +
1583 /* IFLA_GRE_LOCAL */
1584 nla_total_size(4) +
1585 /* IFLA_GRE_REMOTE */
1586 nla_total_size(4) +
1587 /* IFLA_GRE_TTL */
1588 nla_total_size(1) +
1589 /* IFLA_GRE_TOS */
1590 nla_total_size(1) +
1591 /* IFLA_GRE_PMTUDISC */
1592 nla_total_size(1) +
1593 0;
1594 }
1595
1596 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1597 {
1598 struct ip_tunnel *t = netdev_priv(dev);
1599 struct ip_tunnel_parm *p = &t->parms;
1600
1601 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1602 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1603 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1604 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1605 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
1606 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1607 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1608 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1609 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1610 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1611
1612 return 0;
1613
1614 nla_put_failure:
1615 return -EMSGSIZE;
1616 }
1617
1618 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1619 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1620 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1621 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1622 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1623 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1624 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1625 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1626 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1627 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1628 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1629 };
1630
1631 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1632 .kind = "gre",
1633 .maxtype = IFLA_GRE_MAX,
1634 .policy = ipgre_policy,
1635 .priv_size = sizeof(struct ip_tunnel),
1636 .setup = ipgre_tunnel_setup,
1637 .validate = ipgre_tunnel_validate,
1638 .newlink = ipgre_newlink,
1639 .changelink = ipgre_changelink,
1640 .get_size = ipgre_get_size,
1641 .fill_info = ipgre_fill_info,
1642 };
1643
1644 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1645 .kind = "gretap",
1646 .maxtype = IFLA_GRE_MAX,
1647 .policy = ipgre_policy,
1648 .priv_size = sizeof(struct ip_tunnel),
1649 .setup = ipgre_tap_setup,
1650 .validate = ipgre_tap_validate,
1651 .newlink = ipgre_newlink,
1652 .changelink = ipgre_changelink,
1653 .get_size = ipgre_get_size,
1654 .fill_info = ipgre_fill_info,
1655 };
1656
/*
 *	And now the module code and kernel interface.
 */
1660
1661 static int __init ipgre_init(void)
1662 {
1663 int err;
1664
1665 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1666
1667 err = register_pernet_device(&ipgre_net_ops);
1668 if (err < 0)
1669 return err;
1670
1671 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1672 if (err < 0) {
1673 printk(KERN_INFO "ipgre init: can't add protocol\n");
1674 goto add_proto_failed;
1675 }
1676
1677 err = rtnl_link_register(&ipgre_link_ops);
1678 if (err < 0)
1679 goto rtnl_link_failed;
1680
1681 err = rtnl_link_register(&ipgre_tap_ops);
1682 if (err < 0)
1683 goto tap_ops_failed;
1684
1685 out:
1686 return err;
1687
1688 tap_ops_failed:
1689 rtnl_link_unregister(&ipgre_link_ops);
1690 rtnl_link_failed:
1691 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1692 add_proto_failed:
1693 unregister_pernet_device(&ipgre_net_ops);
1694 goto out;
1695 }
1696
1697 static void __exit ipgre_fini(void)
1698 {
1699 rtnl_link_unregister(&ipgre_tap_ops);
1700 rtnl_link_unregister(&ipgre_link_ops);
1701 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1702 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1703 unregister_pernet_device(&ipgre_net_ops);
1704 }
1705
1706 module_init(ipgre_init);
1707 module_exit(ipgre_fini);
1708 MODULE_LICENSE("GPL");
1709 MODULE_ALIAS_RTNL_LINK("gre");
1710 MODULE_ALIAS_RTNL_LINK("gretap");