xref: /openbsd-src/sys/net/if_gre.c (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 /*	$OpenBSD: if_gre.c,v 1.160 2020/08/28 12:01:48 mvs Exp $ */
2 /*	$NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */
3 
4 /*
5  * Copyright (c) 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Heiko W.Rupp <hwr@pilhuhn.de>
10  *
11  * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * Encapsulate L3 protocols into IP, per RFC 1701 and 1702.
37  * See gre(4) for more details.
38  * Also supported: IP in IP encapsulation (proto 55) per RFC 2004.
39  */
40 
41 #include "bpfilter.h"
42 #include "pf.h"
43 
44 #include <sys/param.h>
45 #include <sys/mbuf.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/kernel.h>
49 #include <sys/systm.h>
50 #include <sys/errno.h>
51 #include <sys/timeout.h>
52 #include <sys/queue.h>
53 #include <sys/tree.h>
54 #include <sys/pool.h>
55 #include <sys/rwlock.h>
56 
57 #include <crypto/siphash.h>
58 
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_types.h>
62 #include <net/if_media.h>
63 #include <net/route.h>
64 
65 #include <netinet/in.h>
66 #include <netinet/in_var.h>
67 #include <netinet/if_ether.h>
68 #include <netinet/ip.h>
69 #include <netinet/ip_var.h>
70 #include <netinet/ip_ecn.h>
71 
72 #ifdef INET6
73 #include <netinet/ip6.h>
74 #include <netinet6/ip6_var.h>
75 #include <netinet6/in6_var.h>
76 #endif
77 
78 #ifdef PIPEX
79 #include <net/pipex.h>
80 #endif
81 
82 #ifdef MPLS
83 #include <netmpls/mpls.h>
84 #endif /* MPLS */
85 
86 #if NBPFILTER > 0
87 #include <net/bpf.h>
88 #endif
89 
90 #if NPF > 0
91 #include <net/pfvar.h>
92 #endif
93 
94 #include <net/if_gre.h>
95 
96 #include <netinet/ip_gre.h>
97 #include <sys/sysctl.h>
98 
99 /* for nvgre bridge shizz */
100 #include <sys/socket.h>
101 #include <net/if_bridge.h>
102 
/*
 * packet formats
 */

/*
 * Base GRE header (RFC 1701/2784): flag/version bits followed by the
 * ethertype of the encapsulated payload.
 */
struct gre_header {
	uint16_t		gre_flags;
#define GRE_CP				0x8000  /* Checksum Present */
#define GRE_KP				0x2000  /* Key Present */
#define GRE_SP				0x1000  /* Sequence Present */

#define GRE_VERS_MASK			0x0007
#define GRE_VERS_0			0x0000	/* standard GRE */
#define GRE_VERS_1			0x0001	/* "enhanced" GRE (PPTP) */

	uint16_t		gre_proto;
} __packed __aligned(4);

/* optional checksum field, present when GRE_CP is set */
struct gre_h_cksum {
	uint16_t		gre_cksum;
	uint16_t		gre_reserved1;
} __packed __aligned(4);

/* optional key field, present when GRE_KP is set */
struct gre_h_key {
	uint32_t		gre_key;
} __packed __aligned(4);

/* gre_proto value used by MikroTik Ethernet over IP */
#define GRE_EOIP		0x6400

/* the EoIP variant of the key field carries a length and tunnel id */
struct gre_h_key_eoip {
	uint16_t		eoip_len;	/* network order */
	uint16_t		eoip_tunnel_id;	/* little endian */
} __packed __aligned(4);

#define NVGRE_VSID_RES_MIN	0x000000 /* reserved for future use */
#define NVGRE_VSID_RES_MAX	0x000fff
#define NVGRE_VSID_NVE2NVE	0xffffff /* vendor specific NVE-to-NVE comms */

/* optional sequence number field, present when GRE_SP is set */
struct gre_h_seq {
	uint32_t		gre_seq;
} __packed __aligned(4);

/* redirect header used by WCCPv2-style GRE encapsulation */
struct gre_h_wccp {
	uint8_t			wccp_flags;
	uint8_t			service_id;
	uint8_t			alt_bucket;
	uint8_t			pri_bucket;
} __packed __aligned(4);

/* gre_proto value for WCCP/GRE */
#define GRE_WCCP 0x883e

/* fixed overhead of an IPv4 GRE packet, used to size if_hdrlen */
#define GRE_HDRLEN (sizeof(struct ip) + sizeof(struct gre_header))
153 
/*
 * GRE tunnel metadata
 */

/* keepalive state machine states (sc_ka_state) */
#define GRE_KA_NONE		0	/* keepalives disabled */
#define GRE_KA_DOWN		1
#define GRE_KA_HOLD		2
#define GRE_KA_UP		3

/* an outer address of either family; t_af says which member is live */
union gre_addr {
	struct in_addr		in4;
	struct in6_addr		in6;
};

static inline int
		gre_ip_cmp(int, const union gre_addr *,
		    const union gre_addr *);

/* host-order bounds/shifts for the configurable GRE key */
#define GRE_KEY_MIN		0x00000000U
#define GRE_KEY_MAX		0xffffffffU
#define GRE_KEY_SHIFT		0

/* with entropy, the vsid sits in the top 24 bits of the key */
#define GRE_KEY_ENTROPY_MIN	0x00000000U
#define GRE_KEY_ENTROPY_MAX	0x00ffffffU
#define GRE_KEY_ENTROPY_SHIFT	8

/*
 * Common tunnel state shared by all GRE interface flavours; each
 * softc embeds this as its first member so input lookups can treat
 * a softc pointer as a gre_tunnel pointer.
 */
struct gre_tunnel {
	uint32_t		t_key_mask;
#define GRE_KEY_NONE			htonl(0x00000000U)
#define GRE_KEY_ENTROPY			htonl(0xffffff00U)
#define GRE_KEY_MASK			htonl(0xffffffffU)
	uint32_t		t_key;

	u_int			t_rtableid;	/* routing domain of the outer header */
	union gre_addr		t_src;		/* local outer address */
#define t_src4	t_src.in4
#define t_src6	t_src.in6
	union gre_addr		t_dst;		/* remote outer address */
#define t_dst4	t_dst.in4
#define t_dst6	t_dst.in6
	int			t_ttl;
	int			t_txhprio;	/* IF_HDRPRIO_* or fixed prio */
	int			t_rxhprio;	/* IF_HDRPRIO_* or fixed prio */
	int			t_ecn;
	uint16_t		t_df;		/* htons(IP_DF) or 0 */
	sa_family_t		t_af;		/* AF_UNSPEC, AF_INET, AF_INET6 */
};
201 
/* tunnel comparison: gre_cmp_src matches local address only */
static int
		gre_cmp_src(const struct gre_tunnel *,
		    const struct gre_tunnel *);
static int
		gre_cmp(const struct gre_tunnel *, const struct gre_tunnel *);

/* SIOC[SGD]LIFPHYADDR handlers for the outer addresses */
static int	gre_set_tunnel(struct gre_tunnel *, struct if_laddrreq *, int);
static int	gre_get_tunnel(struct gre_tunnel *, struct if_laddrreq *);
static int	gre_del_tunnel(struct gre_tunnel *);

/* SIOC[SGD]VNETID handlers for the GRE key */
static int	gre_set_vnetid(struct gre_tunnel *, struct ifreq *);
static int	gre_get_vnetid(struct gre_tunnel *, struct ifreq *);
static int	gre_del_vnetid(struct gre_tunnel *);

static int	gre_set_vnetflowid(struct gre_tunnel *, struct ifreq *);
static int	gre_get_vnetflowid(struct gre_tunnel *, struct ifreq *);

/* prepend a GRE header; the _dst variant overrides the tunnel endpoint */
static struct mbuf *
		gre_encap_dst(const struct gre_tunnel *, const union gre_addr *,
		    struct mbuf *, uint16_t, uint8_t, uint8_t);
#define gre_encap(_t, _m, _p, _ttl, _tos) \
		gre_encap_dst((_t), &(_t)->t_dst, (_m), (_p), (_ttl), (_tos))

/* like gre_encap_dst, but also prepends the outer IP/IPv6 header */
static struct mbuf *
		gre_encap_dst_ip(const struct gre_tunnel *,
		    const union gre_addr *, struct mbuf *, uint8_t, uint8_t);
#define gre_encap_ip(_t, _m, _ttl, _tos) \
		gre_encap_dst_ip((_t), &(_t)->t_dst, (_m), (_ttl), (_tos))

static int
		gre_ip_output(const struct gre_tunnel *, struct mbuf *);

/* ioctl handling shared by all flavours that embed a gre_tunnel */
static int	gre_tunnel_ioctl(struct ifnet *, struct gre_tunnel *,
		    u_long, void *);

/* compute the outer TOS for L2 and L3 payloads respectively */
static uint8_t	gre_l2_tos(const struct gre_tunnel *, const struct mbuf *);
static uint8_t	gre_l3_tos(const struct gre_tunnel *,
		    const struct mbuf *, uint8_t);
240 
/*
 * layer 3 GRE tunnels
 */

struct gre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	TAILQ_ENTRY(gre_softc)	sc_entry;  /* entry on gre_list */

	struct ifnet		sc_if;

	/* keepalive machinery; disabled until configured via ioctl */
	struct timeout		sc_ka_send;	/* periodic transmit */
	struct timeout		sc_ka_hold;	/* link hold-down */

	unsigned int		sc_ka_state;	/* GRE_KA_* */
	unsigned int		sc_ka_timeo;
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;

	SIPHASH_KEY		sc_ka_key;	/* authenticates our own keepalives */
	uint32_t		sc_ka_bias;
	int			sc_ka_recvtm;
};

TAILQ_HEAD(gre_list, gre_softc);

/* on-the-wire keepalive payload */
struct gre_keepalive {
	uint32_t		gk_uptime;
	uint32_t		gk_random;
	uint8_t			gk_digest[SIPHASH_DIGEST_LENGTH];
} __packed __aligned(4);

static int	gre_clone_create(struct if_clone *, int);
static int	gre_clone_destroy(struct ifnet *);

struct if_clone gre_cloner =
    IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);

/* protected by NET_LOCK */
struct gre_list gre_list = TAILQ_HEAD_INITIALIZER(gre_list);
282 
static int	gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
		    struct rtentry *);
static void	gre_start(struct ifnet *);
static int	gre_ioctl(struct ifnet *, u_long, caddr_t);

static int	gre_up(struct gre_softc *);
static int	gre_down(struct gre_softc *);
static void	gre_link_state(struct ifnet *, unsigned int);

/* shared input path; returns -1 when no interface claims the packet */
static int	gre_input_key(struct mbuf **, int *, int, int, uint8_t,
		    struct gre_tunnel *);

/* per-address-family fixup of the inner header's TOS/ECN on input */
static struct mbuf *
		gre_ipv4_patch(const struct gre_tunnel *, struct mbuf *,
		    uint8_t *, uint8_t);
#ifdef INET6
static struct mbuf *
		gre_ipv6_patch(const struct gre_tunnel *, struct mbuf *,
		    uint8_t *, uint8_t);
#endif
#ifdef MPLS
static struct mbuf *
		gre_mpls_patch(const struct gre_tunnel *, struct mbuf *,
		    uint8_t *, uint8_t);
#endif
static void	gre_keepalive_send(void *);
static void	gre_keepalive_recv(struct ifnet *ifp, struct mbuf *);
static void	gre_keepalive_hold(void *);

/* encapsulate an L3 payload; the _dst variant overrides the endpoint */
static struct mbuf *
		gre_l3_encap_dst(const struct gre_tunnel *, const void *,
		    struct mbuf *m, sa_family_t);

#define gre_l3_encap(_t, _m, _af) \
		gre_l3_encap_dst((_t), &(_t)->t_dst, (_m), (_af))
318 
/* mgre(4): point-to-multipoint layer 3 GRE */
struct mgre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(mgre_softc)	sc_entry;  /* entry in mgre_tree */

	struct ifnet		sc_if;
};

RBT_HEAD(mgre_tree, mgre_softc);

static inline int
		mgre_cmp(const struct mgre_softc *, const struct mgre_softc *);

RBT_PROTOTYPE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);

static int	mgre_clone_create(struct if_clone *, int);
static int	mgre_clone_destroy(struct ifnet *);

struct if_clone mgre_cloner =
    IF_CLONE_INITIALIZER("mgre", mgre_clone_create, mgre_clone_destroy);

static void	mgre_rtrequest(struct ifnet *, int, struct rtentry *);
static int	mgre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
		    struct rtentry *);
static void	mgre_start(struct ifnet *);
static int	mgre_ioctl(struct ifnet *, u_long, caddr_t);

static int	mgre_set_tunnel(struct mgre_softc *, struct if_laddrreq *);
static int	mgre_get_tunnel(struct mgre_softc *, struct if_laddrreq *);
static int	mgre_up(struct mgre_softc *);
static int	mgre_down(struct mgre_softc *);

/* protected by NET_LOCK */
struct mgre_tree mgre_tree = RBT_INITIALIZER();
352 
/*
 * Ethernet GRE tunnels
 */

static struct mbuf *
		gre_ether_align(struct mbuf *, int);

struct egre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(egre_softc)	sc_entry;  /* entry in egre_tree */

	struct arpcom		sc_ac;
	struct ifmedia		sc_media;
};

RBT_HEAD(egre_tree, egre_softc);

static inline int
		egre_cmp(const struct egre_softc *, const struct egre_softc *);

RBT_PROTOTYPE(egre_tree, egre_softc, sc_entry, egre_cmp);

static int	egre_clone_create(struct if_clone *, int);
static int	egre_clone_destroy(struct ifnet *);

static void	egre_start(struct ifnet *);
static int	egre_ioctl(struct ifnet *, u_long, caddr_t);
/* media handlers; also reused by nvgre(4) and eoip(4) */
static int	egre_media_change(struct ifnet *);
static void	egre_media_status(struct ifnet *, struct ifmediareq *);

static int	egre_up(struct egre_softc *);
static int	egre_down(struct egre_softc *);

/* returns -1 when the packet does not match an egre interface */
static int	egre_input(const struct gre_tunnel *, struct mbuf *, int,
		    uint8_t);
struct if_clone egre_cloner =
    IF_CLONE_INITIALIZER("egre", egre_clone_create, egre_clone_destroy);

/* protected by NET_LOCK */
struct egre_tree egre_tree = RBT_INITIALIZER();
393 
/*
 * Network Virtualisation Using Generic Routing Encapsulation (NVGRE)
 */

#define NVGRE_AGE_TMO		100	/* seconds */

/* learned inner-MAC -> outer-gateway mapping (like a bridge cache entry) */
struct nvgre_entry {
	RB_ENTRY(nvgre_entry)	 nv_entry;
	struct ether_addr	 nv_dst;	/* inner destination MAC */
	uint8_t			 nv_type;
#define NVGRE_ENTRY_DYNAMIC		0
#define NVGRE_ENTRY_STATIC		1
	union gre_addr		 nv_gateway;	/* outer address to send via */
	struct refcnt		 nv_refs;
	int			 nv_age;
};

RBT_HEAD(nvgre_map, nvgre_entry);

static inline int
		nvgre_entry_cmp(const struct nvgre_entry *,
		    const struct nvgre_entry *);

RBT_PROTOTYPE(nvgre_map, nvgre_entry, nv_entry, nvgre_entry_cmp);

struct nvgre_softc {
	struct gre_tunnel	 sc_tunnel; /* must be first */
	unsigned int		 sc_ifp0;   /* parent interface index */
	RBT_ENTRY(nvgre_softc)	 sc_uentry; /* entry in nvgre_ucast_tree */
	RBT_ENTRY(nvgre_softc)	 sc_mentry; /* entry in nvgre_mcast_tree */

	struct arpcom		 sc_ac;
	struct ifmedia		 sc_media;

	struct mbuf_queue	 sc_send_list;
	struct task		 sc_send_task;

	void			*sc_inm;    /* multicast membership handle */
	struct task		 sc_ltask;  /* parent link state hook */
	struct task		 sc_dtask;  /* parent detach hook */

	/* learned address map, guarded by sc_ether_lock */
	struct rwlock		 sc_ether_lock;
	struct nvgre_map	 sc_ether_map;
	unsigned int		 sc_ether_num;	/* current entries */
	unsigned int		 sc_ether_max;	/* entry limit */
	int			 sc_ether_tmo;
	struct timeout		 sc_ether_age;
};

RBT_HEAD(nvgre_ucast_tree, nvgre_softc);
RBT_HEAD(nvgre_mcast_tree, nvgre_softc);

static inline int
		nvgre_cmp_ucast(const struct nvgre_softc *,
		    const struct nvgre_softc *);
static int
		nvgre_cmp_mcast(const struct gre_tunnel *,
		    const union gre_addr *, unsigned int,
		    const struct gre_tunnel *, const union gre_addr *,
		    unsigned int);
static inline int
		nvgre_cmp_mcast_sc(const struct nvgre_softc *,
		    const struct nvgre_softc *);

RBT_PROTOTYPE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
RBT_PROTOTYPE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);

static int	nvgre_clone_create(struct if_clone *, int);
static int	nvgre_clone_destroy(struct ifnet *);

static void	nvgre_start(struct ifnet *);
static int	nvgre_ioctl(struct ifnet *, u_long, caddr_t);

static int	nvgre_up(struct nvgre_softc *);
static int	nvgre_down(struct nvgre_softc *);
static int	nvgre_set_parent(struct nvgre_softc *, const char *);
static void	nvgre_link_change(void *);
static void	nvgre_detach(void *);

/* returns -1 when the packet does not match an nvgre interface */
static int	nvgre_input(const struct gre_tunnel *, struct mbuf *, int,
		    uint8_t);
static void	nvgre_send(void *);

static int	nvgre_rtfind(struct nvgre_softc *, struct ifbaconf *);
static void	nvgre_flush_map(struct nvgre_softc *);
static void	nvgre_input_map(struct nvgre_softc *,
		    const struct gre_tunnel *, const struct ether_header *);
static void	nvgre_age(void *);

struct if_clone nvgre_cloner =
    IF_CLONE_INITIALIZER("nvgre", nvgre_clone_create, nvgre_clone_destroy);

/* shared pool for nvgre_entry allocations, set up on first clone */
struct pool nvgre_pool;

/* protected by NET_LOCK */
struct nvgre_ucast_tree nvgre_ucast_tree = RBT_INITIALIZER();
struct nvgre_mcast_tree nvgre_mcast_tree = RBT_INITIALIZER();
491 
/*
 * MikroTik Ethernet over IP protocol (eoip)
 */

struct eoip_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	uint16_t		sc_tunnel_id;	/* eoip_tunnel_id on the wire */
	RBT_ENTRY(eoip_softc)	sc_entry;	/* entry in eoip_tree */

	struct arpcom		sc_ac;
	struct ifmedia		sc_media;

	/* keepalive machinery, same shape as gre_softc's */
	struct timeout		sc_ka_send;
	struct timeout		sc_ka_hold;

	unsigned int		sc_ka_state;	/* GRE_KA_* */
	unsigned int		sc_ka_timeo;
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;
};

RBT_HEAD(eoip_tree, eoip_softc);

static inline int
		eoip_cmp(const struct eoip_softc *, const struct eoip_softc *);

RBT_PROTOTYPE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);

static int	eoip_clone_create(struct if_clone *, int);
static int	eoip_clone_destroy(struct ifnet *);

static void	eoip_start(struct ifnet *);
static int	eoip_ioctl(struct ifnet *, u_long, caddr_t);

static void	eoip_keepalive_send(void *);
static void	eoip_keepalive_recv(struct eoip_softc *);
static void	eoip_keepalive_hold(void *);

static int	eoip_up(struct eoip_softc *);
static int	eoip_down(struct eoip_softc *);

static struct mbuf *
		eoip_encap(struct eoip_softc *, struct mbuf *, uint8_t);

/* called from gre_input_1 for GRE v1 packets with proto GRE_EOIP */
static struct mbuf *
		eoip_input(struct gre_tunnel *, struct mbuf *,
		    const struct gre_header *, uint8_t, int);
struct if_clone eoip_cloner =
    IF_CLONE_INITIALIZER("eoip", eoip_clone_create, eoip_clone_destroy);

/* protected by NET_LOCK */
struct eoip_tree eoip_tree = RBT_INITIALIZER();
546 
/*
 * It is not easy to calculate the right value for a GRE MTU.
 * We leave this task to the admin and use the same default that
 * other vendors use.
 */
#define GREMTU 1476

/*
 * We can control the acceptance of GRE and MobileIP packets by
 * altering the sysctl net.inet.gre.allow value.  Zero means drop
 * them, all else is acceptance.  We can also control acceptance of
 * WCCPv1-style GRE packets through the net.inet.gre.wccp value,
 * but be aware it depends upon normal GRE being allowed as well.
 */
int gre_allow = 0;	/* net.inet.gre.allow */
int gre_wccp = 0;	/* net.inet.gre.wccp */
565 
566 void
567 greattach(int n)
568 {
569 	if_clone_attach(&gre_cloner);
570 	if_clone_attach(&mgre_cloner);
571 	if_clone_attach(&egre_cloner);
572 	if_clone_attach(&nvgre_cloner);
573 	if_clone_attach(&eoip_cloner);
574 }
575 
/*
 * Create a gre(4) point-to-point interface for the given unit and
 * add it to the global gre_list searched by the input path.
 * Always succeeds (allocation waits).
 */
static int
gre_clone_create(struct if_clone *ifc, int unit)
{
	struct gre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d",
	    ifc->ifc_name, unit);

	ifp = &sc->sc_if;
	ifp->if_softc = sc;
	ifp->if_type = IFT_TUNNEL;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_output = gre_output;
	ifp->if_start = gre_start;
	ifp->if_ioctl = gre_ioctl;
	ifp->if_rtrequest = p2p_rtrequest;

	/* tunnel defaults: system TTL, tx prio from payload, no DF */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	/* keepalives are off until configured via ioctl */
	timeout_set(&sc->sc_ka_send, gre_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, gre_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_NONE;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	ifp->if_llprio = IFQ_TOS2PRIO(IPTOS_PREC_INTERNETCONTROL);

	/* publish the interface to the input path */
	NET_LOCK();
	TAILQ_INSERT_TAIL(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	return (0);
}
624 
/*
 * Destroy a gre(4) interface: bring it down if running, unhook it
 * from the input path's list, then detach and free it.
 */
static int
gre_clone_destroy(struct ifnet *ifp)
{
	struct gre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		gre_down(sc);

	/* after this gre_find() can no longer return this interface */
	TAILQ_REMOVE(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
643 
/*
 * Create an mgre(4) point-to-multipoint interface for the given unit.
 * Unlike gre(4) it is not listed anywhere yet; mgre_up() inserts it
 * into mgre_tree when the tunnel is configured and brought up.
 */
static int
mgre_clone_create(struct if_clone *ifc, int unit)
{
	struct mgre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_type = IFT_L3IPVLAN;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_MULTICAST|IFF_SIMPLEX;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_rtrequest = mgre_rtrequest;
	ifp->if_output = mgre_output;
	ifp->if_start = mgre_start;
	ifp->if_ioctl = mgre_ioctl;

	/* same tunnel defaults as gre(4) */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	return (0);
}
683 
/*
 * Destroy an mgre(4) interface.  mgre_down() removes it from
 * mgre_tree, so no explicit unlinking is needed here.
 */
static int
mgre_clone_destroy(struct ifnet *ifp)
{
	struct mgre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		mgre_down(sc);
	NET_UNLOCK();

	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
700 
/*
 * Create an egre(4) Ethernet-over-GRE interface for the given unit.
 * egre_up() inserts it into egre_tree when it is brought up.
 */
static int
egre_clone_create(struct if_clone *ifc, int unit)
{
	struct egre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = egre_ioctl;
	ifp->if_start = egre_start;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	/* tunnel defaults; tx prio is a fixed value for L2 payloads */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}
736 
/*
 * Destroy an egre(4) interface.  egre_down() removes it from
 * egre_tree before the Ethernet/interface detach.
 */
static int
egre_clone_destroy(struct ifnet *ifp)
{
	struct egre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		egre_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
755 
756 static int
757 nvgre_clone_create(struct if_clone *ifc, int unit)
758 {
759 	struct nvgre_softc *sc;
760 	struct ifnet *ifp;
761 	struct gre_tunnel *tunnel;
762 
763 	if (nvgre_pool.pr_size == 0) {
764 		pool_init(&nvgre_pool, sizeof(struct nvgre_entry), 0,
765 		    IPL_SOFTNET, 0, "nvgren", NULL);
766 	}
767 
768 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
769 	ifp = &sc->sc_ac.ac_if;
770 
771 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
772 	    ifc->ifc_name, unit);
773 
774 	ifp->if_softc = sc;
775 	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
776 	ifp->if_ioctl = nvgre_ioctl;
777 	ifp->if_start = nvgre_start;
778 	ifp->if_xflags = IFXF_CLONED;
779 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
780 	ether_fakeaddr(ifp);
781 
782 	tunnel = &sc->sc_tunnel;
783 	tunnel->t_ttl = IP_DEFAULT_MULTICAST_TTL;
784 	tunnel->t_txhprio = 0;
785 	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
786 	tunnel->t_df = htons(IP_DF);
787 	tunnel->t_key_mask = GRE_KEY_ENTROPY;
788 	tunnel->t_key = htonl((NVGRE_VSID_RES_MAX + 1) <<
789 	    GRE_KEY_ENTROPY_SHIFT);
790 
791 	mq_init(&sc->sc_send_list, IFQ_MAXLEN * 2, IPL_SOFTNET);
792 	task_set(&sc->sc_send_task, nvgre_send, sc);
793 	task_set(&sc->sc_ltask, nvgre_link_change, sc);
794 	task_set(&sc->sc_dtask, nvgre_detach, sc);
795 
796 	rw_init(&sc->sc_ether_lock, "nvgrelk");
797 	RBT_INIT(nvgre_map, &sc->sc_ether_map);
798 	sc->sc_ether_num = 0;
799 	sc->sc_ether_max = 100;
800 	sc->sc_ether_tmo = 240 * hz;
801 	timeout_set_proc(&sc->sc_ether_age, nvgre_age, sc); /* ugh */
802 
803 	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
804 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
805 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
806 
807 	if_counters_alloc(ifp);
808 	if_attach(ifp);
809 	ether_ifattach(ifp);
810 
811 	return (0);
812 }
813 
/*
 * Destroy an nvgre(4) interface.  nvgre_down() removes it from the
 * lookup trees and its parent before the detach.
 */
static int
nvgre_clone_destroy(struct ifnet *ifp)
{
	struct nvgre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		nvgre_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
832 
/*
 * Create an eoip(4) interface for the given unit.  eoip keepalives
 * are always on, so the keepalive state starts at GRE_KA_DOWN rather
 * than GRE_KA_NONE.
 */
static int
eoip_clone_create(struct if_clone *ifc, int unit)
{
	struct eoip_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = eoip_ioctl;
	ifp->if_start = eoip_start;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);

	/* keepalive defaults */
	sc->sc_ka_timeo = 10;
	sc->sc_ka_count = 10;

	timeout_set(&sc->sc_ka_send, eoip_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, eoip_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_DOWN;

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}
875 
/*
 * Destroy an eoip(4) interface.  eoip_down() removes it from
 * eoip_tree and stops the keepalive timeouts.
 */
static int
eoip_clone_destroy(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		eoip_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
894 
895 int
896 gre_input(struct mbuf **mp, int *offp, int type, int af)
897 {
898 	struct mbuf *m = *mp;
899 	struct gre_tunnel key;
900 	struct ip *ip;
901 
902 	ip = mtod(m, struct ip *);
903 
904 	/* XXX check if ip_src is sane for nvgre? */
905 
906 	key.t_af = AF_INET;
907 	key.t_src4 = ip->ip_dst;
908 	key.t_dst4 = ip->ip_src;
909 
910 	if (gre_input_key(mp, offp, type, af, ip->ip_tos, &key) == -1)
911 		return (rip_input(mp, offp, type, af));
912 
913 	return (IPPROTO_DONE);
914 }
915 
#ifdef INET6
/*
 * IPv6 protocol input for GRE.  Mirrors gre_input(), with the outer
 * traffic class extracted from the flow word for ECN/prio handling.
 */
int
gre_input6(struct mbuf **mp, int *offp, int type, int af)
{
	struct gre_tunnel key;
	struct ip6_hdr *ip6;
	uint32_t flow;

	ip6 = mtod(*mp, struct ip6_hdr *);

	/* XXX check if ip6_src is sane for nvgre? */

	/* key is from our point of view: swap src and dst */
	key.t_af = AF_INET6;
	key.t_src6 = ip6->ip6_dst;
	key.t_dst6 = ip6->ip6_src;

	/* shift the traffic class down into the low byte */
	flow = bemtoh32(&ip6->ip6_flow);

	if (gre_input_key(mp, offp, type, af, flow >> 20, &key) != -1)
		return (IPPROTO_DONE);

	/* nobody claimed it; let raw sockets have a look */
	return (rip6_input(mp, offp, type, af));
}
#endif /* INET6 */
941 
942 static inline struct ifnet *
943 gre_find(const struct gre_tunnel *key)
944 {
945 	struct gre_softc *sc;
946 
947 	TAILQ_FOREACH(sc, &gre_list, sc_entry) {
948 		if (gre_cmp(key, &sc->sc_tunnel) != 0)
949 			continue;
950 
951 		if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
952 			continue;
953 
954 		return (&sc->sc_if);
955 	}
956 
957 	return (NULL);
958 }
959 
960 static inline struct ifnet *
961 mgre_find(const struct gre_tunnel *key)
962 {
963 	struct mgre_softc *sc;
964 
965 	NET_ASSERT_LOCKED();
966 	sc = RBT_FIND(mgre_tree, &mgre_tree, (const struct mgre_softc *)key);
967 	if (sc != NULL)
968 		return (&sc->sc_if);
969 
970 	return (NULL);
971 }
972 
973 static struct mbuf *
974 gre_input_1(struct gre_tunnel *key, struct mbuf *m,
975     const struct gre_header *gh, uint8_t otos, int iphlen)
976 {
977 	switch (gh->gre_proto) {
978 	case htons(ETHERTYPE_PPP):
979 #ifdef PIPEX
980 		if (pipex_enable) {
981 			struct pipex_session *session;
982 
983 			session = pipex_pptp_lookup_session(m);
984 			if (session != NULL &&
985 			    pipex_pptp_input(m, session) == NULL)
986 				return (NULL);
987 		}
988 #endif
989 		break;
990 	case htons(GRE_EOIP):
991 		return (eoip_input(key, m, gh, otos, iphlen));
992 		break;
993 	}
994 
995 	return (m);
996 }
997 
998 static int
999 gre_input_key(struct mbuf **mp, int *offp, int type, int af, uint8_t otos,
1000     struct gre_tunnel *key)
1001 {
1002 	struct mbuf *m = *mp;
1003 	int iphlen = *offp, hlen, rxprio;
1004 	struct ifnet *ifp;
1005 	const struct gre_tunnel *tunnel;
1006 	caddr_t buf;
1007 	struct gre_header *gh;
1008 	struct gre_h_key *gkh;
1009 	void (*input)(struct ifnet *, struct mbuf *);
1010 	struct mbuf *(*patch)(const struct gre_tunnel *, struct mbuf *,
1011 	    uint8_t *, uint8_t);
1012 #if NBPFILTER > 0
1013 	int bpf_af = AF_UNSPEC; /* bpf */
1014 #endif
1015 	int mcast = 0;
1016 	uint8_t itos;
1017 
1018 	if (!gre_allow)
1019 		goto decline;
1020 
1021 	key->t_rtableid = m->m_pkthdr.ph_rtableid;
1022 
1023 	hlen = iphlen + sizeof(*gh);
1024 	if (m->m_pkthdr.len < hlen)
1025 		goto decline;
1026 
1027 	m = m_pullup(m, hlen);
1028 	if (m == NULL)
1029 		return (IPPROTO_DONE);
1030 
1031 	buf = mtod(m, caddr_t);
1032 	gh = (struct gre_header *)(buf + iphlen);
1033 
1034 	/* check the version */
1035 	switch (gh->gre_flags & htons(GRE_VERS_MASK)) {
1036 	case htons(GRE_VERS_0):
1037 		break;
1038 
1039 	case htons(GRE_VERS_1):
1040 		m = gre_input_1(key, m, gh, otos, iphlen);
1041 		if (m == NULL)
1042 			return (IPPROTO_DONE);
1043 		/* FALLTHROUGH */
1044 	default:
1045 		goto decline;
1046 	}
1047 
1048 	/* the only optional bit in the header is K flag */
1049 	if ((gh->gre_flags & htons(~(GRE_KP|GRE_VERS_MASK))) != htons(0))
1050 		goto decline;
1051 
1052 	if (gh->gre_flags & htons(GRE_KP)) {
1053 		hlen += sizeof(*gkh);
1054 		if (m->m_pkthdr.len < hlen)
1055 			goto decline;
1056 
1057 		m = m_pullup(m, hlen);
1058 		if (m == NULL)
1059 			return (IPPROTO_DONE);
1060 
1061 		buf = mtod(m, caddr_t);
1062 		gh = (struct gre_header *)(buf + iphlen);
1063 		gkh = (struct gre_h_key *)(gh + 1);
1064 
1065 		key->t_key_mask = GRE_KEY_MASK;
1066 		key->t_key = gkh->gre_key;
1067 	} else
1068 		key->t_key_mask = GRE_KEY_NONE;
1069 
1070 	if (gh->gre_proto == htons(ETHERTYPE_TRANSETHER)) {
1071 		if (egre_input(key, m, hlen, otos) == -1 &&
1072 		    nvgre_input(key, m, hlen, otos) == -1)
1073 			goto decline;
1074 
1075 		return (IPPROTO_DONE);
1076 	}
1077 
1078 	ifp = gre_find(key);
1079 	if (ifp == NULL) {
1080 		ifp = mgre_find(key);
1081 		if (ifp == NULL)
1082 			goto decline;
1083 	}
1084 
1085 	switch (gh->gre_proto) {
1086 	case htons(GRE_WCCP): {
1087 		struct mbuf *n;
1088 		int off;
1089 
1090 		/* WCCP/GRE:
1091 		 *   So far as I can see (and test) it seems that Cisco's WCCP
1092 		 *   GRE tunnel is precisely a IP-in-GRE tunnel that differs
1093 		 *   only in its protocol number.  At least, it works for me.
1094 		 *
1095 		 *   The Internet Drafts can be found if you look for
1096 		 *   the following:
1097 		 *     draft-forster-wrec-wccp-v1-00.txt
1098 		 *     draft-wilson-wrec-wccp-v2-01.txt
1099 		 */
1100 
1101 		if (!gre_wccp && !ISSET(ifp->if_flags, IFF_LINK0))
1102 			goto decline;
1103 
1104 		/*
1105 		 * If the first nibble of the payload does not look like
1106 		 * IPv4, assume it is WCCP v2.
1107 		 */
1108 		n = m_getptr(m, hlen, &off);
1109 		if (n == NULL)
1110 			goto decline;
1111 		if (n->m_data[off] >> 4 != IPVERSION)
1112 			hlen += sizeof(gre_wccp);
1113 
1114 		/* FALLTHROUGH */
1115 	}
1116 	case htons(ETHERTYPE_IP):
1117 #if NBPFILTER > 0
1118 		bpf_af = AF_INET;
1119 #endif
1120 		patch = gre_ipv4_patch;
1121 		input = ipv4_input;
1122 		break;
1123 #ifdef INET6
1124 	case htons(ETHERTYPE_IPV6):
1125 #if NBPFILTER > 0
1126 		bpf_af = AF_INET6;
1127 #endif
1128 		patch = gre_ipv6_patch;
1129 		input = ipv6_input;
1130 		break;
1131 #endif
1132 #ifdef MPLS
1133 	case htons(ETHERTYPE_MPLS_MCAST):
1134 		mcast = M_MCAST|M_BCAST;
1135 		/* fallthrough */
1136 	case htons(ETHERTYPE_MPLS):
1137 #if NBPFILTER > 0
1138 		bpf_af = AF_MPLS;
1139 #endif
1140 		patch = gre_mpls_patch;
1141 		input = mpls_input;
1142 		break;
1143 #endif
1144 	case htons(0):
1145 		if (ifp->if_type != IFT_TUNNEL) {
1146 			/* keepalives dont make sense for mgre */
1147 			goto decline;
1148 		}
1149 
1150 		m_adj(m, hlen);
1151 		gre_keepalive_recv(ifp, m);
1152 		return (IPPROTO_DONE);
1153 
1154 	default:
1155 		goto decline;
1156 	}
1157 
1158 	/* it's ours now */
1159 
1160 	m_adj(m, hlen);
1161 
1162 	tunnel = ifp->if_softc; /* gre and mgre tunnel info is at the front */
1163 
1164 	m = (*patch)(tunnel, m, &itos, otos);
1165 	if (m == NULL)
1166 		return (IPPROTO_DONE);
1167 
1168 	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
1169 		SET(m->m_pkthdr.csum_flags, M_FLOWID);
1170 		m->m_pkthdr.ph_flowid =
1171 		    bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
1172 	}
1173 
1174 	rxprio = tunnel->t_rxhprio;
1175 	switch (rxprio) {
1176 	case IF_HDRPRIO_PACKET:
1177 		/* nop */
1178 		break;
1179 	case IF_HDRPRIO_OUTER:
1180 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos);
1181 		break;
1182 	case IF_HDRPRIO_PAYLOAD:
1183 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(itos);
1184 		break;
1185 	default:
1186 		m->m_pkthdr.pf.prio = rxprio;
1187 		break;
1188 	}
1189 
1190 	m->m_flags &= ~(M_MCAST|M_BCAST);
1191 	m->m_flags |= mcast;
1192 	m->m_pkthdr.ph_ifidx = ifp->if_index;
1193 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
1194 
1195 #if NPF > 0
1196 	pf_pkt_addr_changed(m);
1197 #endif
1198 
1199 	counters_pkt(ifp->if_counters,
1200 	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);
1201 
1202 #if NBPFILTER > 0
1203 	if (ifp->if_bpf)
1204 		bpf_mtap_af(ifp->if_bpf, bpf_af, m, BPF_DIRECTION_IN);
1205 #endif
1206 
1207 	(*input)(ifp, m);
1208 	return (IPPROTO_DONE);
1209 decline:
1210 	*mp = m;
1211 	return (-1);
1212 }
1213 
1214 static struct mbuf *
1215 gre_ipv4_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1216     uint8_t *itosp, uint8_t otos)
1217 {
1218 	struct ip *ip;
1219 	uint8_t itos;
1220 
1221 	m = m_pullup(m, sizeof(*ip));
1222 	if (m == NULL)
1223 		return (NULL);
1224 
1225 	ip = mtod(m, struct ip *);
1226 
1227 	itos = ip->ip_tos;
1228 	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1229 		m_freem(m);
1230 		return (NULL);
1231 	}
1232 	if (itos != ip->ip_tos)
1233 		ip_tos_patch(ip, itos);
1234 
1235 	*itosp = itos;
1236 
1237 	return (m);
1238 }
1239 
1240 #ifdef INET6
1241 static struct mbuf *
1242 gre_ipv6_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1243     uint8_t *itosp, uint8_t otos)
1244 {
1245 	struct ip6_hdr *ip6;
1246 	uint32_t flow;
1247 	uint8_t itos;
1248 
1249 	m = m_pullup(m, sizeof(*ip6));
1250 	if (m == NULL)
1251 		return (NULL);
1252 
1253 	ip6 = mtod(m, struct ip6_hdr *);
1254 
1255 	flow = bemtoh32(&ip6->ip6_flow);
1256 	itos = flow >> 20;
1257 	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1258 		m_freem(m);
1259 		return (NULL);
1260 	}
1261 
1262 	CLR(flow, 0xff << 20);
1263 	SET(flow, itos << 20);
1264 	htobem32(&ip6->ip6_flow, flow);
1265 
1266 	*itosp = itos;
1267 
1268 	return (m);
1269 }
1270 #endif
1271 
1272 #ifdef MPLS
1273 static struct mbuf *
1274 gre_mpls_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1275     uint8_t *itosp, uint8_t otos)
1276 {
1277 	uint8_t itos;
1278 	uint32_t shim;
1279 
1280 	m = m_pullup(m, sizeof(shim));
1281 	if (m == NULL)
1282 		return (NULL);
1283 
1284 	shim = *mtod(m, uint32_t *);
1285 	itos = (ntohl(shim & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET) << 5;
1286 
1287 	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1288 		m_freem(m);
1289 		return (NULL);
1290 	}
1291 
1292 	*itosp = itos;
1293 
1294 	return (m);
1295 }
1296 #endif
1297 
/*
 * gre_l2_prio: set the pf priority on a decapsulated layer 2 payload
 * mbuf according to the tunnel's receive header-priority policy:
 * leave the packet's own priority (IF_HDRPRIO_PACKET), derive it from
 * the outer IP TOS (IF_HDRPRIO_OUTER), or force the configured fixed
 * value.  A macro (not a function) so it can be shared by egre_input()
 * and nvgre_input() without a prototype; callers pass the tunnel,
 * the mbuf and the outer TOS byte.
 */
#define gre_l2_prio(_t, _m, _otos) do {					\
	int rxprio = (_t)->t_rxhprio;					\
	switch (rxprio) {						\
	case IF_HDRPRIO_PACKET:						\
		/* nop */						\
		break;							\
	case IF_HDRPRIO_OUTER:						\
		(_m)->m_pkthdr.pf.prio = IFQ_TOS2PRIO((_otos));		\
		break;							\
	default:							\
		(_m)->m_pkthdr.pf.prio = rxprio;			\
		break;							\
	}								\
} while (0)
1312 
/*
 * egre_input: offer a decapsulated Ethernet-over-GRE packet to the
 * egre(4) interface matching the tunnel key.
 *
 * Returns -1 if no interface claims the packet (the mbuf is untouched
 * and still owned by the caller), or 0 once the packet has been
 * consumed (delivered to the interface or freed on error).
 */
static int
egre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen, uint8_t otos)
{
	struct egre_softc *sc;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(egre_tree, &egre_tree, (const struct egre_softc *)key);
	if (sc == NULL)
		return (-1);

	/* it's ours now */
	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (0);

	/* an entropy-keyed tunnel carries the flow id in the GRE key */
	if (sc->sc_tunnel.t_key_mask == GRE_KEY_ENTROPY) {
		SET(m->m_pkthdr.csum_flags, M_FLOWID);
		m->m_pkthdr.ph_flowid =
		    bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
	}

	/* the inner frame decides mcast/bcast again on if_vinput */
	m->m_flags &= ~(M_MCAST|M_BCAST);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	if_vinput(&sc->sc_ac.ac_if, m);

	return (0);
}
1346 
1347 static int
1348 nvgre_rtfind(struct nvgre_softc *sc, struct ifbaconf *baconf)
1349 {
1350 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1351 	struct nvgre_entry *nv;
1352 	struct ifbareq bareq;
1353 	caddr_t uaddr, end;
1354 	int error;
1355 	int age;
1356 
1357 	if (baconf->ifbac_len == 0) {
1358 		/* single read is atomic */
1359 		baconf->ifbac_len = sc->sc_ether_num * sizeof(bareq);
1360 		return (0);
1361 	}
1362 
1363 	uaddr = baconf->ifbac_buf;
1364 	end = uaddr + baconf->ifbac_len;
1365 
1366 	rw_enter_read(&sc->sc_ether_lock);
1367 	RBT_FOREACH(nv, nvgre_map, &sc->sc_ether_map) {
1368 		if (uaddr >= end)
1369 			break;
1370 
1371 		memcpy(bareq.ifba_name, ifp->if_xname,
1372 		    sizeof(bareq.ifba_name));
1373 		memcpy(bareq.ifba_ifsname, ifp->if_xname,
1374 		    sizeof(bareq.ifba_ifsname));
1375 		memcpy(&bareq.ifba_dst, &nv->nv_dst,
1376 		    sizeof(bareq.ifba_dst));
1377 
1378 		memset(&bareq.ifba_dstsa, 0, sizeof(bareq.ifba_dstsa));
1379 		switch (sc->sc_tunnel.t_af) {
1380 		case AF_INET: {
1381 			struct sockaddr_in *sin;
1382 
1383 			sin = (struct sockaddr_in *)&bareq.ifba_dstsa;
1384 			sin->sin_len = sizeof(*sin);
1385 			sin->sin_family = AF_INET;
1386 			sin->sin_addr = nv->nv_gateway.in4;
1387 
1388 			break;
1389 		}
1390 #ifdef INET6
1391 		case AF_INET6: {
1392 			struct sockaddr_in6 *sin6;
1393 
1394 			sin6 = (struct sockaddr_in6 *)&bareq.ifba_dstsa;
1395 			sin6->sin6_len = sizeof(*sin6);
1396 			sin6->sin6_family = AF_INET6;
1397 			sin6->sin6_addr = nv->nv_gateway.in6;
1398 
1399 			break;
1400 		}
1401 #endif /* INET6 */
1402 		default:
1403 			unhandled_af(sc->sc_tunnel.t_af);
1404 		}
1405 
1406 		switch (nv->nv_type) {
1407 		case NVGRE_ENTRY_DYNAMIC:
1408 			age = (ticks - nv->nv_age) / hz;
1409 			bareq.ifba_age = MIN(age, 0xff);
1410 			bareq.ifba_flags = IFBAF_DYNAMIC;
1411 			break;
1412 		case NVGRE_ENTRY_STATIC:
1413 			bareq.ifba_age = 0;
1414 			bareq.ifba_flags = IFBAF_STATIC;
1415 			break;
1416 		}
1417 
1418 		error = copyout(&bareq, uaddr, sizeof(bareq));
1419 		if (error != 0) {
1420 			rw_exit_read(&sc->sc_ether_lock);
1421 			return (error);
1422 		}
1423 
1424 		uaddr += sizeof(bareq);
1425 	}
1426 	baconf->ifbac_len = sc->sc_ether_num * sizeof(bareq);
1427 	rw_exit_read(&sc->sc_ether_lock);
1428 
1429 	return (0);
1430 }
1431 
/*
 * nvgre_flush_map: drop every entry in the learned ethernet->gateway
 * map.  The whole tree is detached under the write lock and torn down
 * afterwards, so the lock is held only briefly.
 */
static void
nvgre_flush_map(struct nvgre_softc *sc)
{
	struct nvgre_map map;
	struct nvgre_entry *nv, *nnv;

	/* steal the tree so teardown can happen without the lock */
	rw_enter_write(&sc->sc_ether_lock);
	map = sc->sc_ether_map;
	RBT_INIT(nvgre_map, &sc->sc_ether_map);
	sc->sc_ether_num = 0;
	rw_exit_write(&sc->sc_ether_lock);

	RBT_FOREACH_SAFE(nv, nvgre_map, &map, nnv) {
		RBT_REMOVE(nvgre_map, &map, nv);
		/* drop the map's reference; free on the last release */
		if (refcnt_rele(&nv->nv_refs))
			pool_put(&nvgre_pool, nv);
	}
}
1450 
/*
 * nvgre_input_map: learn (or refresh) the mapping from the inner
 * Ethernet source address to the outer tunnel endpoint it arrived
 * from, much like a bridge learning table.
 */
static void
nvgre_input_map(struct nvgre_softc *sc, const struct gre_tunnel *key,
    const struct ether_header *eh)
{
	struct nvgre_entry *nv, nkey;
	int new = 0;

	/* never learn broadcast or multicast source addresses */
	if (ETHER_IS_BROADCAST(eh->ether_shost) ||
	    ETHER_IS_MULTICAST(eh->ether_shost))
		return;

	memcpy(&nkey.nv_dst, eh->ether_shost, ETHER_ADDR_LEN);

	/* remember where it came from */
	rw_enter_read(&sc->sc_ether_lock);
	nv = RBT_FIND(nvgre_map, &sc->sc_ether_map, &nkey);
	if (nv == NULL)
		new = 1;
	else {
		nv->nv_age = ticks;

		/*
		 * NOTE(review): only dynamic entries for which
		 * gre_ip_cmp() returns 0 are refreshed below; a
		 * reference is taken so the entry survives dropping
		 * the read lock.
		 */
		if (nv->nv_type != NVGRE_ENTRY_DYNAMIC ||
		    gre_ip_cmp(key->t_af, &key->t_dst, &nv->nv_gateway))
			nv = NULL;
		else
			refcnt_take(&nv->nv_refs);
	}
	rw_exit_read(&sc->sc_ether_lock);

	if (new) {
		struct nvgre_entry *onv;
		unsigned int num;

		nv = pool_get(&nvgre_pool, PR_NOWAIT);
		if (nv == NULL) {
			/* oh well */
			return;
		}

		/* build the new entry before taking the write lock */
		memcpy(&nv->nv_dst, eh->ether_shost, ETHER_ADDR_LEN);
		nv->nv_type = NVGRE_ENTRY_DYNAMIC;
		nv->nv_gateway = key->t_dst;
		refcnt_init(&nv->nv_refs);
		nv->nv_age = ticks;

		rw_enter_write(&sc->sc_ether_lock);
		num = sc->sc_ether_num;
		if (++num > sc->sc_ether_max)
			onv = nv;
		else {
			/* try to give the ref to the map */
			onv = RBT_INSERT(nvgre_map, &sc->sc_ether_map, nv);
			if (onv == NULL) {
				/* count the successful insert */
				sc->sc_ether_num = num;
			}
		}
		rw_exit_write(&sc->sc_ether_lock);

		/* lost the race, or the table is full: toss our entry */
		if (onv != NULL)
			pool_put(&nvgre_pool, nv);
	} else if (nv != NULL) {
		/* refresh the gateway of the existing dynamic entry */
		rw_enter_write(&sc->sc_ether_lock);
		nv->nv_gateway = key->t_dst;
		rw_exit_write(&sc->sc_ether_lock);

		if (refcnt_rele(&nv->nv_refs)) {
			/* ioctl may have deleted the entry */
			pool_put(&nvgre_pool, nv);
		}
	}
}
1523 
1524 static inline struct nvgre_softc *
1525 nvgre_mcast_find(const struct gre_tunnel *key, unsigned int if0idx)
1526 {
1527 	struct nvgre_softc *sc;
1528 	int rv;
1529 
1530 	/*
1531 	 * building an nvgre_softc to use with RBT_FIND is expensive, and
1532 	 * would need to swap the src and dst addresses in the key. so do the
1533 	 * find by hand.
1534 	 */
1535 
1536 	NET_ASSERT_LOCKED();
1537 	sc = RBT_ROOT(nvgre_mcast_tree, &nvgre_mcast_tree);
1538 	while (sc != NULL) {
1539 		rv = nvgre_cmp_mcast(key, &key->t_src, if0idx,
1540 		    &sc->sc_tunnel, &sc->sc_tunnel.t_dst, sc->sc_ifp0);
1541 		if (rv == 0)
1542 			return (sc);
1543 		if (rv < 0)
1544 			sc = RBT_LEFT(nvgre_mcast_tree, sc);
1545 		else
1546 			sc = RBT_RIGHT(nvgre_mcast_tree, sc);
1547 	}
1548 
1549 	return (NULL);
1550 }
1551 
1552 static inline struct nvgre_softc *
1553 nvgre_ucast_find(const struct gre_tunnel *key)
1554 {
1555 	NET_ASSERT_LOCKED();
1556 	return (RBT_FIND(nvgre_ucast_tree, &nvgre_ucast_tree,
1557 	    (struct nvgre_softc *)key));
1558 }
1559 
/*
 * nvgre_input: offer a decapsulated NVGRE packet to the nvgre(4)
 * interface matching the tunnel key, learning the inner source MAC
 * along the way.
 *
 * Returns -1 if no interface claims the packet (the mbuf stays with
 * the caller), or 0 once the packet is consumed.
 */
static int
nvgre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen,
    uint8_t otos)
{
	struct nvgre_softc *sc;

	/* multicast and unicast endpoints live in separate trees */
	if (ISSET(m->m_flags, M_MCAST|M_BCAST))
		sc = nvgre_mcast_find(key, m->m_pkthdr.ph_ifidx);
	else
		sc = nvgre_ucast_find(key);

	if (sc == NULL)
		return (-1);

	/* it's ours now */
	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (0);

	/* learn which endpoint the inner source address lives behind */
	nvgre_input_map(sc, key, mtod(m, struct ether_header *));

	/* NVGRE always carries a flow id in the key's entropy bits */
	SET(m->m_pkthdr.csum_flags, M_FLOWID);
	m->m_pkthdr.ph_flowid = bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	m->m_flags &= ~(M_MCAST|M_BCAST);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	if_vinput(&sc->sc_ac.ac_if, m);

	return (0);
}
1596 
/*
 * gre_ether_align: strip the encapsulation headers and make sure the
 * inner Ethernet frame is usable: long enough, with a contiguous
 * Ethernet header, and with its payload aligned for the IP stack.
 * Consumes the mbuf on failure and returns NULL.
 */
static struct mbuf *
gre_ether_align(struct mbuf *m, int hlen)
{
	struct mbuf *n;
	int off;

	/* drop the outer IP + GRE headers */
	m_adj(m, hlen);

	if (m->m_pkthdr.len < sizeof(struct ether_header)) {
		m_freem(m);
		return (NULL);
	}

	m = m_pullup(m, sizeof(struct ether_header));
	if (m == NULL)
		return (NULL);

	/* locate the first byte of the payload after the Ethernet header */
	n = m_getptr(m, sizeof(struct ether_header), &off);
	if (n == NULL) {
		m_freem(m);
		return (NULL);
	}

	/* realign by copying if the payload is not 32-bit aligned */
	if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
		n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
		m_freem(m);
		if (n == NULL)
			return (NULL);
		m = n;
	}

	return (m);
}
1630 
/*
 * gre_keepalive_recv: process a keepalive packet that looped back
 * through the tunnel.  Validates timing and the SipHash digest, then
 * drives the keepalive state machine (DOWN -> HOLD -> UP), raising
 * the link state once enough consecutive keepalives have been seen.
 * Always consumes the mbuf.
 */
static void
gre_keepalive_recv(struct ifnet *ifp, struct mbuf *m)
{
	struct gre_softc *sc = ifp->if_softc;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	uint8_t digest[SIPHASH_DIGEST_LENGTH];
	int uptime, delta;
	int tick = ticks;

	/* ignore keepalives when disabled or from a foreign rdomain */
	if (sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		goto drop;

	if (m->m_pkthdr.len < sizeof(*gk))
		goto drop;
	m = m_pullup(m, sizeof(*gk));
	if (m == NULL)
		return;

	/* reject packets that are too old or from the future */
	gk = mtod(m, struct gre_keepalive *);
	uptime = bemtoh32(&gk->gk_uptime) - sc->sc_ka_bias;
	delta = tick - uptime;
	if (delta < 0)
		goto drop;
	if (delta > hz * 10) /* magic */
		goto drop;

	/* avoid too much siphash work */
	delta = tick - sc->sc_ka_recvtm;
	if (delta > 0 && delta < (hz / 10))
		goto drop;

	/* authenticate the keepalive against our secret key */
	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(digest, &ctx);

	if (memcmp(digest, gk->gk_digest, sizeof(digest)) != 0)
		goto drop;

	sc->sc_ka_recvtm = tick;

	switch (sc->sc_ka_state) {
	case GRE_KA_DOWN:
		/* first sign of life: hold until holdcnt more arrive */
		sc->sc_ka_state = GRE_KA_HOLD;
		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
		    16 * sc->sc_ka_count);
		break;
	case GRE_KA_HOLD:
		if (--sc->sc_ka_holdcnt > 0)
			break;

		/* enough consecutive keepalives: declare the link up */
		sc->sc_ka_state = GRE_KA_UP;
		gre_link_state(&sc->sc_if, sc->sc_ka_state);
		break;

	case GRE_KA_UP:
		/* decay the hold threshold back toward the configured count */
		sc->sc_ka_holdmax--;
		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
		break;
	}

	/* re-arm the hold timer that would take the link down again */
	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);

drop:
	m_freem(m);
}
1700 
/*
 * gre_output: if_output handler for gre(4).  Validates the address
 * family, tags the packet against encapsulation loops, and queues it
 * for gre_start() to encapsulate.  Consumes the mbuf on every path.
 */
static int
gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	struct m_tag *mtag;
	int error = 0;

	/* net.inet.gre.allow sysctl gates all gre traffic */
	if (!gre_allow) {
		error = EACCES;
		goto drop;
	}

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = ENETDOWN;
		goto drop;
	}

	switch (dst->sa_family) {
	case AF_INET:
#ifdef INET6
	case AF_INET6:
#endif
#ifdef MPLS
	case AF_MPLS:
#endif
		break;
	default:
		error = EAFNOSUPPORT;
		goto drop;
	}

	/* Try to limit infinite recursion through misconfiguration. */
	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
		    sizeof(ifp->if_index)) == 0) {
			/* already went through this interface once */
			m_freem(m);
			error = EIO;
			goto end;
		}
	}

	/* mark the packet with our index for the loop check above */
	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL) {
		m_freem(m);
		error = ENOBUFS;
		goto end;
	}
	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
	m_tag_prepend(m, mtag);

	/* remember the payload family for gre_start() */
	m->m_pkthdr.ph_family = dst->sa_family;

	error = if_enqueue(ifp, m);
end:
	if (error)
		ifp->if_oerrors++;
	return (error);

drop:
	m_freem(m);
	return (error);
}
1764 
1765 void
1766 gre_start(struct ifnet *ifp)
1767 {
1768 	struct gre_softc *sc = ifp->if_softc;
1769 	struct mbuf *m;
1770 	int af;
1771 #if NBPFILTER > 0
1772 	caddr_t if_bpf;
1773 #endif
1774 
1775 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
1776 		af = m->m_pkthdr.ph_family;
1777 
1778 #if NBPFILTER > 0
1779 		if_bpf = ifp->if_bpf;
1780 		if (if_bpf)
1781 			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
1782 #endif
1783 
1784 		m = gre_l3_encap(&sc->sc_tunnel, m, af);
1785 		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
1786 			ifp->if_oerrors++;
1787 			continue;
1788 		}
1789 	}
1790 }
1791 
/*
 * mgre_rtrequest: rtrequest hook for mgre(4).  On RTM_ADD of a local
 * route for one of our own addresses, clear RTF_LLINFO so the route
 * is not treated as needing link-layer resolution -- but only if
 * lo0 in our rdomain has an address of the same family to take the
 * local traffic.
 */
void
mgre_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* is the route's destination one of our addresses? */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/* check lo0 in this rdomain handles the same family */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1834 
/*
 * mgre_output: if_output handler for point-to-multipoint gre.  The
 * tunnel destination is not fixed; it is taken from the gateway of
 * the (host) route used to reach dest.  Validates the route, guards
 * against encapsulation loops, encapsulates, and queues for
 * mgre_start().  Consumes the mbuf on every path.
 */
static int
mgre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dest,
    struct rtentry *rt0)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct sockaddr *gate;
	struct rtentry *rt;
	struct m_tag *mtag;
	int error = 0;
	sa_family_t af;
	const void *addr;

	/* net.inet.gre.allow sysctl gates all gre traffic */
	if (!gre_allow) {
		error = EACCES;
		goto drop;
	}

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = ENETDOWN;
		goto drop;
	}

	switch (dest->sa_family) {
	case AF_INET:
#ifdef INET6
	case AF_INET6:
#endif
#ifdef MPLS
	case AF_MPLS:
#endif
		break;
	default:
		error = EAFNOSUPPORT;
		goto drop;
	}

	/* mgre has no way to map mcast/bcast to an endpoint */
	if (ISSET(m->m_flags, M_MCAST|M_BCAST)) {
		error = ENETUNREACH;
		goto drop;
	}

	rt = rt_getll(rt0);

	/* check rt_expire? */
	if (ISSET(rt->rt_flags, RTF_REJECT)) {
		error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
		goto drop;
	}
	/* the endpoint comes from a host route's gateway */
	if (!ISSET(rt->rt_flags, RTF_HOST)) {
		error = EHOSTUNREACH;
		goto drop;
	}
	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		error = EINVAL;
		goto drop;
	}

	gate = rt->rt_gateway;
	af = gate->sa_family;
	if (af != sc->sc_tunnel.t_af) {
		error = EAGAIN;
		goto drop;
	}

	/* Try to limit infinite recursion through misconfiguration. */
	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
		    sizeof(ifp->if_index)) == 0) {
			error = EIO;
			goto drop;
		}
	}

	/* mark the packet with our index for the loop check above */
	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL) {
		error = ENOBUFS;
		goto drop;
	}
	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
	m_tag_prepend(m, mtag);

	/* pull the tunnel destination address out of the gateway */
	switch (af) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)gate;
		addr = &sin->sin_addr;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)gate;
		addr = &sin6->sin6_addr;
		break;
	}
 #endif
	default:
		unhandled_af(af);
		/* NOTREACHED */
	}

	m = gre_l3_encap_dst(&sc->sc_tunnel, addr, m, dest->sa_family);
	if (m == NULL) {
		ifp->if_oerrors++;
		return (ENOBUFS);
	}

	/* remember the payload family for mgre_start()'s bpf tap */
	m->m_pkthdr.ph_family = dest->sa_family;

	error = if_enqueue(ifp, m);
	if (error)
		ifp->if_oerrors++;
	return (error);

drop:
	m_freem(m);
	return (error);
}
1952 
/*
 * mgre_start: drain the mgre(4) send queue.  Packets are already
 * encapsulated by mgre_output(), so only bpf and the actual IP
 * transmit happen here.
 */
static void
mgre_start(struct ifnet *ifp)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct mbuf *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf) {
			struct m_hdr mh;
			struct mbuf *n;
			int off;

			/*
			 * build a throwaway mbuf header that points
			 * past the encapsulation headers so bpf only
			 * sees the inner payload.
			 */
			n = m_getptr(m, ifp->if_hdrlen, &off);
			KASSERT(n != NULL);

			mh.mh_flags = 0;
			mh.mh_next = n->m_next;
			mh.mh_len = n->m_len - off;
			mh.mh_data = n->m_data + off;

			bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family,
			    (struct mbuf *)&mh, BPF_DIRECTION_OUT);
		}
#endif

		if (gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
1989 
/*
 * egre_start: drain the egre(4) send queue, encapsulating each
 * Ethernet frame in GRE and transmitting it.
 */
static void
egre_start(struct ifnet *ifp)
{
	struct egre_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* net.inet.gre.allow sysctl gates all gre traffic */
	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		/* move the packet header onto the new, empty head mbuf */
		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap(&sc->sc_tunnel, m, htons(ETHERTYPE_TRANSETHER),
		    sc->sc_tunnel.t_ttl, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
2032 
/*
 * gre_l3_encap_dst: encapsulate an L3 payload of family af in GRE
 * toward endpoint dst.  Peeks at the payload header to pick up the
 * inner TOS (and, for ttl -1 "copy" mode, the inner TTL), then hands
 * off to gre_encap_dst().  Returns NULL (packet consumed) on failure.
 */
static struct mbuf *
gre_l3_encap_dst(const struct gre_tunnel *tunnel, const void *dst,
    struct mbuf *m, sa_family_t af)
{
	uint16_t proto;
	uint8_t ttl, itos, otos;
	int tttl = tunnel->t_ttl;
	int ttloff;

	switch (af) {
	case AF_INET: {
		struct ip *ip;

		m = m_pullup(m, sizeof(*ip));
		if (m == NULL)
			return (NULL);

		ip = mtod(m, struct ip *);
		itos = ip->ip_tos;

		ttloff = offsetof(struct ip, ip_ttl);
		proto = htons(ETHERTYPE_IP);
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;

		m = m_pullup(m, sizeof(*ip6));
		if (m == NULL)
			return (NULL);

		/* traffic class is bits 27:20 of the flow word */
		ip6 = mtod(m, struct ip6_hdr *);
		itos = (ntohl(ip6->ip6_flow) & 0x0ff00000) >> 20;

		ttloff = offsetof(struct ip6_hdr, ip6_hlim);
		proto = htons(ETHERTYPE_IPV6);
		break;
	}
 #endif
#ifdef MPLS
	case AF_MPLS: {
		uint32_t shim;

		m = m_pullup(m, sizeof(shim));
		if (m == NULL)
			return (NULL);

		/* lift the 3 EXP bits into the top of a TOS byte */
		shim = bemtoh32(mtod(m, uint32_t *)) & MPLS_EXP_MASK;
		itos = (shim >> MPLS_EXP_OFFSET) << 5;

		/* the shim TTL is the low byte of the big-endian word */
		ttloff = 3;

		if (m->m_flags & (M_BCAST | M_MCAST))
			proto = htons(ETHERTYPE_MPLS_MCAST);
		else
			proto = htons(ETHERTYPE_MPLS);
		break;
	}
#endif
	default:
		unhandled_af(af);
	}

	/* t_ttl == -1 means "copy the payload's TTL into the outer header" */
	if (tttl == -1) {
		KASSERT(m->m_len > ttloff); /* m_pullup has happened */

		ttl = *(m->m_data + ttloff);
	} else
		ttl = tttl;

	itos = gre_l3_tos(tunnel, m, itos);
	ip_ecn_ingress(tunnel->t_ecn, &otos, &itos);

	return (gre_encap_dst(tunnel, dst, m, proto, ttl, otos));
}
2109 
/*
 * gre_encap_dst: prepend the GRE header (and the optional key field)
 * to m, then prepend the outer IP header via gre_encap_dst_ip().
 * proto is already in network byte order.  Returns NULL on failure.
 */
static struct mbuf *
gre_encap_dst(const struct gre_tunnel *tunnel, const union gre_addr *dst,
    struct mbuf *m, uint16_t proto, uint8_t ttl, uint8_t tos)
{
	struct gre_header *gh;
	struct gre_h_key *gkh;
	int hlen;

	/* the key field is only present on keyed tunnels */
	hlen = sizeof(*gh);
	if (tunnel->t_key_mask != GRE_KEY_NONE)
		hlen += sizeof(*gkh);

	m = m_prepend(m, hlen, M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	gh = mtod(m, struct gre_header *);
	gh->gre_flags = GRE_VERS_0;
	gh->gre_proto = proto;
	if (tunnel->t_key_mask != GRE_KEY_NONE) {
		gh->gre_flags |= htons(GRE_KP);

		gkh = (struct gre_h_key *)(gh + 1);
		gkh->gre_key = tunnel->t_key;

		/* fold the packet's flow id into the key's entropy bits */
		if (tunnel->t_key_mask == GRE_KEY_ENTROPY &&
		    ISSET(m->m_pkthdr.csum_flags, M_FLOWID)) {
			gkh->gre_key |= htonl(~GRE_KEY_ENTROPY &
			    m->m_pkthdr.ph_flowid);
		}
	}

	return (gre_encap_dst_ip(tunnel, dst, m, ttl, tos));
}
2144 
/*
 * gre_encap_dst_ip: prepend the outer IPv4 or IPv6 header, using the
 * tunnel's local address as source and dst as destination.  Returns
 * NULL (packet consumed) on failure or if the tunnel has no address
 * family configured yet.
 */
static struct mbuf *
gre_encap_dst_ip(const struct gre_tunnel *tunnel, const union gre_addr *dst,
    struct mbuf *m, uint8_t ttl, uint8_t tos)
{
	switch (tunnel->t_af) {
	case AF_UNSPEC:
		/* packets may arrive before tunnel is set up */
		m_freem(m);
		return (NULL);
	case AF_INET: {
		struct ip *ip;

		m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
		if (m == NULL)
			return (NULL);

		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		/* t_df already holds htons(IP_DF) or htons(0) */
		ip->ip_off = tunnel->t_df;
		ip->ip_tos = tos;
		ip->ip_len = htons(m->m_pkthdr.len);
		ip->ip_ttl = ttl;
		ip->ip_p = IPPROTO_GRE;
		ip->ip_src = tunnel->t_src4;
		ip->ip_dst = dst->in4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;
		int len = m->m_pkthdr.len;

		m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
		if (m == NULL)
			return (NULL);

		/* flow label from the packet's flow id, if it has one */
		ip6 = mtod(m, struct ip6_hdr *);
		ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ?
		    htonl(m->m_pkthdr.ph_flowid) : 0;
		ip6->ip6_vfc |= IPV6_VERSION;
		ip6->ip6_flow |= htonl((uint32_t)tos << 20);
		ip6->ip6_plen = htons(len);
		ip6->ip6_nxt = IPPROTO_GRE;
		ip6->ip6_hlim = ttl;
		ip6->ip6_src = tunnel->t_src6;
		ip6->ip6_dst = dst->in6;

		/* IPv6 has no DF bit; ask the stack not to fragment */
		if (tunnel->t_df)
			SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);

		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(tunnel->t_af);
	}

	return (m);
}
2205 
2206 static int
2207 gre_ip_output(const struct gre_tunnel *tunnel, struct mbuf *m)
2208 {
2209 	m->m_flags &= ~(M_BCAST|M_MCAST);
2210 	m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;
2211 
2212 #if NPF > 0
2213 	pf_pkt_addr_changed(m);
2214 #endif
2215 
2216 	switch (tunnel->t_af) {
2217 	case AF_INET:
2218 		ip_send(m);
2219 		break;
2220 #ifdef INET6
2221 	case AF_INET6:
2222 		ip6_send(m);
2223 		break;
2224 #endif
2225 	default:
2226 		unhandled_af(tunnel->t_af);
2227 	}
2228 
2229 	return (0);
2230 }
2231 
/*
 * gre_tunnel_ioctl: handle the ioctls common to every gre-family
 * interface (MTU, vnetid/flowid, endpoint addresses, rdomain, DF).
 * Returns ENOTTY for anything it does not recognise so the caller
 * can fall back to the generic interface ioctls.
 */
static int
gre_tunnel_ioctl(struct ifnet *ifp, struct gre_tunnel *tunnel,
    u_long cmd, void *data)
{
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFMTU:
		/* 576 is the smallest MTU accepted for the tunnel */
		if (ifr->ifr_mtu < 576) {
			error = EINVAL;
			break;
		}
		ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	case SIOCSVNETID:
		error = gre_set_vnetid(tunnel, ifr);
		break;

	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;
	case SIOCDVNETID:
		error = gre_del_vnetid(tunnel);
		break;

	case SIOCSVNETFLOWID:
		error = gre_set_vnetflowid(tunnel, ifr);
		break;

	case SIOCGVNETFLOWID:
		error = gre_get_vnetflowid(tunnel, ifr);
		break;

	case SIOCSLIFPHYADDR:
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		error = gre_del_tunnel(tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		/* the rdomain must already exist */
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ? 1 : 0;
		break;

	default:
		error = ENOTTY;
		break;
	}

	return (error);
}
2308 
2309 static uint8_t
2310 gre_l2_tos(const struct gre_tunnel *t, const struct mbuf *m)
2311 {
2312 	uint8_t prio;
2313 
2314 	switch (t->t_txhprio) {
2315 	case IF_HDRPRIO_PACKET:
2316 		prio = m->m_pkthdr.pf.prio;
2317 		break;
2318 	default:
2319 		prio = t->t_txhprio;
2320 		break;
2321 	}
2322 
2323 	return (IFQ_PRIO2TOS(prio));
2324 }
2325 
2326 static uint8_t
2327 gre_l3_tos(const struct gre_tunnel *t, const struct mbuf *m, uint8_t tos)
2328 {
2329 	uint8_t prio;
2330 
2331 	switch (t->t_txhprio) {
2332 	case IF_HDRPRIO_PAYLOAD:
2333 		return (tos);
2334 	case IF_HDRPRIO_PACKET:
2335 		prio = m->m_pkthdr.pf.prio;
2336 		break;
2337 	default:
2338 		prio = t->t_txhprio;
2339 		break;
2340 	}
2341 
2342 	return (IFQ_PRIO2TOS(prio) | (tos & IPTOS_ECN_MASK));
2343 }
2344 
/*
 * gre_ioctl: ioctl handler for gre(4).  Handles interface up/down,
 * keepalive configuration, TTL/ECN and header-priority settings, and
 * defers everything else to gre_tunnel_ioctl().
 */
static int
gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct gre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		/* FALLTHROUGH */
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_down(sc);
		}
		break;
	case SIOCSIFRDOMAIN:
		/* let if_rdomain do its thing */
		error = ENOTTY;
		break;

	case SIOCSETKALIVE:
		/*
		 * timeout and count must both be set, or both zero
		 * (which disables keepalives); cap them at 1 day and
		 * 256 probes.
		 */
		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256 ||
		    (ikar->ikar_timeo == 0) != (ikar->ikar_cnt == 0))
			return (EINVAL);

		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;

			/* fresh digest key and uptime bias per config */
			arc4random_buf(&sc->sc_ka_key, sizeof(sc->sc_ka_key));
			sc->sc_ka_bias = arc4random();
			sc->sc_ka_holdmax = sc->sc_ka_count;

			/* pretend the last keepalive was a second ago */
			sc->sc_ka_recvtm = ticks - hz;
			timeout_add(&sc->sc_ka_send, 1);
			timeout_add_sec(&sc->sc_ka_hold,
			    sc->sc_ka_timeo * sc->sc_ka_count);
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSLIFPHYTTL:
		/* -1 means "copy the payload TTL into the outer header" */
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}
2456 
/*
 * Ioctl handler for mgre(4) (point-to-multipoint GRE) interfaces.
 *
 * Most tunnel parameters may only be changed while the interface is
 * not running, since mgre_up() inserts the softc into the lookup tree
 * keyed on those parameters.  Unknown commands fall through to
 * gre_tunnel_ioctl().
 */
static int
mgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		/* -1 means "copy the payload TTL"; otherwise 1..255 */
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSLIFPHYADDR:
		/* endpoint changes would invalidate the tree entry */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = mgre_set_tunnel(sc, (struct if_laddrreq *)data);
		break;
	case SIOCGLIFPHYADDR:
		error = mgre_get_tunnel(sc, (struct if_laddrreq *)data);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		/* reject while running, then share the common handler */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}
2552 
/*
 * Validate and install the local endpoint of an mgre(4) tunnel.
 *
 * Only the source address may be given; dstaddr must be AF_UNSPEC
 * because mgre has no fixed destination (NOTE(review): destinations
 * appear to be supplied per packet, e.g. via routes — confirm against
 * mgre output path).  Returns 0 on success or an errno.
 */
static int
mgre_set_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct sockaddr *addr = (struct sockaddr *)&req->addr;
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *addr4;
#ifdef INET6
	struct sockaddr_in6 *addr6;
	int error;
#endif

	/* a fixed destination makes no sense for multipoint gre */
	if (dstaddr->sa_family != AF_UNSPEC)
		return (EINVAL);

	/* validate */
	switch (addr->sa_family) {
	case AF_INET:
		if (addr->sa_len != sizeof(*addr4))
			return (EINVAL);

		/* source must be a real unicast host address */
		addr4 = (struct sockaddr_in *)addr;
		if (in_nullhost(addr4->sin_addr) ||
		    IN_MULTICAST(addr4->sin_addr.s_addr))
			return (EINVAL);

		tunnel->t_src4 = addr4->sin_addr;
		tunnel->t_dst4.s_addr = INADDR_ANY;

		break;
#ifdef INET6
	case AF_INET6:
		if (addr->sa_len != sizeof(*addr6))
			return (EINVAL);

		addr6 = (struct sockaddr_in6 *)addr;
		if (IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&addr6->sin6_addr))
			return (EINVAL);

		/* fold the scope id into the stored address */
		error = in6_embedscope(&tunnel->t_src6, addr6, NULL);
		if (error != 0)
			return (error);

		memset(&tunnel->t_dst6, 0, sizeof(tunnel->t_dst6));

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* commit */
	tunnel->t_af = addr->sa_family;

	return (0);
}
2610 
/*
 * Report the configured local endpoint of an mgre(4) tunnel.
 *
 * Fills req->addr with the source address and marks req->dstaddr as
 * AF_UNSPEC (minimal two-byte sockaddr), mirroring what
 * mgre_set_tunnel() accepts.  EADDRNOTAVAIL if nothing is configured.
 */
static int
mgre_get_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif

	switch (tunnel->t_af) {
	case AF_UNSPEC:
		return (EADDRNOTAVAIL);
	case AF_INET:
		sin = (struct sockaddr_in *)&req->addr;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_src4;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&req->addr;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		/* undo the embedded scope id for userland */
		in6_recoverscope(sin6, &tunnel->t_src6);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	/* no fixed peer: report an empty sockaddr */
	dstaddr->sa_len = 2;
	dstaddr->sa_family = AF_UNSPEC;

	return (0);
}
2650 
/*
 * Ioctl handler for egre(4) (Ethernet-over-GRE) interfaces.
 *
 * Tunnel identity parameters (endpoints, vnetid, rtable) can only be
 * changed while the interface is down, since egre_up() keys the lookup
 * tree on them.  Unknown commands try the shared GRE tunnel handler
 * first and then the generic ethernet handler.  ENETRESET is swallowed
 * because there is no hardware to reprogram.
 */
static int
egre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct egre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		/* no -1/"copy payload" option here, unlike gre(4) */
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSLIFPHYADDR:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		/* reject while running, then share the common handler */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		if (error == ENOTTY)
			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
2735 
/*
 * Ioctl handler for nvgre(4) interfaces.
 *
 * On top of the usual tunnel parameters, nvgre has a parent interface
 * (used for the multicast underlay) and a learned ethernet address
 * cache with bridge-style knobs (SIOCBRDG*).  Configuration changes
 * that affect forwarding call nvgre_flush_map() to drop the learned
 * entries.  Identity parameters may only change while down, because
 * nvgre_up() keys two lookup trees on them.
 */
static int
nvgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct nvgre_softc *sc = ifp->if_softc;
	struct gre_tunnel *tunnel = &sc->sc_tunnel;

	struct ifreq *ifr = (struct ifreq *)data;
	struct if_parent *parent = (struct if_parent *)data;
	struct ifbrparam *bparam = (struct ifbrparam *)data;
	struct ifnet *ifp0;

	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_up(sc);
			else
				error = ENETRESET;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_down(sc);
		}
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* ucast == 0: the destination must be a multicast group */
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 0);
		if (error == 0)
			nvgre_flush_map(sc);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = gre_del_tunnel(tunnel);
		if (error == 0)
			nvgre_flush_map(sc);
		break;

	case SIOCSIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = nvgre_set_parent(sc, parent->ifp_parent);
		if (error == 0)
			nvgre_flush_map(sc);
		break;
	case SIOCGIFPARENT:
		ifp0 = if_get(sc->sc_ifp0);
		if (ifp0 == NULL)
			error = EADDRNOTAVAIL;
		else {
			memcpy(parent->ifp_parent, ifp0->if_xname,
			    sizeof(parent->ifp_parent));
		}
		if_put(ifp0);
		break;
	case SIOCDIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* commit */
		sc->sc_ifp0 = 0;
		nvgre_flush_map(sc);
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* nvgre always uses the entropy (24 bit VSID) key layout */
		if (ifr->ifr_vnetid < GRE_KEY_ENTROPY_MIN ||
		    ifr->ifr_vnetid > GRE_KEY_ENTROPY_MAX) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_key = htonl(ifr->ifr_vnetid << GRE_KEY_ENTROPY_SHIFT);
		nvgre_flush_map(sc);
		break;
	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		nvgre_flush_map(sc);
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ? 1 : 0;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = tunnel->t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCBRDGSCACHE:
		/* maximum number of learned ethernet addresses */
		if (bparam->ifbrp_csize < 1) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_ether_max = bparam->ifbrp_csize;
		break;
	case SIOCBRDGGCACHE:
		bparam->ifbrp_csize = sc->sc_ether_max;
		break;

	case SIOCBRDGSTO:
		/* address cache timeout, stored in ticks */
		if (bparam->ifbrp_ctime < 0 ||
		    bparam->ifbrp_ctime > INT_MAX / hz) {
			error = EINVAL;
			break;
		}
		sc->sc_ether_tmo = bparam->ifbrp_ctime * hz;
		break;
	case SIOCBRDGGTO:
		bparam->ifbrp_ctime = sc->sc_ether_tmo / hz;
		break;

	case SIOCBRDGRTS:
		error = nvgre_rtfind(sc, (struct ifbaconf *)data);
		break;
	case SIOCBRDGFLUSH:
		nvgre_flush_map(sc);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
2940 
/*
 * Ioctl handler for eoip(4) (MikroTik Ethernet-over-IP) interfaces.
 *
 * Mostly parallels egre_ioctl(), but eoip has its own keepalive knobs
 * and a 16 bit tunnel id.  NOTE(review): the tunnel id is stored
 * little-endian (htole16), which looks like the on-wire format of the
 * EoIP header — confirm against the encap/decap code.
 */
static int
eoip_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_down(sc);
		}
		break;

	case SIOCSETKALIVE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* period capped at a day, count at 256 */
		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256)
			return (EINVAL);

		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffff)
			return (EINVAL);

		sc->sc_tunnel.t_key = htole16(ifr->ifr_vnetid); /* for cmp */
		sc->sc_tunnel_id = htole16(ifr->ifr_vnetid);
		break;

	case SIOCGVNETID:
		ifr->ifr_vnetid = letoh16(sc->sc_tunnel_id);
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* ucast == 1: the destination must be a unicast host */
		error = gre_set_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		error = gre_del_tunnel(&sc->sc_tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;
	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		sc->sc_tunnel.t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = sc->sc_tunnel.t_df ? 1 : 0;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
3105 
3106 static int
3107 gre_up(struct gre_softc *sc)
3108 {
3109 	NET_ASSERT_LOCKED();
3110 	SET(sc->sc_if.if_flags, IFF_RUNNING);
3111 
3112 	if (sc->sc_ka_state != GRE_KA_NONE)
3113 		gre_keepalive_send(sc);
3114 
3115 	return (0);
3116 }
3117 
3118 static int
3119 gre_down(struct gre_softc *sc)
3120 {
3121 	NET_ASSERT_LOCKED();
3122 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
3123 
3124 	if (sc->sc_ka_state != GRE_KA_NONE) {
3125 		timeout_del_barrier(&sc->sc_ka_hold);
3126 		timeout_del_barrier(&sc->sc_ka_send);
3127 
3128 		sc->sc_ka_state = GRE_KA_DOWN;
3129 		gre_link_state(&sc->sc_if, sc->sc_ka_state);
3130 	}
3131 
3132 	return (0);
3133 }
3134 
3135 static void
3136 gre_link_state(struct ifnet *ifp, unsigned int state)
3137 {
3138 	int link_state = LINK_STATE_UNKNOWN;
3139 
3140 	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
3141 		switch (state) {
3142 		case GRE_KA_NONE:
3143 			/* maybe up? or down? it's unknown, really */
3144 			break;
3145 		case GRE_KA_UP:
3146 			link_state = LINK_STATE_UP;
3147 			break;
3148 		default:
3149 			link_state = LINK_STATE_KALIVE_DOWN;
3150 			break;
3151 		}
3152 	}
3153 
3154 	if (ifp->if_link_state != link_state) {
3155 		ifp->if_link_state = link_state;
3156 		if_link_state_change(ifp);
3157 	}
3158 }
3159 
/*
 * Keepalive send timeout handler (sc_ka_send).
 *
 * Builds a keepalive probe and transmits it through the tunnel.  The
 * probe itself is a small authenticated payload wrapped in a GRE/IP
 * header whose endpoints are swapped (addressed from the peer back to
 * us), and that whole packet is then encapsulated in the real tunnel.
 * NOTE(review): the swap means the peer can decapsulate the outer
 * layer and route the inner packet straight back to us — confirm
 * against the keepalive receive path.
 */
static void
gre_keepalive_send(void *arg)
{
	struct gre_tunnel t;
	struct gre_softc *sc = arg;
	struct mbuf *m;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	int linkhdr, len;
	uint16_t proto;
	uint8_t ttl;
	uint8_t tos;

	/*
	 * re-schedule immediately, so we deal with incomplete configuation
	 * or temporary errors.
	 */
	if (sc->sc_ka_timeo)
		timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);

	/* only probe a running, fully configured tunnel */
	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_af == AF_UNSPEC ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#endif
	len = linkhdr + sizeof(*gk);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	if (len > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	/* keep sizeof(*gk) of payload; the trimmed space becomes
	 * headroom for the headers prepended below */
	m->m_pkthdr.len = m->m_len = len;
	m_adj(m, linkhdr);

	/*
	 * build the inside packet
	 */
	gk = mtod(m, struct gre_keepalive *);
	htobem32(&gk->gk_uptime, sc->sc_ka_bias + ticks);
	htobem32(&gk->gk_random, arc4random());

	/* authenticate uptime+random with the per-config random key */
	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(gk->gk_digest, &ctx);

	ttl = sc->sc_tunnel.t_ttl == -1 ? ip_defttl : sc->sc_tunnel.t_ttl;

	m->m_pkthdr.pf.prio = sc->sc_if.if_llprio;
	tos = gre_l3_tos(&sc->sc_tunnel, m, IFQ_PRIO2TOS(m->m_pkthdr.pf.prio));

	/* inner encapsulation uses reversed endpoints */
	t.t_af = sc->sc_tunnel.t_af;
	t.t_df = sc->sc_tunnel.t_df;
	t.t_src = sc->sc_tunnel.t_dst;
	t.t_dst = sc->sc_tunnel.t_src;
	t.t_key = sc->sc_tunnel.t_key;
	t.t_key_mask = sc->sc_tunnel.t_key_mask;

	m = gre_encap(&t, m, htons(0), ttl, tos);
	if (m == NULL)
		return;

	switch (sc->sc_tunnel.t_af) {
	case AF_INET: {
		struct ip *ip;

		/* finish the inner IPv4 header by hand */
		ip = mtod(m, struct ip *);
		ip->ip_id = htons(ip_randomid());
		ip->ip_sum = 0;
		ip->ip_sum = in_cksum(m, sizeof(*ip));

		proto = htons(ETHERTYPE_IP);
		break;
	}
#ifdef INET6
	case AF_INET6:
		proto = htons(ETHERTYPE_IPV6);
		break;
#endif
	default:
		m_freem(m);
		return;
	}

	/*
	 * put it in the tunnel
	 */
	m = gre_encap(&sc->sc_tunnel, m, proto, ttl, tos);
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);
}
3270 
3271 static void
3272 gre_keepalive_hold(void *arg)
3273 {
3274 	struct gre_softc *sc = arg;
3275 	struct ifnet *ifp = &sc->sc_if;
3276 
3277 	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
3278 	    sc->sc_ka_state == GRE_KA_NONE)
3279 		return;
3280 
3281 	NET_LOCK();
3282 	sc->sc_ka_state = GRE_KA_DOWN;
3283 	gre_link_state(ifp, sc->sc_ka_state);
3284 	NET_UNLOCK();
3285 }
3286 
/*
 * Validate and install both tunnel endpoints.
 *
 * The source must always be a unicast host address.  The "ucast" flag
 * selects what the destination must be: non-zero requires a unicast
 * destination (gre/egre/eoip), zero requires a multicast group
 * (nvgre's underlay).  Returns 0 on success or an errno; nothing is
 * committed unless both addresses validate.
 */
static int
gre_set_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req, int ucast)
{
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	int error;
#endif

	/* sa_family and sa_len must be equal */
	if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len)
		return (EINVAL);

	/* validate */
	switch (dst->sa_family) {
	case AF_INET:
		if (dst->sa_len != sizeof(*dst4))
			return (EINVAL);

		src4 = (struct sockaddr_in *)src;
		if (in_nullhost(src4->sin_addr) ||
		    IN_MULTICAST(src4->sin_addr.s_addr))
			return (EINVAL);

		/* dst must be multicast iff !ucast */
		dst4 = (struct sockaddr_in *)dst;
		if (in_nullhost(dst4->sin_addr) ||
		    (IN_MULTICAST(dst4->sin_addr.s_addr) != !ucast))
			return (EINVAL);

		tunnel->t_src4 = src4->sin_addr;
		tunnel->t_dst4 = dst4->sin_addr;

		break;
#ifdef INET6
	case AF_INET6:
		if (dst->sa_len != sizeof(*dst6))
			return (EINVAL);

		src6 = (struct sockaddr_in6 *)src;
		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
			return (EINVAL);

		/* dst must be multicast iff !ucast */
		dst6 = (struct sockaddr_in6 *)dst;
		if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) != !ucast)
			return (EINVAL);

		/* both endpoints must live in the same scope */
		if (src6->sin6_scope_id != dst6->sin6_scope_id)
			return (EINVAL);

		error = in6_embedscope(&tunnel->t_src6, src6, NULL);
		if (error != 0)
			return (error);

		error = in6_embedscope(&tunnel->t_dst6, dst6, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* commit */
	tunnel->t_af = dst->sa_family;

	return (0);
}
3359 
/*
 * Report the configured tunnel endpoints in req->addr/req->dstaddr.
 * EADDRNOTAVAIL if no endpoints are configured.
 */
static int
gre_get_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req)
{
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6 /* ifconfig already embeds the scopeid */
	struct sockaddr_in6 *sin6;
#endif

	switch (tunnel->t_af) {
	case AF_UNSPEC:
		return (EADDRNOTAVAIL);
	case AF_INET:
		sin = (struct sockaddr_in *)src;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_src4;

		sin = (struct sockaddr_in *)dst;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_dst4;

		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)src;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		/* undo the embedded scope id for userland */
		in6_recoverscope(sin6, &tunnel->t_src6);

		sin6 = (struct sockaddr_in6 *)dst;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		in6_recoverscope(sin6, &tunnel->t_dst6);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	return (0);
}
3410 
3411 static int
3412 gre_del_tunnel(struct gre_tunnel *tunnel)
3413 {
3414 	/* commit */
3415 	tunnel->t_af = AF_UNSPEC;
3416 
3417 	return (0);
3418 }
3419 
3420 static int
3421 gre_set_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3422 {
3423 	uint32_t key;
3424 	uint32_t min = GRE_KEY_MIN;
3425 	uint32_t max = GRE_KEY_MAX;
3426 	unsigned int shift = GRE_KEY_SHIFT;
3427 	uint32_t mask = GRE_KEY_MASK;
3428 
3429 	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
3430 		min = GRE_KEY_ENTROPY_MIN;
3431 		max = GRE_KEY_ENTROPY_MAX;
3432 		shift = GRE_KEY_ENTROPY_SHIFT;
3433 		mask = GRE_KEY_ENTROPY;
3434 	}
3435 
3436 	if (ifr->ifr_vnetid < min || ifr->ifr_vnetid > max)
3437 		return (EINVAL);
3438 
3439 	key = htonl(ifr->ifr_vnetid << shift);
3440 
3441 	/* commit */
3442 	tunnel->t_key_mask = mask;
3443 	tunnel->t_key = key;
3444 
3445 	return (0);
3446 }
3447 
3448 static int
3449 gre_get_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3450 {
3451 	int shift;
3452 
3453 	switch (tunnel->t_key_mask) {
3454 	case GRE_KEY_NONE:
3455 		return (EADDRNOTAVAIL);
3456 	case GRE_KEY_ENTROPY:
3457 		shift = GRE_KEY_ENTROPY_SHIFT;
3458 		break;
3459 	case GRE_KEY_MASK:
3460 		shift = GRE_KEY_SHIFT;
3461 		break;
3462 	}
3463 
3464 	ifr->ifr_vnetid = ntohl(tunnel->t_key) >> shift;
3465 
3466 	return (0);
3467 }
3468 
3469 static int
3470 gre_del_vnetid(struct gre_tunnel *tunnel)
3471 {
3472 	tunnel->t_key_mask = GRE_KEY_NONE;
3473 
3474 	return (0);
3475 }
3476 
3477 static int
3478 gre_set_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3479 {
3480 	uint32_t mask, key;
3481 
3482 	if (tunnel->t_key_mask == GRE_KEY_NONE)
3483 		return (EADDRNOTAVAIL);
3484 
3485 	mask = ifr->ifr_vnetid ? GRE_KEY_ENTROPY : GRE_KEY_MASK;
3486 	if (tunnel->t_key_mask == mask) {
3487 		/* nop */
3488 		return (0);
3489 	}
3490 
3491 	key = ntohl(tunnel->t_key);
3492 	if (mask == GRE_KEY_ENTROPY) {
3493 		if (key > GRE_KEY_ENTROPY_MAX)
3494 			return (ERANGE);
3495 
3496 		key = htonl(key << GRE_KEY_ENTROPY_SHIFT);
3497 	} else
3498 		key = htonl(key >> GRE_KEY_ENTROPY_SHIFT);
3499 
3500 	/* commit */
3501 	tunnel->t_key_mask = mask;
3502 	tunnel->t_key = key;
3503 
3504 	return (0);
3505 }
3506 
3507 static int
3508 gre_get_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3509 {
3510 	if (tunnel->t_key_mask == GRE_KEY_NONE)
3511 		return (EADDRNOTAVAIL);
3512 
3513 	ifr->ifr_vnetid = tunnel->t_key_mask == GRE_KEY_ENTROPY;
3514 
3515 	return (0);
3516 }
3517 
/*
 * Bring an mgre(4) interface up.
 *
 * Computes the encapsulation header length for if_hdrlen, then inserts
 * the softc into the global mgre lookup tree; insertion fails with
 * EADDRINUSE if another mgre interface already claims the same tunnel
 * identity.  Requires a configured local endpoint.
 */
static int
mgre_up(struct mgre_softc *sc)
{
	unsigned int hlen;

	switch (sc->sc_tunnel.t_af) {
	case AF_UNSPEC:
		return (EDESTADDRREQ);
	case AF_INET:
		hlen = sizeof(struct ip);
		break;
#ifdef INET6
	case AF_INET6:
		hlen = sizeof(struct ip6_hdr);
		break;
#endif /* INET6 */
	default:
		unhandled_af(sc->sc_tunnel.t_af);
	}

	/* account for the GRE header and optional key field */
	hlen += sizeof(struct gre_header);
	if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE)
		hlen += sizeof(struct gre_h_key);

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(mgre_tree, &mgre_tree, sc) != NULL)
		return (EADDRINUSE);

	sc->sc_if.if_hdrlen = hlen;
	SET(sc->sc_if.if_flags, IFF_RUNNING);

	return (0);
}
3552 
3553 static int
3554 mgre_down(struct mgre_softc *sc)
3555 {
3556 	NET_ASSERT_LOCKED();
3557 
3558 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
3559 	sc->sc_if.if_hdrlen = GRE_HDRLEN; /* symmetry */
3560 
3561 	RBT_REMOVE(mgre_tree, &mgre_tree, sc);
3562 
3563 	/* barrier? */
3564 
3565 	return (0);
3566 }
3567 
3568 static int
3569 egre_up(struct egre_softc *sc)
3570 {
3571 	if (sc->sc_tunnel.t_af == AF_UNSPEC)
3572 		return (EDESTADDRREQ);
3573 
3574 	NET_ASSERT_LOCKED();
3575 
3576 	if (RBT_INSERT(egre_tree, &egre_tree, sc) != NULL)
3577 		return (EADDRINUSE);
3578 
3579 	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
3580 
3581 	return (0);
3582 }
3583 
3584 static int
3585 egre_down(struct egre_softc *sc)
3586 {
3587 	NET_ASSERT_LOCKED();
3588 
3589 	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
3590 
3591 	RBT_REMOVE(egre_tree, &egre_tree, sc);
3592 
3593 	/* barrier? */
3594 
3595 	return (0);
3596 }
3597 
/*
 * Media changes are meaningless on a virtual tunnel interface.
 */
static int
egre_media_change(struct ifnet *ifp)
{
	return (ENOTTY);
}
3603 
/*
 * Report fixed media status for the virtual ethernet: "auto" media
 * with a valid, active link.
 */
static void
egre_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	imr->ifm_active = IFM_ETHER | IFM_AUTO;
	imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
}
3610 
/*
 * Bring an nvgre(4) interface up.
 *
 * Requires configured endpoints and a multicast-capable parent
 * interface.  Inserts the softc into both the multicast and unicast
 * lookup trees, joins the underlay multicast group on the parent, and
 * hooks the parent's link-state and detach events.  On failure, the
 * goto labels unwind exactly what was done so far, in reverse order.
 */
static int
nvgre_up(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp0;
	void *inm;
	int error;

	if (tunnel->t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 == NULL)
		return (ENXIO);
	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		error = ENODEV;
		goto put;
	}

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(nvgre_mcast_tree, &nvgre_mcast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto put;
	}
	if (RBT_INSERT(nvgre_ucast_tree, &nvgre_ucast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto remove_mcast;
	}

	/* join the underlay multicast group on the parent */
	switch (tunnel->t_af) {
	case AF_INET:
		inm = in_addmulti(&tunnel->t_dst4, ifp0);
		if (inm == NULL) {
			error = ECONNABORTED;
			goto remove_ucast;
		}
		break;
#ifdef INET6
	case AF_INET6:
		inm = in6_addmulti(&tunnel->t_dst6, ifp0, &error);
		if (inm == NULL) {
			/* error is already set */
			goto remove_ucast;
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(tunnel->t_af);
	}

	if_linkstatehook_add(ifp0, &sc->sc_ltask);
	if_detachhook_add(ifp0, &sc->sc_dtask);

	if_put(ifp0);

	sc->sc_inm = inm;
	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	/* start aging the learned ethernet address cache */
	timeout_add_sec(&sc->sc_ether_age, NVGRE_AGE_TMO);

	return (0);

remove_ucast:
	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
remove_mcast:
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);
put:
	if_put(ifp0);
	return (error);
}
3682 
/*
 * nvgre_down: take an nvgre(4) interface down.
 *
 * IFF_RUNNING is cleared first so the start routine, the aging
 * timeout, and the send task all bail out; the NET_LOCK is then
 * dropped while the barriers wait for any of them still running
 * to finish, since those barriers may sleep.
 */
static int
nvgre_down(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct taskq *softnet = net_tq(ifp->if_index);
	struct ifnet *ifp0;

	NET_ASSERT_LOCKED();

	CLR(ifp->if_flags, IFF_RUNNING);

	/* barriers sleep; they cannot run under the NET_LOCK */
	NET_UNLOCK();
	timeout_del_barrier(&sc->sc_ether_age);
	ifq_barrier(&ifp->if_snd);
	if (!task_del(softnet, &sc->sc_send_task))
		taskq_barrier(softnet);
	NET_LOCK();

	mq_purge(&sc->sc_send_list);

	/* unhook from the parent, if it is still around */
	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 != NULL) {
		if_detachhook_del(ifp0, &sc->sc_dtask);
		if_linkstatehook_del(ifp0, &sc->sc_ltask);
	}
	if_put(ifp0);

	/* leave the tunnel destination multicast group */
	switch (tunnel->t_af) {
	case AF_INET:
		in_delmulti(sc->sc_inm);
		break;

#ifdef INET6
	case AF_INET6:
		in6_delmulti(sc->sc_inm);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);

	return (0);
}
3730 
/*
 * Parent link-state hook.  Nothing to do, but the task must exist
 * so nvgre_up()/nvgre_down() can register and remove it.
 */
static void
nvgre_link_change(void *arg)
{
	/* nop */
}
3736 
/*
 * Parent detach hook: the parent interface is going away, so take
 * this interface down and forget the parent's index.
 */
static void
nvgre_detach(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		nvgre_down(sc);
		if_down(ifp);
	}

	/* index 0 never names an interface */
	sc->sc_ifp0 = 0;
}
3750 
3751 static int
3752 nvgre_set_parent(struct nvgre_softc *sc, const char *parent)
3753 {
3754 	struct ifnet *ifp0;
3755 
3756 	ifp0 = ifunit(parent); /* doesn't need an if_put */
3757 	if (ifp0 == NULL)
3758 		return (EINVAL);
3759 
3760 	if (!ISSET(ifp0->if_flags, IFF_MULTICAST))
3761 		return (EPROTONOSUPPORT);
3762 
3763 	/* commit */
3764 	sc->sc_ifp0 = ifp0->if_index;
3765 
3766 	return (0);
3767 }
3768 
/*
 * nvgre_age: periodic timeout that expires learned ethernet
 * addresses.  Dynamic entries unseen for twice the configured
 * timeout are removed; static entries are never aged.  The
 * timeout reschedules itself while the interface is running.
 */
static void
nvgre_age(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct nvgre_entry *nv, *nnv;
	int tmo = sc->sc_ether_tmo * 2;
	int diff;

	if (!ISSET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING))
		return;

	rw_enter_write(&sc->sc_ether_lock); /* XXX */
	RBT_FOREACH_SAFE(nv, nvgre_map, &sc->sc_ether_map, nnv) {
		if (nv->nv_type != NVGRE_ENTRY_DYNAMIC)
			continue;

		/* signed tick subtraction copes with wraparound */
		diff = ticks - nv->nv_age;
		if (diff < tmo)
			continue;

		sc->sc_ether_num--;
		RBT_REMOVE(nvgre_map, &sc->sc_ether_map, nv);
		/* only free once the last reference is gone */
		if (refcnt_rele(&nv->nv_refs))
			pool_put(&nvgre_pool, nv);
	}
	rw_exit_write(&sc->sc_ether_lock);

	timeout_add_sec(&sc->sc_ether_age, NVGRE_AGE_TMO);
}
3798 
3799 static inline int
3800 nvgre_entry_valid(struct nvgre_softc *sc, const struct nvgre_entry *nv)
3801 {
3802 	int diff;
3803 
3804 	if (nv == NULL)
3805 		return (0);
3806 
3807 	if (nv->nv_type == NVGRE_ENTRY_STATIC)
3808 		return (1);
3809 
3810 	diff = ticks - nv->nv_age;
3811 	if (diff < sc->sc_ether_tmo)
3812 		return (1);
3813 
3814 	return (0);
3815 }
3816 
/*
 * nvgre_start: ifq start routine.
 *
 * For each queued ethernet frame the destination MAC is looked up
 * in the learned-address map to find the remote endpoint to
 * encapsulate towards; broadcast and unknown destinations are
 * "flooded" to the tunnel's multicast group.  Encapsulated
 * packets are collected on a local list and handed to the softnet
 * send task in one batch.
 */
static void
nvgre_start(struct ifnet *ifp)
{
	struct nvgre_softc *sc = ifp->if_softc;
	const struct gre_tunnel *tunnel = &sc->sc_tunnel;
	union gre_addr gateway;
	struct nvgre_entry *nv, key;
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct ether_header *eh;
	struct mbuf *m, *m0;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* drop everything while gre(4) is administratively disabled */
	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		eh = mtod(m0, struct ether_header *);
		if (ETHER_IS_BROADCAST(eh->ether_dhost))
			gateway = tunnel->t_dst;
		else {
			memcpy(&key.nv_dst, eh->ether_dhost,
			    sizeof(key.nv_dst));

			rw_enter_read(&sc->sc_ether_lock);
			nv = RBT_FIND(nvgre_map, &sc->sc_ether_map, &key);
			if (nvgre_entry_valid(sc, nv))
				gateway = nv->nv_gateway;
			else {
				/* "flood" to unknown hosts */
				gateway = tunnel->t_dst;
			}
			rw_exit_read(&sc->sc_ether_lock);
		}

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* empty leading mbuf leaves aligned room for the headers */
		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap_dst(tunnel, &gateway, m,
		    htons(ETHERTYPE_TRANSETHER),
		    tunnel->t_ttl, gre_l2_tos(tunnel, m));
		if (m == NULL)
			continue;

		m->m_flags &= ~(M_BCAST|M_MCAST);
		m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;

#if NPF > 0
		pf_pkt_addr_changed(m);
#endif

		ml_enqueue(&ml, m);
	}

	if (!ml_empty(&ml)) {
		/* one task wakeup covers the whole batch */
		if (mq_enlist(&sc->sc_send_list, &ml) == 0)
			task_add(net_tq(ifp->if_index), &sc->sc_send_task);
		/* else set OACTIVE? */
	}
}
3896 
3897 static uint64_t
3898 nvgre_send4(struct nvgre_softc *sc, struct mbuf_list *ml)
3899 {
3900 	struct ip_moptions imo;
3901 	struct mbuf *m;
3902 	uint64_t oerrors = 0;
3903 
3904 	imo.imo_ifidx = sc->sc_ifp0;
3905 	imo.imo_ttl = sc->sc_tunnel.t_ttl;
3906 	imo.imo_loop = 0;
3907 
3908 	NET_LOCK();
3909 	while ((m = ml_dequeue(ml)) != NULL) {
3910 		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0)
3911 			oerrors++;
3912 	}
3913 	NET_UNLOCK();
3914 
3915 	return (oerrors);
3916 }
3917 
#ifdef INET6
/*
 * Transmit a list of encapsulated nvgre packets over IPv6 from
 * the send task, taking the NET_LOCK around ip6_output().
 * Returns the number of packets ip6_output() rejected.
 */
static uint64_t
nvgre_send6(struct nvgre_softc *sc, struct mbuf_list *ml)
{
	struct ip6_moptions im6o;
	struct mbuf *m;
	uint64_t oerrors = 0;

	im6o.im6o_ifidx = sc->sc_ifp0;
	im6o.im6o_hlim = sc->sc_tunnel.t_ttl;
	im6o.im6o_loop = 0;

	NET_LOCK();
	for (m = ml_dequeue(ml); m != NULL; m = ml_dequeue(ml)) {
		if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0)
			oerrors++;
	}
	NET_UNLOCK();

	return (oerrors);
}
#endif /* INET6 */
3940 
/*
 * nvgre_send: softnet task that drains the send queue through the
 * address family the tunnel is configured for.
 */
static void
nvgre_send(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	sa_family_t af = sc->sc_tunnel.t_af;
	struct mbuf_list ml;
	uint64_t oerrors;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	mq_delist(&sc->sc_send_list, &ml);
	if (ml_empty(&ml))
		return;

	/* oerrors is always assigned: unhandled_af() does not return */
	switch (af) {
	case AF_INET:
		oerrors = nvgre_send4(sc, &ml);
		break;
#ifdef INET6
	case AF_INET6:
		oerrors = nvgre_send6(sc, &ml);
		break;
#endif
	default:
		unhandled_af(af);
		/* NOTREACHED */
	}

	ifp->if_oerrors += oerrors; /* XXX should be ifq_oerrors */
}
3973 
3974 static int
3975 eoip_up(struct eoip_softc *sc)
3976 {
3977 	if (sc->sc_tunnel.t_af == AF_UNSPEC)
3978 		return (EDESTADDRREQ);
3979 
3980 	NET_ASSERT_LOCKED();
3981 
3982 	if (RBT_INSERT(eoip_tree, &eoip_tree, sc) != NULL)
3983 		return (EADDRINUSE);
3984 
3985 	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
3986 
3987 	if (sc->sc_ka_state != GRE_KA_NONE) {
3988 		sc->sc_ka_holdmax = sc->sc_ka_count;
3989 		eoip_keepalive_send(sc);
3990 	}
3991 
3992 	return (0);
3993 }
3994 
3995 static int
3996 eoip_down(struct eoip_softc *sc)
3997 {
3998 	NET_ASSERT_LOCKED();
3999 	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
4000 
4001 	if (sc->sc_ka_state != GRE_KA_NONE) {
4002 		timeout_del_barrier(&sc->sc_ka_hold);
4003 		timeout_del_barrier(&sc->sc_ka_send);
4004 
4005 		sc->sc_ka_state = GRE_KA_DOWN;
4006 		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
4007 	}
4008 
4009 	RBT_REMOVE(eoip_tree, &eoip_tree, sc);
4010 
4011 	return (0);
4012 }
4013 
/*
 * eoip_start: ifq start routine.  Encapsulates each queued frame
 * in EoIP and transmits it immediately via gre_ip_output().
 */
static void
eoip_start(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* drop everything while gre(4) is administratively disabled */
	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* empty leading mbuf leaves aligned room for the headers */
		m_align(m, 0);
		m->m_len = 0;

		m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
4055 
4056 static struct mbuf *
4057 eoip_encap(struct eoip_softc *sc, struct mbuf *m, uint8_t tos)
4058 {
4059 	struct gre_header *gh;
4060 	struct gre_h_key_eoip *eoiph;
4061 	int len = m->m_pkthdr.len;
4062 
4063 	m = m_prepend(m, sizeof(*gh) + sizeof(*eoiph), M_DONTWAIT);
4064 	if (m == NULL)
4065 		return (NULL);
4066 
4067 	gh = mtod(m, struct gre_header *);
4068 	gh->gre_flags = htons(GRE_VERS_1 | GRE_KP);
4069 	gh->gre_proto = htons(GRE_EOIP);
4070 
4071 	eoiph = (struct gre_h_key_eoip *)(gh + 1);
4072 	htobem16(&eoiph->eoip_len, len);
4073 	eoiph->eoip_tunnel_id = sc->sc_tunnel_id;
4074 
4075 	return (gre_encap_ip(&sc->sc_tunnel, m, sc->sc_tunnel.t_ttl, tos));
4076 }
4077 
/*
 * eoip_keepalive_send: timeout that emits a zero-length EoIP
 * keepalive packet and reschedules itself.
 *
 * The mbuf is sized for the worst-case headers and then trimmed
 * back to zero length with m_adj(), leaving an empty packet with
 * enough aligned headroom for eoip_encap() to prepend into.
 */
static void
eoip_keepalive_send(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *m;
	int linkhdr;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#endif
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	if (linkhdr > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, linkhdr);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	m->m_pkthdr.pf.prio = ifp->if_llprio;
	/* fill to linkhdr then trim it all off: empty, full headroom */
	m->m_pkthdr.len = m->m_len = linkhdr;
	m_adj(m, linkhdr);

	m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);

	timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);
}
4121 
/*
 * eoip_keepalive_hold: hold timer expired without enough peer
 * keepalives arriving, so declare the tunnel link down.
 */
static void
eoip_keepalive_hold(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	NET_LOCK();
	sc->sc_ka_state = GRE_KA_DOWN;
	gre_link_state(ifp, sc->sc_ka_state);
	NET_UNLOCK();
}
4136 
/*
 * eoip_keepalive_recv: a keepalive packet arrived from the peer.
 *
 * Advances the keepalive state machine: DOWN moves to HOLD, which
 * needs sc_ka_holdcnt consecutive keepalives before the link is
 * declared UP.  sc_ka_holdmax doubles on each recovery (capped at
 * 16 * sc_ka_count) and decays back towards sc_ka_count while the
 * link is up, so a flapping peer takes progressively longer to be
 * trusted again.  Each keepalive rearms the hold timer that would
 * otherwise take the link down.
 */
static void
eoip_keepalive_recv(struct eoip_softc *sc)
{
	switch (sc->sc_ka_state) {
	case GRE_KA_NONE:
		return;
	case GRE_KA_DOWN:
		sc->sc_ka_state = GRE_KA_HOLD;
		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
		    16 * sc->sc_ka_count);
		break;
	case GRE_KA_HOLD:
		if (--sc->sc_ka_holdcnt > 0)
			break;

		sc->sc_ka_state = GRE_KA_UP;
		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
		break;

	case GRE_KA_UP:
		sc->sc_ka_holdmax--;
		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
		break;
	}

	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);
}
4165 
/*
 * eoip_input: try to claim an incoming GRE packet as EoIP.
 *
 * Returns the mbuf unchanged if the packet is not EoIP or no
 * interface matches (the caller keeps looking), or NULL when the
 * packet has been consumed.  Zero-length payloads are keepalives;
 * everything else is realigned, trimmed to the length the EoIP
 * header claims, and fed into the network stack as ethernet.
 */
static struct mbuf *
eoip_input(struct gre_tunnel *key, struct mbuf *m,
    const struct gre_header *gh, uint8_t otos, int iphlen)
{
	struct eoip_softc *sc;
	struct gre_h_key_eoip *eoiph;
	int hlen, len;
	caddr_t buf;

	/* EoIP is always GRE version 1 with only the key bit set */
	if (gh->gre_flags != htons(GRE_KP | GRE_VERS_1))
		goto decline;

	hlen = iphlen + sizeof(*gh) + sizeof(*eoiph);
	if (m->m_pkthdr.len < hlen)
		goto decline;

	m = m_pullup(m, hlen);
	if (m == NULL)
		return (NULL);

	/* re-take the header pointers after the pullup */
	buf = mtod(m, caddr_t);
	gh = (struct gre_header *)(buf + iphlen);
	eoiph = (struct gre_h_key_eoip *)(gh + 1);

	/* the tunnel id selects the interface */
	key->t_key = eoiph->eoip_tunnel_id;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(eoip_tree, &eoip_tree, (const struct eoip_softc *)key);
	if (sc == NULL)
		goto decline;

	/* it's ours now */
	len = bemtoh16(&eoiph->eoip_len);
	if (len == 0) {
		eoip_keepalive_recv(sc);
		goto drop;
	}

	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (NULL);

	if (m->m_pkthdr.len < len)
		goto drop;
	/* trim any trailing padding to the advertised frame length */
	if (m->m_pkthdr.len != len)
		m_adj(m, len - m->m_pkthdr.len);

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	m->m_flags &= ~(M_MCAST|M_BCAST);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	if_vinput(&sc->sc_ac.ac_if, m);

	return (NULL);

decline:
	return (m);
drop:
	m_freem(m);
	return (NULL);
}
4231 
4232 int
4233 gre_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
4234     size_t newlen)
4235 {
4236 	int error;
4237 
4238 	/* All sysctl names at this level are terminal. */
4239 	if (namelen != 1)
4240 		return (ENOTDIR);
4241 
4242 	switch (name[0]) {
4243 	case GRECTL_ALLOW:
4244 		NET_LOCK();
4245 		error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_allow);
4246 		NET_UNLOCK();
4247 		return (error);
4248 	case GRECTL_WCCP:
4249 		NET_LOCK();
4250 		error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_wccp);
4251 		NET_UNLOCK();
4252 		return (error);
4253 	default:
4254 		return (ENOPROTOOPT);
4255 	}
4256 	/* NOTREACHED */
4257 }
4258 
4259 static inline int
4260 gre_ip_cmp(int af, const union gre_addr *a, const union gre_addr *b)
4261 {
4262 	switch (af) {
4263 #ifdef INET6
4264 	case AF_INET6:
4265 		return (memcmp(&a->in6, &b->in6, sizeof(a->in6)));
4266 #endif /* INET6 */
4267 	case AF_INET:
4268 		return (memcmp(&a->in4, &b->in4, sizeof(a->in4)));
4269 	default:
4270 		unhandled_af(af);
4271 	}
4272 
4273 	return (0);
4274 }
4275 
/*
 * gre_cmp_src: order two tunnels by their local side.
 *
 * Sorts on whether a key is configured, then the key under the
 * common mask, then routing table, address family, and source
 * address.  Shared by the full tunnel comparison and the mgre
 * tree, which is keyed on the source side only.
 */
static int
gre_cmp_src(const struct gre_tunnel *a, const struct gre_tunnel *b)
{
	uint32_t ka, kb;
	uint32_t mask;
	int rv;

	/* is K set at all? */
	ka = a->t_key_mask & GRE_KEY_ENTROPY;
	kb = b->t_key_mask & GRE_KEY_ENTROPY;

	/* sort by whether K is set */
	if (ka > kb)
		return (1);
	if (ka < kb)
		return (-1);

	/* is K set on both? */
	if (ka != GRE_KEY_NONE) {
		/* get common prefix */
		mask = a->t_key_mask & b->t_key_mask;

		ka = a->t_key & mask;
		kb = b->t_key & mask;

		/* sort by common prefix */
		if (ka > kb)
			return (1);
		if (ka < kb)
			return (-1);
	}

	/* sort by routing table */
	if (a->t_rtableid > b->t_rtableid)
		return (1);
	if (a->t_rtableid < b->t_rtableid)
		return (-1);

	/* sort by address */
	if (a->t_af > b->t_af)
		return (1);
	if (a->t_af < b->t_af)
		return (-1);

	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
	if (rv != 0)
		return (rv);

	return (0);
}
4326 
4327 static int
4328 gre_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b)
4329 {
4330 	int rv;
4331 
4332 	rv = gre_cmp_src(a, b);
4333 	if (rv != 0)
4334 		return (rv);
4335 
4336 	return (gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst));
4337 }
4338 
/*
 * mgre_tree is keyed on the tunnel source side only.
 */
static inline int
mgre_cmp(const struct mgre_softc *a, const struct mgre_softc *b)
{
	return (gre_cmp_src(&a->sc_tunnel, &b->sc_tunnel));
}

RBT_GENERATE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);
4346 
/*
 * egre_tree is keyed on the full tunnel: source and destination.
 */
static inline int
egre_cmp(const struct egre_softc *a, const struct egre_softc *b)
{
	return (gre_cmp(&a->sc_tunnel, &b->sc_tunnel));
}

RBT_GENERATE(egre_tree, egre_softc, sc_entry, egre_cmp);
4354 
/*
 * The learned-address map is keyed on nv_dst, the ethernet
 * destination address (see the lookup in nvgre_start()).
 */
static inline int
nvgre_entry_cmp(const struct nvgre_entry *a, const struct nvgre_entry *b)
{
	return (memcmp(&a->nv_dst, &b->nv_dst, sizeof(a->nv_dst)));
}

RBT_GENERATE(nvgre_map, nvgre_entry, nv_entry, nvgre_entry_cmp);
4362 
/*
 * nvgre_cmp_tunnel: common ordering shared by the unicast and
 * multicast nvgre trees: masked key, routing table, and address
 * family.  The address comparison differs per tree and is done
 * by the callers.
 */
static int
nvgre_cmp_tunnel(const struct gre_tunnel *a, const struct gre_tunnel *b)
{
	uint32_t ka, kb;

	ka = a->t_key & GRE_KEY_ENTROPY;
	kb = b->t_key & GRE_KEY_ENTROPY;

	/* sort by the key under the GRE_KEY_ENTROPY mask */
	if (ka > kb)
		return (1);
	if (ka < kb)
		return (-1);

	/* sort by routing table */
	if (a->t_rtableid > b->t_rtableid)
		return (1);
	if (a->t_rtableid < b->t_rtableid)
		return (-1);

	/* sort by address */
	if (a->t_af > b->t_af)
		return (1);
	if (a->t_af < b->t_af)
		return (-1);

	return (0);
}
4391 
4392 static inline int
4393 nvgre_cmp_ucast(const struct nvgre_softc *na, const struct nvgre_softc *nb)
4394 {
4395 	const struct gre_tunnel *a = &na->sc_tunnel;
4396 	const struct gre_tunnel *b = &nb->sc_tunnel;
4397 	int rv;
4398 
4399 	rv = nvgre_cmp_tunnel(a, b);
4400 	if (rv != 0)
4401 		return (rv);
4402 
4403 	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
4404 	if (rv != 0)
4405 		return (rv);
4406 
4407 	return (0);
4408 }
4409 
4410 static int
4411 nvgre_cmp_mcast(const struct gre_tunnel *a, const union gre_addr *aa,
4412     unsigned int if0idxa, const struct gre_tunnel *b,
4413     const union gre_addr *ab,unsigned int if0idxb)
4414 {
4415 	int rv;
4416 
4417 	rv = nvgre_cmp_tunnel(a, b);
4418 	if (rv != 0)
4419 		return (rv);
4420 
4421 	rv = gre_ip_cmp(a->t_af, aa, ab);
4422 	if (rv != 0)
4423 		return (rv);
4424 
4425 	if (if0idxa > if0idxb)
4426 		return (1);
4427 	if (if0idxa < if0idxb)
4428 		return (-1);
4429 
4430 	return (0);
4431 }
4432 
/*
 * Adapter for the multicast tree: compare two softcs by tunnel,
 * destination (group) address, and parent interface index.
 */
static inline int
nvgre_cmp_mcast_sc(const struct nvgre_softc *na, const struct nvgre_softc *nb)
{
	const struct gre_tunnel *a = &na->sc_tunnel;
	const struct gre_tunnel *b = &nb->sc_tunnel;

	return (nvgre_cmp_mcast(a, &a->t_dst, na->sc_ifp0,
	    b, &b->t_dst, nb->sc_ifp0));
}

RBT_GENERATE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
RBT_GENERATE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);
4445 
/*
 * eoip_tree is keyed on tunnel id (kept in t_key), routing table,
 * address family, and both tunnel addresses; eoip_input() looks
 * packets up against it.
 */
static inline int
eoip_cmp(const struct eoip_softc *ea, const struct eoip_softc *eb)
{
	const struct gre_tunnel *a = &ea->sc_tunnel;
	const struct gre_tunnel *b = &eb->sc_tunnel;
	int rv;

	if (a->t_key > b->t_key)
		return (1);
	if (a->t_key < b->t_key)
		return (-1);

	/* sort by routing table */
	if (a->t_rtableid > b->t_rtableid)
		return (1);
	if (a->t_rtableid < b->t_rtableid)
		return (-1);

	/* sort by address */
	if (a->t_af > b->t_af)
		return (1);
	if (a->t_af < b->t_af)
		return (-1);

	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
	if (rv != 0)
		return (rv);

	rv = gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst);
	if (rv != 0)
		return (rv);

	return (0);
}

RBT_GENERATE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);
4482