xref: /openbsd-src/sys/net/if_gre.c (revision 4b70baf6e17fc8b27fc1f7fa7929335753fa94c3)
1 /*	$OpenBSD: if_gre.c,v 1.150 2019/04/23 11:48:55 dlg Exp $ */
2 /*	$NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */
3 
4 /*
5  * Copyright (c) 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Heiko W.Rupp <hwr@pilhuhn.de>
10  *
11  * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * Encapsulate L3 protocols into IP, per RFC 1701 and 1702.
37  * See gre(4) for more details.
38  * Also supported: IP in IP encapsulation (proto 55) per RFC 2004.
39  */
40 
41 #include "bpfilter.h"
42 #include "pf.h"
43 
44 #include <sys/param.h>
45 #include <sys/mbuf.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/kernel.h>
49 #include <sys/systm.h>
50 #include <sys/errno.h>
51 #include <sys/timeout.h>
52 #include <sys/queue.h>
53 #include <sys/tree.h>
54 #include <sys/pool.h>
55 #include <sys/rwlock.h>
56 
57 #include <crypto/siphash.h>
58 
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_types.h>
62 #include <net/if_media.h>
63 #include <net/route.h>
64 
65 #include <netinet/in.h>
66 #include <netinet/in_var.h>
67 #include <netinet/if_ether.h>
68 #include <netinet/ip.h>
69 #include <netinet/ip_var.h>
70 #include <netinet/ip_ecn.h>
71 
72 #ifdef INET6
73 #include <netinet/ip6.h>
74 #include <netinet6/ip6_var.h>
75 #include <netinet6/in6_var.h>
76 #endif
77 
78 #ifdef PIPEX
79 #include <net/pipex.h>
80 #endif
81 
82 #ifdef MPLS
83 #include <netmpls/mpls.h>
84 #endif /* MPLS */
85 
86 #if NBPFILTER > 0
87 #include <net/bpf.h>
88 #endif
89 
90 #if NPF > 0
91 #include <net/pfvar.h>
92 #endif
93 
94 #include <net/if_gre.h>
95 
96 #include <netinet/ip_gre.h>
97 #include <sys/sysctl.h>
98 
99 /* for nvgre bridge shizz */
100 #include <sys/socket.h>
101 #include <net/if_bridge.h>
102 
103 /*
104  * packet formats
105  */
106 struct gre_header {
107 	uint16_t		gre_flags;
108 #define GRE_CP				0x8000  /* Checksum Present */
109 #define GRE_KP				0x2000  /* Key Present */
110 #define GRE_SP				0x1000  /* Sequence Present */
111 
112 #define GRE_VERS_MASK			0x0007
113 #define GRE_VERS_0			0x0000
114 #define GRE_VERS_1			0x0001
115 
116 	uint16_t		gre_proto;
117 } __packed __aligned(4);
118 
119 struct gre_h_cksum {
120 	uint16_t		gre_cksum;
121 	uint16_t		gre_reserved1;
122 } __packed __aligned(4);
123 
124 struct gre_h_key {
125 	uint32_t		gre_key;
126 } __packed __aligned(4);
127 
128 #define GRE_EOIP		0x6400
129 
130 struct gre_h_key_eoip {
131 	uint16_t		eoip_len;	/* network order */
132 	uint16_t		eoip_tunnel_id;	/* little endian */
133 } __packed __aligned(4);
134 
135 #define NVGRE_VSID_RES_MIN	0x000000 /* reserved for future use */
136 #define NVGRE_VSID_RES_MAX	0x000fff
137 #define NVGRE_VSID_NVE2NVE	0xffffff /* vendor specific NVE-to-NVE comms */
138 
139 struct gre_h_seq {
140 	uint32_t		gre_seq;
141 } __packed __aligned(4);
142 
143 struct gre_h_wccp {
144 	uint8_t			wccp_flags;
145 	uint8_t			service_id;
146 	uint8_t			alt_bucket;
147 	uint8_t			pri_bucket;
148 } __packed __aligned(4);
149 
150 #define GRE_WCCP 0x883e
151 
152 #define GRE_HDRLEN (sizeof(struct ip) + sizeof(struct gre_header))
153 
154 /*
155  * GRE tunnel metadata
156  */
157 
158 #define GRE_KA_NONE		0
159 #define GRE_KA_DOWN		1
160 #define GRE_KA_HOLD		2
161 #define GRE_KA_UP		3
162 
163 union gre_addr {
164 	struct in_addr		in4;
165 	struct in6_addr		in6;
166 };
167 
168 static inline int
169 		gre_ip_cmp(int, const union gre_addr *,
170 		    const union gre_addr *);
171 
172 #define GRE_KEY_MIN		0x00000000U
173 #define GRE_KEY_MAX		0xffffffffU
174 #define GRE_KEY_SHIFT		0
175 
176 #define GRE_KEY_ENTROPY_MIN	0x00000000U
177 #define GRE_KEY_ENTROPY_MAX	0x00ffffffU
178 #define GRE_KEY_ENTROPY_SHIFT	8
179 
180 struct gre_tunnel {
181 	uint32_t		t_key_mask;
182 #define GRE_KEY_NONE			htonl(0x00000000U)
183 #define GRE_KEY_ENTROPY			htonl(0xffffff00U)
184 #define GRE_KEY_MASK			htonl(0xffffffffU)
185 	uint32_t		t_key;
186 
187 	u_int			t_rtableid;
188 	union gre_addr		t_src;
189 #define t_src4	t_src.in4
190 #define t_src6	t_src.in6
191 	union gre_addr		t_dst;
192 #define t_dst4	t_dst.in4
193 #define t_dst6	t_dst.in6
194 	int			t_ttl;
195 	int			t_txhprio;
196 	int			t_rxhprio;
197 	int			t_ecn;
198 	uint16_t		t_df;
199 	sa_family_t		t_af;
200 };
201 
202 static int
203 		gre_cmp_src(const struct gre_tunnel *,
204 		    const struct gre_tunnel *);
205 static int
206 		gre_cmp(const struct gre_tunnel *, const struct gre_tunnel *);
207 
208 static int	gre_set_tunnel(struct gre_tunnel *, struct if_laddrreq *, int);
209 static int	gre_get_tunnel(struct gre_tunnel *, struct if_laddrreq *);
210 static int	gre_del_tunnel(struct gre_tunnel *);
211 
212 static int	gre_set_vnetid(struct gre_tunnel *, struct ifreq *);
213 static int	gre_get_vnetid(struct gre_tunnel *, struct ifreq *);
214 static int	gre_del_vnetid(struct gre_tunnel *);
215 
216 static int	gre_set_vnetflowid(struct gre_tunnel *, struct ifreq *);
217 static int	gre_get_vnetflowid(struct gre_tunnel *, struct ifreq *);
218 
219 static struct mbuf *
220 		gre_encap_dst(const struct gre_tunnel *, const union gre_addr *,
221 		    struct mbuf *, uint16_t, uint8_t, uint8_t);
222 #define gre_encap(_t, _m, _p, _ttl, _tos) \
223 		gre_encap_dst((_t), &(_t)->t_dst, (_m), (_p), (_ttl), (_tos))
224 
225 static struct mbuf *
226 		gre_encap_dst_ip(const struct gre_tunnel *,
227 		    const union gre_addr *, struct mbuf *, uint8_t, uint8_t);
228 #define gre_encap_ip(_t, _m, _ttl, _tos) \
229 		gre_encap_dst_ip((_t), &(_t)->t_dst, (_m), (_ttl), (_tos))
230 
231 static int
232 		gre_ip_output(const struct gre_tunnel *, struct mbuf *);
233 
234 static int	gre_tunnel_ioctl(struct ifnet *, struct gre_tunnel *,
235 		    u_long, void *);
236 
237 static uint8_t	gre_l2_tos(const struct gre_tunnel *, const struct mbuf *);
238 static uint8_t	gre_l3_tos(const struct gre_tunnel *,
239 		    const struct mbuf *, uint8_t);
240 
241 /*
242  * layer 3 GRE tunnels
243  */
244 
/*
 * softc for a point-to-point gre(4) interface.  the embedded
 * gre_tunnel must stay the first member: the input path looks
 * interfaces up by tunnel and casts the result back to a softc.
 */
struct gre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	TAILQ_ENTRY(gre_softc)	sc_entry;	/* on gre_list */

	struct ifnet		sc_if;

	/* keepalive machinery; states are the GRE_KA_* values above */
	struct timeout		sc_ka_send;	/* next probe transmission */
	struct timeout		sc_ka_hold;	/* see gre_keepalive_hold */

	unsigned int		sc_ka_state;
	unsigned int		sc_ka_timeo;
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;

	SIPHASH_KEY		sc_ka_key;	/* keys gk_digest below */
	uint32_t		sc_ka_bias;
	int			sc_ka_recvtm;
};

TAILQ_HEAD(gre_list, gre_softc);

/*
 * wire payload of a gre(4) keepalive: local uptime, a random
 * discriminator, and a SipHash digest (see sc_ka_key).
 */
struct gre_keepalive {
	uint32_t		gk_uptime;
	uint32_t		gk_random;
	uint8_t			gk_digest[SIPHASH_DIGEST_LENGTH];
} __packed __aligned(4);
273 
274 static int	gre_clone_create(struct if_clone *, int);
275 static int	gre_clone_destroy(struct ifnet *);
276 
277 struct if_clone gre_cloner =
278     IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
279 
280 /* protected by NET_LOCK */
281 struct gre_list gre_list = TAILQ_HEAD_INITIALIZER(gre_list);
282 
283 static int	gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
284 		    struct rtentry *);
285 static void	gre_start(struct ifnet *);
286 static int	gre_ioctl(struct ifnet *, u_long, caddr_t);
287 
288 static int	gre_up(struct gre_softc *);
289 static int	gre_down(struct gre_softc *);
290 static void	gre_link_state(struct ifnet *, unsigned int);
291 
292 static int	gre_input_key(struct mbuf **, int *, int, int, uint8_t,
293 		    struct gre_tunnel *);
294 
295 static struct mbuf *
296 		gre_ipv4_patch(const struct gre_tunnel *, struct mbuf *,
297 		    uint8_t *, uint8_t);
298 #ifdef INET6
299 static struct mbuf *
300 		gre_ipv6_patch(const struct gre_tunnel *, struct mbuf *,
301 		    uint8_t *, uint8_t);
302 #endif
303 #ifdef MPLS
304 static struct mbuf *
305 		gre_mpls_patch(const struct gre_tunnel *, struct mbuf *,
306 		    uint8_t *, uint8_t);
307 #endif
308 static void	gre_keepalive_send(void *);
309 static void	gre_keepalive_recv(struct ifnet *ifp, struct mbuf *);
310 static void	gre_keepalive_hold(void *);
311 
312 static struct mbuf *
313 		gre_l3_encap_dst(const struct gre_tunnel *, const void *,
314 		    struct mbuf *m, sa_family_t);
315 
316 #define gre_l3_encap(_t, _m, _af) \
317 		gre_l3_encap_dst((_t), &(_t)->t_dst, (_m), (_af))
318 
/*
 * softc for mgre(4), multipoint gre.  interfaces live in an RBT keyed
 * by tunnel addresses; sc_tunnel must remain first so the tree code
 * can compare softcs as tunnels (see mgre_find).
 */
struct mgre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(mgre_softc)	sc_entry;	/* on mgre_tree */

	struct ifnet		sc_if;
};

RBT_HEAD(mgre_tree, mgre_softc);
327 
328 static inline int
329 		mgre_cmp(const struct mgre_softc *, const struct mgre_softc *);
330 
331 RBT_PROTOTYPE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);
332 
333 static int	mgre_clone_create(struct if_clone *, int);
334 static int	mgre_clone_destroy(struct ifnet *);
335 
336 struct if_clone mgre_cloner =
337     IF_CLONE_INITIALIZER("mgre", mgre_clone_create, mgre_clone_destroy);
338 
339 static void	mgre_rtrequest(struct ifnet *, int, struct rtentry *);
340 static int	mgre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
341 		    struct rtentry *);
342 static void	mgre_start(struct ifnet *);
343 static int	mgre_ioctl(struct ifnet *, u_long, caddr_t);
344 
345 static int	mgre_set_tunnel(struct mgre_softc *, struct if_laddrreq *);
346 static int	mgre_get_tunnel(struct mgre_softc *, struct if_laddrreq *);
347 static int	mgre_up(struct mgre_softc *);
348 static int	mgre_down(struct mgre_softc *);
349 
350 /* protected by NET_LOCK */
351 struct mgre_tree mgre_tree = RBT_INITIALIZER();
352 
353 /*
354  * Ethernet GRE tunnels
355  */
356 #define ether_cmp(_a, _b)	memcmp((_a), (_b), ETHER_ADDR_LEN)
357 #define ether_isequal(_a, _b)	(ether_cmp((_a), (_b)) == 0)
358 #define ether_isbcast(_e)	ether_isequal((_e), etherbroadcastaddr)
359 
360 static struct mbuf *
361 		gre_ether_align(struct mbuf *, int);
362 
/*
 * softc for egre(4), Ethernet over point-to-point GRE.
 * sc_tunnel must remain first for the tunnel lookup code.
 */
struct egre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(egre_softc)	sc_entry;	/* on egre_tree */

	struct arpcom		sc_ac;		/* Ethernet interface state */
	struct ifmedia		sc_media;
};

RBT_HEAD(egre_tree, egre_softc);
372 
373 static inline int
374 		egre_cmp(const struct egre_softc *, const struct egre_softc *);
375 
376 RBT_PROTOTYPE(egre_tree, egre_softc, sc_entry, egre_cmp);
377 
378 static int	egre_clone_create(struct if_clone *, int);
379 static int	egre_clone_destroy(struct ifnet *);
380 
381 static void	egre_start(struct ifnet *);
382 static int	egre_ioctl(struct ifnet *, u_long, caddr_t);
383 static int	egre_media_change(struct ifnet *);
384 static void	egre_media_status(struct ifnet *, struct ifmediareq *);
385 
386 static int	egre_up(struct egre_softc *);
387 static int	egre_down(struct egre_softc *);
388 
389 static int	egre_input(const struct gre_tunnel *, struct mbuf *, int,
390 		    uint8_t);
391 struct if_clone egre_cloner =
392     IF_CLONE_INITIALIZER("egre", egre_clone_create, egre_clone_destroy);
393 
394 /* protected by NET_LOCK */
395 struct egre_tree egre_tree = RBT_INITIALIZER();
396 
397 /*
398  * Network Virtualisation Using Generic Routing Encapsulation (NVGRE)
399  */
400 
401 #define NVGRE_AGE_TMO		100	/* seconds */
402 
/*
 * a mapping from a peer Ethernet address to the tunnel endpoint it
 * was seen behind, much like a bridge rtentry.
 */
struct nvgre_entry {
	RB_ENTRY(nvgre_entry)	 nv_entry;
	struct ether_addr	 nv_dst;	/* MAC this entry is for */
	uint8_t			 nv_type;
#define NVGRE_ENTRY_DYNAMIC		0	/* learned; aged (nvgre_age) */
#define NVGRE_ENTRY_STATIC		1	/* configured */
	union gre_addr		 nv_gateway;	/* endpoint for this MAC */
	struct refcnt		 nv_refs;
	int			 nv_age;
};

RBT_HEAD(nvgre_map, nvgre_entry);
415 
416 static inline int
417 		nvgre_entry_cmp(const struct nvgre_entry *,
418 		    const struct nvgre_entry *);
419 
420 RBT_PROTOTYPE(nvgre_map, nvgre_entry, nv_entry, nvgre_entry_cmp);
421 
/*
 * softc for nvgre(4).  sc_tunnel must remain first for the tunnel
 * lookup code.  interfaces are indexed twice: by unicast tunnel
 * addresses and by multicast group + parent interface.
 */
struct nvgre_softc {
	struct gre_tunnel	 sc_tunnel; /* must be first */
	unsigned int		 sc_ifp0;	/* parent ifindex */
	RBT_ENTRY(nvgre_softc)	 sc_uentry;	/* on nvgre_ucast_tree */
	RBT_ENTRY(nvgre_softc)	 sc_mentry;	/* on nvgre_mcast_tree */

	struct arpcom		 sc_ac;
	struct ifmedia		 sc_media;

	struct mbuf_queue	 sc_send_list;	/* tx backlog, see nvgre_send */
	struct task		 sc_send_task;	/* runs nvgre_send */

	void			*sc_inm;
	void			*sc_lhcookie;	/* parent link state hook */
	void			*sc_dhcookie;	/* parent detach hook */

	/* learned MAC-to-endpoint map, like a bridge's rtable */
	struct rwlock		 sc_ether_lock;
	struct nvgre_map	 sc_ether_map;
	unsigned int		 sc_ether_num;	/* current entry count */
	unsigned int		 sc_ether_max;	/* learning limit */
	int			 sc_ether_tmo;
	struct timeout		 sc_ether_age;	/* runs nvgre_age */
};

RBT_HEAD(nvgre_ucast_tree, nvgre_softc);
RBT_HEAD(nvgre_mcast_tree, nvgre_softc);
448 
449 static inline int
450 		nvgre_cmp_ucast(const struct nvgre_softc *,
451 		    const struct nvgre_softc *);
452 static int
453 		nvgre_cmp_mcast(const struct gre_tunnel *,
454 		    const union gre_addr *, unsigned int,
455 		    const struct gre_tunnel *, const union gre_addr *,
456 		    unsigned int);
457 static inline int
458 		nvgre_cmp_mcast_sc(const struct nvgre_softc *,
459 		    const struct nvgre_softc *);
460 
461 RBT_PROTOTYPE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
462 RBT_PROTOTYPE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);
463 
464 static int	nvgre_clone_create(struct if_clone *, int);
465 static int	nvgre_clone_destroy(struct ifnet *);
466 
467 static void	nvgre_start(struct ifnet *);
468 static int	nvgre_ioctl(struct ifnet *, u_long, caddr_t);
469 
470 static int	nvgre_up(struct nvgre_softc *);
471 static int	nvgre_down(struct nvgre_softc *);
472 static int	nvgre_set_parent(struct nvgre_softc *, const char *);
473 static void	nvgre_link_change(void *);
474 static void	nvgre_detach(void *);
475 
476 static int	nvgre_input(const struct gre_tunnel *, struct mbuf *, int,
477 		    uint8_t);
478 static void	nvgre_send(void *);
479 
480 static int	nvgre_rtfind(struct nvgre_softc *, struct ifbaconf *);
481 static void	nvgre_flush_map(struct nvgre_softc *);
482 static void	nvgre_input_map(struct nvgre_softc *,
483 		    const struct gre_tunnel *, const struct ether_header *);
484 static void	nvgre_age(void *);
485 
486 struct if_clone nvgre_cloner =
487     IF_CLONE_INITIALIZER("nvgre", nvgre_clone_create, nvgre_clone_destroy);
488 
489 struct pool nvgre_pool;
490 
491 /* protected by NET_LOCK */
492 struct nvgre_ucast_tree nvgre_ucast_tree = RBT_INITIALIZER();
493 struct nvgre_mcast_tree nvgre_mcast_tree = RBT_INITIALIZER();
494 
495 /*
496  * MikroTik Ethernet over IP protocol (eoip)
497  */
498 
/*
 * softc for eoip(4), MikroTik Ethernet over IP.  framed as GRE
 * version 1 with a 16-bit tunnel id.  sc_tunnel must remain first
 * for the tunnel lookup code.
 */
struct eoip_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	uint16_t		sc_tunnel_id;	/* little endian on the wire */
	RBT_ENTRY(eoip_softc)	sc_entry;	/* on eoip_tree */

	struct arpcom		sc_ac;
	struct ifmedia		sc_media;

	/* keepalive machinery, analogous to gre_softc's */
	struct timeout		sc_ka_send;
	struct timeout		sc_ka_hold;

	unsigned int		sc_ka_state;
	unsigned int		sc_ka_timeo;
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;
};

RBT_HEAD(eoip_tree, eoip_softc);
519 
520 static inline int
521 		eoip_cmp(const struct eoip_softc *, const struct eoip_softc *);
522 
523 RBT_PROTOTYPE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);
524 
525 static int	eoip_clone_create(struct if_clone *, int);
526 static int	eoip_clone_destroy(struct ifnet *);
527 
528 static void	eoip_start(struct ifnet *);
529 static int	eoip_ioctl(struct ifnet *, u_long, caddr_t);
530 
531 static void	eoip_keepalive_send(void *);
532 static void	eoip_keepalive_recv(struct eoip_softc *);
533 static void	eoip_keepalive_hold(void *);
534 
535 static int	eoip_up(struct eoip_softc *);
536 static int	eoip_down(struct eoip_softc *);
537 
538 static struct mbuf *
539 		eoip_encap(struct eoip_softc *, struct mbuf *, uint8_t);
540 
541 static struct mbuf *
542 		eoip_input(struct gre_tunnel *, struct mbuf *,
543 		    const struct gre_header *, uint8_t, int);
544 struct if_clone eoip_cloner =
545     IF_CLONE_INITIALIZER("eoip", eoip_clone_create, eoip_clone_destroy);
546 
547 /* protected by NET_LOCK */
548 struct eoip_tree eoip_tree = RBT_INITIALIZER();
549 
550 /*
551  * It is not easy to calculate the right value for a GRE MTU.
552  * We leave this task to the admin and use the same default that
553  * other vendors use.
554  */
555 #define GREMTU 1476
556 
557 /*
558  * We can control the acceptance of GRE and MobileIP packets by
559  * altering the sysctl net.inet.gre.allow values
560  * respectively. Zero means drop them, all else is acceptance.  We can also
561  * control acceptance of WCCPv1-style GRE packets through the
562  * net.inet.gre.wccp value, but be aware it depends upon normal GRE being
563  * allowed as well.
564  *
565  */
566 int gre_allow = 0;
567 int gre_wccp = 0;
568 
569 void
570 greattach(int n)
571 {
572 	if_clone_attach(&gre_cloner);
573 	if_clone_attach(&mgre_cloner);
574 	if_clone_attach(&egre_cloner);
575 	if_clone_attach(&nvgre_cloner);
576 	if_clone_attach(&eoip_cloner);
577 }
578 
/*
 * create a gre(4) point-to-point interface: allocate the softc, set
 * tunnel and keepalive defaults, attach the interface, and put it on
 * the global list the input path searches.  always succeeds.
 */
static int
gre_clone_create(struct if_clone *ifc, int unit)
{
	struct gre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d",
	    ifc->ifc_name, unit);

	ifp = &sc->sc_if;
	ifp->if_softc = sc;
	ifp->if_type = IFT_TUNNEL;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_output = gre_output;
	ifp->if_start = gre_start;
	ifp->if_ioctl = gre_ioctl;
	ifp->if_rtrequest = p2p_rtrequest;

	/* tunnel defaults; all of these are tweakable via gre_ioctl */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);	/* DF bit clear by default */
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	/* the hold timeout runs from process context */
	timeout_set(&sc->sc_ka_send, gre_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, gre_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_NONE;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	ifp->if_llprio = IFQ_TOS2PRIO(IPTOS_PREC_INTERNETCONTROL);

	/* gre_list is protected by NET_LOCK */
	NET_LOCK();
	TAILQ_INSERT_TAIL(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	return (0);
}
627 
/*
 * tear down a gre(4) interface: bring it down if running, unlink it
 * from the global list, then detach and free the softc.
 */
static int
gre_clone_destroy(struct ifnet *ifp)
{
	struct gre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		gre_down(sc);

	/* gre_list is protected by NET_LOCK */
	TAILQ_REMOVE(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
646 
/*
 * create an mgre(4) multipoint interface.  unlike gre(4) there is no
 * global list insertion here; the interface joins mgre_tree when it
 * is brought up.  always succeeds.
 */
static int
mgre_clone_create(struct if_clone *ifc, int unit)
{
	struct mgre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_type = IFT_L3IPVLAN;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_MULTICAST|IFF_SIMPLEX;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_rtrequest = mgre_rtrequest;
	ifp->if_output = mgre_output;
	ifp->if_start = mgre_start;
	ifp->if_ioctl = mgre_ioctl;

	/* tunnel defaults; all of these are tweakable via mgre_ioctl */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);	/* DF bit clear by default */
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	return (0);
}
686 
/*
 * tear down an mgre(4) interface: bring it down if running (which
 * removes it from mgre_tree), then detach and free the softc.
 */
static int
mgre_clone_destroy(struct ifnet *ifp)
{
	struct mgre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		mgre_down(sc);
	NET_UNLOCK();

	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
703 
/*
 * create an egre(4) Ethernet-over-GRE interface with a fake MAC
 * address and autoselect-only media.  always succeeds.
 */
static int
egre_clone_create(struct if_clone *ifc, int unit)
{
	struct egre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = egre_ioctl;
	ifp->if_start = egre_start;
	ifp->if_xflags = IFXF_CLONED;
	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	/* tunnel defaults; all of these are tweakable via egre_ioctl */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);	/* DF bit clear by default */

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}
740 
/*
 * tear down an egre(4) interface: bring it down if running, release
 * the media instances, detach the Ethernet layer, and free the softc.
 */
static int
egre_clone_destroy(struct ifnet *ifp)
{
	struct egre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		egre_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
759 
760 static int
761 nvgre_clone_create(struct if_clone *ifc, int unit)
762 {
763 	struct nvgre_softc *sc;
764 	struct ifnet *ifp;
765 	struct gre_tunnel *tunnel;
766 
767 	if (nvgre_pool.pr_size == 0) {
768 		pool_init(&nvgre_pool, sizeof(struct nvgre_entry), 0,
769 		    IPL_SOFTNET, 0, "nvgren", NULL);
770 	}
771 
772 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
773 	ifp = &sc->sc_ac.ac_if;
774 
775 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
776 	    ifc->ifc_name, unit);
777 
778 	ifp->if_softc = sc;
779 	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
780 	ifp->if_ioctl = nvgre_ioctl;
781 	ifp->if_start = nvgre_start;
782 	ifp->if_xflags = IFXF_CLONED;
783 	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
784 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
785 	ether_fakeaddr(ifp);
786 
787 	tunnel = &sc->sc_tunnel;
788 	tunnel->t_ttl = IP_DEFAULT_MULTICAST_TTL;
789 	tunnel->t_txhprio = 0;
790 	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
791 	tunnel->t_df = htons(IP_DF);
792 	tunnel->t_key_mask = GRE_KEY_ENTROPY;
793 	tunnel->t_key = htonl((NVGRE_VSID_RES_MAX + 1) <<
794 	    GRE_KEY_ENTROPY_SHIFT);
795 
796 	mq_init(&sc->sc_send_list, IFQ_MAXLEN * 2, IPL_SOFTNET);
797 	task_set(&sc->sc_send_task, nvgre_send, sc);
798 
799 	rw_init(&sc->sc_ether_lock, "nvgrelk");
800 	RBT_INIT(nvgre_map, &sc->sc_ether_map);
801 	sc->sc_ether_num = 0;
802 	sc->sc_ether_max = 100;
803 	sc->sc_ether_tmo = 240 * hz;
804 	timeout_set_proc(&sc->sc_ether_age, nvgre_age, sc); /* ugh */
805 
806 	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
807 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
808 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
809 
810 	if_counters_alloc(ifp);
811 	if_attach(ifp);
812 	ether_ifattach(ifp);
813 
814 	return (0);
815 }
816 
/*
 * tear down an nvgre(4) interface: bring it down if running, release
 * the media instances, detach the Ethernet layer, and free the softc.
 */
static int
nvgre_clone_destroy(struct ifnet *ifp)
{
	struct nvgre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		nvgre_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
835 
/*
 * create an eoip(4) interface with a fake MAC address, keepalive
 * defaults, and autoselect-only media.  always succeeds.
 */
static int
eoip_clone_create(struct if_clone *ifc, int unit)
{
	struct eoip_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = eoip_ioctl;
	ifp->if_start = eoip_start;
	ifp->if_xflags = IFXF_CLONED;
	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	/* tunnel defaults; all of these are tweakable via eoip_ioctl */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);	/* DF bit clear by default */

	sc->sc_ka_timeo = 10;
	sc->sc_ka_count = 10;

	/* the hold timeout runs from process context */
	timeout_set(&sc->sc_ka_send, eoip_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, eoip_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_DOWN;

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}
879 
/*
 * tear down an eoip(4) interface: bring it down if running, release
 * the media instances, detach the Ethernet layer, and free the softc.
 */
static int
eoip_clone_destroy(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		eoip_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
898 
899 int
900 gre_input(struct mbuf **mp, int *offp, int type, int af)
901 {
902 	struct mbuf *m = *mp;
903 	struct gre_tunnel key;
904 	struct ip *ip;
905 
906 	ip = mtod(m, struct ip *);
907 
908 	/* XXX check if ip_src is sane for nvgre? */
909 
910 	key.t_af = AF_INET;
911 	key.t_src4 = ip->ip_dst;
912 	key.t_dst4 = ip->ip_src;
913 
914 	if (gre_input_key(mp, offp, type, af, ip->ip_tos, &key) == -1)
915 		return (rip_input(mp, offp, type, af));
916 
917 	return (IPPROTO_DONE);
918 }
919 
#ifdef INET6
/*
 * IPv6 GRE input: build a lookup key with the addresses swapped (our
 * address is the tunnel source, the sender is the destination) and
 * hand the packet to gre_input_key.  unclaimed packets fall through
 * to raw IPv6 input.
 */
int
gre_input6(struct mbuf **mp, int *offp, int type, int af)
{
	struct gre_tunnel key;
	struct ip6_hdr *ip6 = mtod(*mp, struct ip6_hdr *);
	uint32_t flow;

	/* XXX check if ip6_src is sane for nvgre? */

	key.t_af = AF_INET6;
	key.t_src6 = ip6->ip6_dst;
	key.t_dst6 = ip6->ip6_src;

	flow = bemtoh32(&ip6->ip6_flow);

	/* the low byte of flow >> 20 is the traffic class */
	if (gre_input_key(mp, offp, type, af, flow >> 20, &key) == -1)
		return (rip6_input(mp, offp, type, af));

	return (IPPROTO_DONE);
}
#endif /* INET6 */
945 
946 static inline struct ifnet *
947 gre_find(const struct gre_tunnel *key)
948 {
949 	struct gre_softc *sc;
950 
951 	TAILQ_FOREACH(sc, &gre_list, sc_entry) {
952 		if (gre_cmp(key, &sc->sc_tunnel) != 0)
953 			continue;
954 
955 		if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
956 			continue;
957 
958 		return (&sc->sc_if);
959 	}
960 
961 	return (NULL);
962 }
963 
964 static inline struct ifnet *
965 mgre_find(const struct gre_tunnel *key)
966 {
967 	struct mgre_softc *sc;
968 
969 	NET_ASSERT_LOCKED();
970 	sc = RBT_FIND(mgre_tree, &mgre_tree, (const struct mgre_softc *)key);
971 	if (sc != NULL)
972 		return (&sc->sc_if);
973 
974 	return (NULL);
975 }
976 
977 static struct mbuf *
978 gre_input_1(struct gre_tunnel *key, struct mbuf *m,
979     const struct gre_header *gh, uint8_t otos, int iphlen)
980 {
981 	switch (gh->gre_proto) {
982 	case htons(ETHERTYPE_PPP):
983 #ifdef PIPEX
984 		if (pipex_enable) {
985 			struct pipex_session *session;
986 
987 			session = pipex_pptp_lookup_session(m);
988 			if (session != NULL &&
989 			    pipex_pptp_input(m, session) == NULL)
990 				return (NULL);
991 		}
992 #endif
993 		break;
994 	case htons(GRE_EOIP):
995 		return (eoip_input(key, m, gh, otos, iphlen));
996 		break;
997 	}
998 
999 	return (m);
1000 }
1001 
1002 static int
1003 gre_input_key(struct mbuf **mp, int *offp, int type, int af, uint8_t otos,
1004     struct gre_tunnel *key)
1005 {
1006 	struct mbuf *m = *mp;
1007 	int iphlen = *offp, hlen, rxprio;
1008 	struct ifnet *ifp;
1009 	const struct gre_tunnel *tunnel;
1010 	caddr_t buf;
1011 	struct gre_header *gh;
1012 	struct gre_h_key *gkh;
1013 	void (*input)(struct ifnet *, struct mbuf *);
1014 	struct mbuf *(*patch)(const struct gre_tunnel *, struct mbuf *,
1015 	    uint8_t *, uint8_t);
1016 	int bpf_af = AF_UNSPEC; /* bpf */
1017 	int mcast = 0;
1018 	uint8_t itos;
1019 
1020 	if (!gre_allow)
1021 		goto decline;
1022 
1023 	key->t_rtableid = m->m_pkthdr.ph_rtableid;
1024 
1025 	hlen = iphlen + sizeof(*gh);
1026 	if (m->m_pkthdr.len < hlen)
1027 		goto decline;
1028 
1029 	m = m_pullup(m, hlen);
1030 	if (m == NULL)
1031 		return (IPPROTO_DONE);
1032 
1033 	buf = mtod(m, caddr_t);
1034 	gh = (struct gre_header *)(buf + iphlen);
1035 
1036 	/* check the version */
1037 	switch (gh->gre_flags & htons(GRE_VERS_MASK)) {
1038 	case htons(GRE_VERS_0):
1039 		break;
1040 
1041 	case htons(GRE_VERS_1):
1042 		m = gre_input_1(key, m, gh, otos, iphlen);
1043 		if (m == NULL)
1044 			return (IPPROTO_DONE);
1045 		/* FALLTHROUGH */
1046 	default:
1047 		goto decline;
1048 	}
1049 
1050 	/* the only optional bit in the header is K flag */
1051 	if ((gh->gre_flags & htons(~(GRE_KP|GRE_VERS_MASK))) != htons(0))
1052 		goto decline;
1053 
1054 	if (gh->gre_flags & htons(GRE_KP)) {
1055 		hlen += sizeof(*gkh);
1056 		if (m->m_pkthdr.len < hlen)
1057 			goto decline;
1058 
1059 		m = m_pullup(m, hlen);
1060 		if (m == NULL)
1061 			return (IPPROTO_DONE);
1062 
1063 		buf = mtod(m, caddr_t);
1064 		gh = (struct gre_header *)(buf + iphlen);
1065 		gkh = (struct gre_h_key *)(gh + 1);
1066 
1067 		key->t_key_mask = GRE_KEY_MASK;
1068 		key->t_key = gkh->gre_key;
1069 	} else
1070 		key->t_key_mask = GRE_KEY_NONE;
1071 
1072 	if (gh->gre_proto == htons(ETHERTYPE_TRANSETHER)) {
1073 		if (egre_input(key, m, hlen, otos) == -1 &&
1074 		    nvgre_input(key, m, hlen, otos) == -1)
1075 			goto decline;
1076 
1077 		return (IPPROTO_DONE);
1078 	}
1079 
1080 	ifp = gre_find(key);
1081 	if (ifp == NULL) {
1082 		ifp = mgre_find(key);
1083 		if (ifp == NULL)
1084 			goto decline;
1085 	}
1086 
1087 	switch (gh->gre_proto) {
1088 	case htons(GRE_WCCP): {
1089 		struct mbuf *n;
1090 		int off;
1091 
1092 		/* WCCP/GRE:
1093 		 *   So far as I can see (and test) it seems that Cisco's WCCP
1094 		 *   GRE tunnel is precisely a IP-in-GRE tunnel that differs
1095 		 *   only in its protocol number.  At least, it works for me.
1096 		 *
1097 		 *   The Internet Drafts can be found if you look for
1098 		 *   the following:
1099 		 *     draft-forster-wrec-wccp-v1-00.txt
1100 		 *     draft-wilson-wrec-wccp-v2-01.txt
1101 		 */
1102 
1103 		if (!gre_wccp && !ISSET(ifp->if_flags, IFF_LINK0))
1104 			goto decline;
1105 
1106 		/*
1107 		 * If the first nibble of the payload does not look like
1108 		 * IPv4, assume it is WCCP v2.
1109 		 */
1110 		n = m_getptr(m, hlen, &off);
1111 		if (n == NULL)
1112 			goto decline;
1113 		if (n->m_data[off] >> 4 != IPVERSION)
1114 			hlen += sizeof(gre_wccp);
1115 
1116 		/* FALLTHROUGH */
1117 	}
1118 	case htons(ETHERTYPE_IP):
1119 #if NBPFILTER > 0
1120 		bpf_af = AF_INET;
1121 #endif
1122 		patch = gre_ipv4_patch;
1123 		input = ipv4_input;
1124 		break;
1125 #ifdef INET6
1126 	case htons(ETHERTYPE_IPV6):
1127 #if NBPFILTER > 0
1128 		bpf_af = AF_INET6;
1129 #endif
1130 		patch = gre_ipv6_patch;
1131 		input = ipv6_input;
1132 		break;
1133 #endif
1134 #ifdef MPLS
1135 	case htons(ETHERTYPE_MPLS_MCAST):
1136 		mcast = M_MCAST|M_BCAST;
1137 		/* fallthrough */
1138 	case htons(ETHERTYPE_MPLS):
1139 #if NBPFILTER > 0
1140 		bpf_af = AF_MPLS;
1141 #endif
1142 		patch = gre_mpls_patch;
1143 		input = mpls_input;
1144 		break;
1145 #endif
1146 	case htons(0):
1147 		if (ifp->if_type != IFT_TUNNEL) {
1148 			/* keepalives dont make sense for mgre */
1149 			goto decline;
1150 		}
1151 
1152 		m_adj(m, hlen);
1153 		gre_keepalive_recv(ifp, m);
1154 		return (IPPROTO_DONE);
1155 
1156 	default:
1157 		goto decline;
1158 	}
1159 
1160 	/* it's ours now */
1161 
1162 	m_adj(m, hlen);
1163 
1164 	tunnel = ifp->if_softc; /* gre and mgre tunnel info is at the front */
1165 
1166 	m = (*patch)(tunnel, m, &itos, otos);
1167 	if (m == NULL)
1168 		return (IPPROTO_DONE);
1169 
1170 	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
1171 		m->m_pkthdr.ph_flowid = M_FLOWID_VALID |
1172 		    (bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY);
1173 	}
1174 
1175 	rxprio = tunnel->t_rxhprio;
1176 	switch (rxprio) {
1177 	case IF_HDRPRIO_PACKET:
1178 		/* nop */
1179 		break;
1180 	case IF_HDRPRIO_OUTER:
1181 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos);
1182 		break;
1183 	case IF_HDRPRIO_PAYLOAD:
1184 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(itos);
1185 		break;
1186 	default:
1187 		m->m_pkthdr.pf.prio = rxprio;
1188 		break;
1189 	}
1190 
1191 	m->m_flags &= ~(M_MCAST|M_BCAST);
1192 	m->m_flags |= mcast;
1193 	m->m_pkthdr.ph_ifidx = ifp->if_index;
1194 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
1195 
1196 #if NPF > 0
1197 	pf_pkt_addr_changed(m);
1198 #endif
1199 
1200 	counters_pkt(ifp->if_counters,
1201 	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);
1202 
1203 #if NBPFILTER > 0
1204 	if (ifp->if_bpf)
1205 		bpf_mtap_af(ifp->if_bpf, bpf_af, m, BPF_DIRECTION_IN);
1206 #endif
1207 
1208 	(*input)(ifp, m);
1209 	return (IPPROTO_DONE);
1210 decline:
1211 	*mp = m;
1212 	return (-1);
1213 }
1214 
1215 static struct mbuf *
1216 gre_ipv4_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1217     uint8_t *itosp, uint8_t otos)
1218 {
1219 	struct ip *ip;
1220 	uint8_t itos;
1221 
1222 	m = m_pullup(m, sizeof(*ip));
1223 	if (m == NULL)
1224 		return (NULL);
1225 
1226 	ip = mtod(m, struct ip *);
1227 
1228 	itos = ip->ip_tos;
1229 	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1230 		m_freem(m);
1231 		return (NULL);
1232 	}
1233 	if (itos != ip->ip_tos)
1234 		ip_tos_patch(ip, itos);
1235 
1236 	*itosp = itos;
1237 
1238 	return (m);
1239 }
1240 
#ifdef INET6
/*
 * Merge the outer ECN bits into an inner IPv6 payload and report the
 * resulting inner traffic class via itosp.  Frees the mbuf and
 * returns NULL if the header cannot be pulled up or the ECN
 * combination is invalid.
 */
static struct mbuf *
gre_ipv6_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
    uint8_t *itosp, uint8_t otos)
{
	struct ip6_hdr *ip6;
	uint32_t vtcfl;
	uint8_t tos;

	m = m_pullup(m, sizeof(*ip6));
	if (m == NULL)
		return (NULL);

	ip6 = mtod(m, struct ip6_hdr *);

	/* the traffic class occupies bits 20-27 of the flow word */
	vtcfl = bemtoh32(&ip6->ip6_flow);
	tos = vtcfl >> 20;

	if (ip_ecn_egress(tunnel->t_ecn, &otos, &tos) == 0) {
		m_freem(m);
		return (NULL);
	}

	/* write the merged traffic class back into the header */
	vtcfl &= ~(0xff << 20);
	vtcfl |= (uint32_t)tos << 20;
	htobem32(&ip6->ip6_flow, vtcfl);

	*itosp = tos;

	return (m);
}
#endif
1272 
#ifdef MPLS
/*
 * Merge the outer ECN bits with the EXP bits of an inner MPLS
 * payload and report the result via itosp.  The shim itself is left
 * untouched.  Frees the mbuf and returns NULL on failure.
 */
static struct mbuf *
gre_mpls_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
    uint8_t *itosp, uint8_t otos)
{
	uint32_t shim;
	uint8_t tos;

	m = m_pullup(m, sizeof(shim));
	if (m == NULL)
		return (NULL);

	shim = *mtod(m, uint32_t *);
	/* scale the 3 EXP bits up into the top of a tos byte */
	tos = (ntohl(shim & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET) << 5;

	if (ip_ecn_egress(tunnel->t_ecn, &otos, &tos) == 0) {
		m_freem(m);
		return (NULL);
	}

	*itosp = tos;

	return (m);
}
#endif
1298 
/*
 * Set the pf priority on a received l2 packet according to the
 * tunnel's rxprio setting: leave the packet's own priority alone,
 * map it from the outer tos, or force a fixed value.  There is no
 * IF_HDRPRIO_PAYLOAD case here - presumably because an l2 payload
 * has no tos field to map from.
 */
#define gre_l2_prio(_t, _m, _otos) do {					\
	int rxprio = (_t)->t_rxhprio;					\
	switch (rxprio) {						\
	case IF_HDRPRIO_PACKET:						\
		/* nop */						\
		break;							\
	case IF_HDRPRIO_OUTER:						\
		(_m)->m_pkthdr.pf.prio = IFQ_TOS2PRIO((_otos));		\
		break;							\
	default:							\
		(_m)->m_pkthdr.pf.prio = rxprio;			\
		break;							\
	}								\
} while (0)
1313 
/*
 * Handle a GRE packet carrying transparent ethernet bridging for an
 * egre(4) interface.  Returns -1 if no interface matches the tunnel
 * addresses/key so the caller can offer the packet elsewhere, or 0
 * once the packet has been consumed (even if it was dropped).
 */
static int
egre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen, uint8_t otos)
{
	struct egre_softc *sc;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(egre_tree, &egre_tree, (const struct egre_softc *)key);
	if (sc == NULL)
		return (-1);

	/* it's ours now */
	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (0);

	/* stash (part of) the key as the packet's flow id */
	if (sc->sc_tunnel.t_key_mask == GRE_KEY_ENTROPY) {
		m->m_pkthdr.ph_flowid = M_FLOWID_VALID |
		    (bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY);
	}

	m->m_flags &= ~(M_MCAST|M_BCAST);

#if NPF > 0
	/* the packet crossed into a new address space; reset pf state */
	pf_pkt_addr_changed(m);
#endif

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	if_vinput(&sc->sc_ac.ac_if, m);

	return (0);
}
1346 
/*
 * Copy the learned ethernet address map out to userland (bridge
 * rtfind style request).  A zero-length request is answered with the
 * size needed to hold the whole map; otherwise as many entries as
 * fit in the buffer are copied out.  Returns 0 or a copyout error.
 */
static int
nvgre_rtfind(struct nvgre_softc *sc, struct ifbaconf *baconf)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct nvgre_entry *nv;
	struct ifbareq bareq;
	caddr_t uaddr, end;
	int error;
	int age;

	if (baconf->ifbac_len == 0) {
		/* single read is atomic */
		baconf->ifbac_len = sc->sc_ether_num * sizeof(bareq);
		return (0);
	}

	uaddr = baconf->ifbac_buf;
	end = uaddr + baconf->ifbac_len;

	rw_enter_read(&sc->sc_ether_lock);
	RBT_FOREACH(nv, nvgre_map, &sc->sc_ether_map) {
		/* stop once the userland buffer is full */
		if (uaddr >= end)
			break;

		memcpy(bareq.ifba_name, ifp->if_xname,
		    sizeof(bareq.ifba_name));
		memcpy(bareq.ifba_ifsname, ifp->if_xname,
		    sizeof(bareq.ifba_ifsname));
		memcpy(&bareq.ifba_dst, &nv->nv_dst,
		    sizeof(bareq.ifba_dst));

		/* report the tunnel endpoint this mac was learned behind */
		memset(&bareq.ifba_dstsa, 0, sizeof(bareq.ifba_dstsa));
		switch (sc->sc_tunnel.t_af) {
		case AF_INET: {
			struct sockaddr_in *sin;

			sin = (struct sockaddr_in *)&bareq.ifba_dstsa;
			sin->sin_len = sizeof(*sin);
			sin->sin_family = AF_INET;
			sin->sin_addr = nv->nv_gateway.in4;

			break;
		}
#ifdef INET6
		case AF_INET6: {
			struct sockaddr_in6 *sin6;

			sin6 = (struct sockaddr_in6 *)&bareq.ifba_dstsa;
			sin6->sin6_len = sizeof(*sin6);
			sin6->sin6_family = AF_INET6;
			sin6->sin6_addr = nv->nv_gateway.in6;

			break;
		}
#endif /* INET6 */
		default:
			unhandled_af(sc->sc_tunnel.t_af);
		}

		switch (nv->nv_type) {
		case NVGRE_ENTRY_DYNAMIC:
			/* age is reported in seconds, capped at 255 */
			age = (ticks - nv->nv_age) / hz;
			bareq.ifba_age = MIN(age, 0xff);
			bareq.ifba_flags = IFBAF_DYNAMIC;
			break;
		case NVGRE_ENTRY_STATIC:
			bareq.ifba_age = 0;
			bareq.ifba_flags = IFBAF_STATIC;
			break;
		}

		error = copyout(&bareq, uaddr, sizeof(bareq));
		if (error != 0) {
			rw_exit_read(&sc->sc_ether_lock);
			return (error);
		}

		uaddr += sizeof(bareq);
	}
	/* report the full map size so userland can detect truncation */
	baconf->ifbac_len = sc->sc_ether_num * sizeof(bareq);
	rw_exit_read(&sc->sc_ether_lock);

	return (0);
}
1431 
/*
 * Empty the learned ethernet address map.  The whole tree head is
 * moved aside under the write lock so the entries can be walked and
 * freed without holding it; entries still referenced elsewhere are
 * freed by whoever drops the last reference.
 */
static void
nvgre_flush_map(struct nvgre_softc *sc)
{
	struct nvgre_map map;
	struct nvgre_entry *nv, *nnv;

	rw_enter_write(&sc->sc_ether_lock);
	map = sc->sc_ether_map;
	RBT_INIT(nvgre_map, &sc->sc_ether_map);
	sc->sc_ether_num = 0;
	rw_exit_write(&sc->sc_ether_lock);

	RBT_FOREACH_SAFE(nv, nvgre_map, &map, nnv) {
		RBT_REMOVE(nvgre_map, &map, nv);
		/* drop the map's reference; free if it was the last one */
		if (refcnt_rele(&nv->nv_refs))
			pool_put(&nvgre_pool, nv);
	}
}
1450 
/*
 * Learn the inner source ethernet address of a received packet and
 * associate it with the outer tunnel source, so unicast traffic back
 * to that mac can be sent directly.  Broadcast and multicast source
 * addresses are never learned.
 */
static void
nvgre_input_map(struct nvgre_softc *sc, const struct gre_tunnel *key,
    const struct ether_header *eh)
{
	struct nvgre_entry *nv, nkey;
	int new = 0;

	if (ether_isbcast(eh->ether_shost) ||
	    ETHER_IS_MULTICAST(eh->ether_shost))
		return;

	memcpy(&nkey.nv_dst, eh->ether_shost, ETHER_ADDR_LEN);

	/* remember where it came from */
	rw_enter_read(&sc->sc_ether_lock);
	nv = RBT_FIND(nvgre_map, &sc->sc_ether_map, &nkey);
	if (nv == NULL)
		new = 1;
	else {
		nv->nv_age = ticks;

		/*
		 * only dynamic entries get their gateway refreshed;
		 * take a reference so the entry survives until the
		 * write-locked update below.
		 */
		if (nv->nv_type != NVGRE_ENTRY_DYNAMIC ||
		    gre_ip_cmp(key->t_af, &key->t_dst, &nv->nv_gateway))
			nv = NULL;
		else
			refcnt_take(&nv->nv_refs);
	}
	rw_exit_read(&sc->sc_ether_lock);

	if (new) {
		struct nvgre_entry *onv;
		unsigned int num;

		nv = pool_get(&nvgre_pool, PR_NOWAIT);
		if (nv == NULL) {
			/* oh well */
			return;
		}

		memcpy(&nv->nv_dst, eh->ether_shost, ETHER_ADDR_LEN);
		nv->nv_type = NVGRE_ENTRY_DYNAMIC;
		nv->nv_gateway = key->t_dst;
		refcnt_init(&nv->nv_refs);
		nv->nv_age = ticks;

		rw_enter_write(&sc->sc_ether_lock);
		num = sc->sc_ether_num;
		if (++num > sc->sc_ether_max)
			onv = nv;
		else {
			/* try to give the ref to the map */
			onv = RBT_INSERT(nvgre_map, &sc->sc_ether_map, nv);
			if (onv == NULL) {
				/* count the successful insert */
				sc->sc_ether_num = num;
			}
		}
		rw_exit_write(&sc->sc_ether_lock);

		/* the map was full or raced us; discard our entry */
		if (onv != NULL)
			pool_put(&nvgre_pool, nv);
	} else if (nv != NULL) {
		rw_enter_write(&sc->sc_ether_lock);
		nv->nv_gateway = key->t_dst;
		rw_exit_write(&sc->sc_ether_lock);

		if (refcnt_rele(&nv->nv_refs)) {
			/* ioctl may have deleted the entry */
			pool_put(&nvgre_pool, nv);
		}
	}
}
1523 
/*
 * Find the nvgre(4) interface for a packet that arrived on a
 * multicast group: the outer source address is matched against the
 * configured group (t_dst) along with the index of the interface
 * the packet came in on.
 */
static inline struct nvgre_softc *
nvgre_mcast_find(const struct gre_tunnel *key, unsigned int if0idx)
{
	struct nvgre_softc *sc;
	int rv;

	/*
	 * building an nvgre_softc to use with RBT_FIND is expensive, and
	 * would need to swap the src and dst addresses in the key. so do the
	 * find by hand.
	 */

	NET_ASSERT_LOCKED();
	sc = RBT_ROOT(nvgre_mcast_tree, &nvgre_mcast_tree);
	while (sc != NULL) {
		rv = nvgre_cmp_mcast(key, &key->t_src, if0idx,
		    &sc->sc_tunnel, &sc->sc_tunnel.t_dst, sc->sc_ifp0);
		if (rv == 0)
			return (sc);
		/* descend the tree the same way RBT_FIND would */
		if (rv < 0)
			sc = RBT_LEFT(nvgre_mcast_tree, sc);
		else
			sc = RBT_RIGHT(nvgre_mcast_tree, sc);
	}

	return (NULL);
}
1551 
1552 static inline struct nvgre_softc *
1553 nvgre_ucast_find(const struct gre_tunnel *key)
1554 {
1555 	NET_ASSERT_LOCKED();
1556 	return (RBT_FIND(nvgre_ucast_tree, &nvgre_ucast_tree,
1557 	    (struct nvgre_softc *)key));
1558 }
1559 
/*
 * Handle a GRE packet that looks like NVGRE (transparent ethernet
 * bridging plus a key).  Returns -1 if no nvgre(4) interface matches
 * so the caller can decline the packet, or 0 once it is consumed.
 */
static int
nvgre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen,
    uint8_t otos)
{
	struct nvgre_softc *sc;

	/* multicast/broadcast packets are matched against the group */
	if (ISSET(m->m_flags, M_MCAST|M_BCAST))
		sc = nvgre_mcast_find(key, m->m_pkthdr.ph_ifidx);
	else
		sc = nvgre_ucast_find(key);

	if (sc == NULL)
		return (-1);

	/* it's ours now */
	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (0);

	/* learn which tunnel endpoint the inner source mac is behind */
	nvgre_input_map(sc, key, mtod(m, struct ether_header *));

	/* stash (part of) the key as the packet's flow id */
	m->m_pkthdr.ph_flowid = M_FLOWID_VALID |
	    (bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY);

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	m->m_flags &= ~(M_MCAST|M_BCAST);

#if NPF > 0
	/* the packet crossed into a new address space; reset pf state */
	pf_pkt_addr_changed(m);
#endif

	if_vinput(&sc->sc_ac.ac_if, m);

	return (0);
}
1596 
1597 static struct mbuf *
1598 gre_ether_align(struct mbuf *m, int hlen)
1599 {
1600 	struct mbuf *n;
1601 	int off;
1602 
1603 	m_adj(m, hlen);
1604 
1605 	if (m->m_pkthdr.len < sizeof(struct ether_header)) {
1606 		m_freem(m);
1607 		return (NULL);
1608 	}
1609 
1610 	m = m_pullup(m, sizeof(struct ether_header));
1611 	if (m == NULL)
1612 		return (NULL);
1613 
1614 	n = m_getptr(m, sizeof(struct ether_header), &off);
1615 	if (n == NULL) {
1616 		m_freem(m);
1617 		return (NULL);
1618 	}
1619 
1620 	if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
1621 		n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
1622 		m_freem(m);
1623 		if (n == NULL)
1624 			return (NULL);
1625 		m = n;
1626 	}
1627 
1628 	return (m);
1629 }
1630 
/*
 * Handle a keepalive packet that made the round trip over the
 * tunnel.  Keepalives carry an uptime stamp and random bytes signed
 * with a siphash key generated on this host, so a valid digest
 * proves the packet is one of ours and recent; a good keepalive
 * advances the keepalive state machine towards (or keeps it in) the
 * UP state.  Always consumes the mbuf.
 */
static void
gre_keepalive_recv(struct ifnet *ifp, struct mbuf *m)
{
	struct gre_softc *sc = ifp->if_softc;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	uint8_t digest[SIPHASH_DIGEST_LENGTH];
	int uptime, delta;
	int tick = ticks;

	/* keepalives only count when enabled and in our own rdomain */
	if (sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		goto drop;

	if (m->m_pkthdr.len < sizeof(*gk))
		goto drop;
	m = m_pullup(m, sizeof(*gk));
	if (m == NULL)
		return;

	/* reject stamps from the future or older than ~10 seconds */
	gk = mtod(m, struct gre_keepalive *);
	uptime = bemtoh32(&gk->gk_uptime) - sc->sc_ka_bias;
	delta = tick - uptime;
	if (delta < 0)
		goto drop;
	if (delta > hz * 10) /* magic */
		goto drop;

	/* avoid too much siphash work */
	delta = tick - sc->sc_ka_recvtm;
	if (delta > 0 && delta < (hz / 10))
		goto drop;

	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(digest, &ctx);

	if (memcmp(digest, gk->gk_digest, sizeof(digest)) != 0)
		goto drop;

	sc->sc_ka_recvtm = tick;

	switch (sc->sc_ka_state) {
	case GRE_KA_DOWN:
		/* first sign of life: hold until enough arrive in a row */
		sc->sc_ka_state = GRE_KA_HOLD;
		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
		    16 * sc->sc_ka_count);
		break;
	case GRE_KA_HOLD:
		if (--sc->sc_ka_holdcnt > 0)
			break;

		sc->sc_ka_state = GRE_KA_UP;
		gre_link_state(&sc->sc_if, sc->sc_ka_state);
		break;

	case GRE_KA_UP:
		/* slowly decay the hold requirement while we stay up */
		sc->sc_ka_holdmax--;
		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
		break;
	}

	/* push the hold timer out again */
	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);

drop:
	m_freem(m);
}
1700 
/*
 * if_output for gre(4): validate the address family, tag the packet
 * to catch encapsulation loops, and queue it for gre_start() to
 * encapsulate and transmit.
 */
static int
gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	struct m_tag *mtag;
	int error = 0;

	/* gre processing can be administratively disabled (gre_allow) */
	if (!gre_allow) {
		error = EACCES;
		goto drop;
	}

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = ENETDOWN;
		goto drop;
	}

	switch (dst->sa_family) {
	case AF_INET:
#ifdef INET6
	case AF_INET6:
#endif
#ifdef MPLS
	case AF_MPLS:
#endif
		break;
	default:
		error = EAFNOSUPPORT;
		goto drop;
	}

	/* Try to limit infinite recursion through misconfiguration. */
	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
		    sizeof(ifp->if_index)) == 0) {
			m_freem(m);
			error = EIO;
			goto end;
		}
	}

	/* tag the packet with our index so we can spot it coming back */
	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL) {
		m_freem(m);
		error = ENOBUFS;
		goto end;
	}
	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
	m_tag_prepend(m, mtag);

	/* remember the payload family for gre_start() */
	m->m_pkthdr.ph_family = dst->sa_family;

	error = if_enqueue(ifp, m);
end:
	if (error)
		ifp->if_oerrors++;
	return (error);

drop:
	m_freem(m);
	return (error);
}
1764 
1765 void
1766 gre_start(struct ifnet *ifp)
1767 {
1768 	struct gre_softc *sc = ifp->if_softc;
1769 	struct mbuf *m;
1770 	int af;
1771 #if NBPFILTER > 0
1772 	caddr_t if_bpf;
1773 #endif
1774 
1775 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
1776 		af = m->m_pkthdr.ph_family;
1777 
1778 #if NBPFILTER > 0
1779 		if_bpf = ifp->if_bpf;
1780 		if (if_bpf)
1781 			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
1782 #endif
1783 
1784 		m = gre_l3_encap(&sc->sc_tunnel, m, af);
1785 		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
1786 			ifp->if_oerrors++;
1787 			continue;
1788 		}
1789 	}
1790 }
1791 
/*
 * rtrequest hook for mgre(4).  When a local route for one of the
 * interface's own addresses is added, RTF_LLINFO is cleared from it,
 * but only when lo0 in the same rdomain has an address of the same
 * family.  NOTE(review): presumably this keeps local routes from
 * being handled like tunnel neighbour entries - confirm against the
 * rtable/ifa code.
 */
void
mgre_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* is the route for one of our own addresses? */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/* check lo0 has an address of the same family */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1834 
/*
 * if_output for mgre(4).  The gateway of the (host) route supplies
 * the tunnel destination for each packet, so encapsulation happens
 * here rather than in the start routine; mgre_start() only has to
 * feed bpf and transmit.
 */
static int
mgre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dest,
    struct rtentry *rt0)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct sockaddr *gate;
	struct rtentry *rt;
	struct m_tag *mtag;
	int error = 0;
	sa_family_t af;
	const void *addr;

	/* gre processing can be administratively disabled (gre_allow) */
	if (!gre_allow) {
		error = EACCES;
		goto drop;
	}

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = ENETDOWN;
		goto drop;
	}

	switch (dest->sa_family) {
	case AF_INET:
#ifdef INET6
	case AF_INET6:
#endif
#ifdef MPLS
	case AF_MPLS:
#endif
		break;
	default:
		error = EAFNOSUPPORT;
		goto drop;
	}

	if (ISSET(m->m_flags, M_MCAST|M_BCAST)) {
		error = ENETUNREACH;
		goto drop;
	}

	rt = rt_getll(rt0);

	/* check rt_expire? */
	if (ISSET(rt->rt_flags, RTF_REJECT)) {
		error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
		goto drop;
	}
	/* only host routes carry a usable tunnel endpoint */
	if (!ISSET(rt->rt_flags, RTF_HOST)) {
		error = EHOSTUNREACH;
		goto drop;
	}
	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		error = EINVAL;
		goto drop;
	}

	/* the gateway must match the tunnel's outer address family */
	gate = rt->rt_gateway;
	af = gate->sa_family;
	if (af != sc->sc_tunnel.t_af) {
		error = EAGAIN;
		goto drop;
	}

	/* Try to limit infinite recursion through misconfiguration. */
	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
		    sizeof(ifp->if_index)) == 0) {
			error = EIO;
			goto drop;
		}
	}

	/* tag the packet with our index so we can spot it coming back */
	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL) {
		error = ENOBUFS;
		goto drop;
	}
	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
	m_tag_prepend(m, mtag);

	/* pull the tunnel destination out of the gateway sockaddr */
	switch (af) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)gate;
		addr = &sin->sin_addr;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)gate;
		addr = &sin6->sin6_addr;
		break;
	}
 #endif
	default:
		unhandled_af(af);
		/* NOTREACHED */
	}

	m = gre_l3_encap_dst(&sc->sc_tunnel, addr, m, dest->sa_family);
	if (m == NULL)
		return (ENOBUFS);

	/* remember the payload family for mgre_start()'s bpf tap */
	m->m_pkthdr.ph_family = dest->sa_family;

	error = if_enqueue(ifp, m);
	if (error)
		ifp->if_oerrors++;
	return (error);

drop:
	m_freem(m);
	return (error);
}
1950 
/*
 * ifq start routine for mgre(4).  Packets were already encapsulated
 * by mgre_output(), so just show bpf the inner payload and push each
 * packet into the outer ip stack.
 */
static void
mgre_start(struct ifnet *ifp)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct mbuf *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf) {
			struct m_hdr mh;
			struct mbuf *n;
			int off;

			/*
			 * bpf should only see the payload, which
			 * starts if_hdrlen bytes into the chain.
			 * borrow the tail of the chain from that
			 * offset via a fake mbuf header on the stack.
			 */
			n = m_getptr(m, ifp->if_hdrlen, &off);
			KASSERT(n != NULL);

			mh.mh_flags = 0;
			mh.mh_next = n->m_next;
			mh.mh_len = n->m_len - off;
			mh.mh_data = n->m_data + off;

			bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family,
			    (struct mbuf *)&mh, BPF_DIRECTION_OUT);
		}
#endif

		if (gre_ip_output(&sc->sc_tunnel, m) != 0) {
 			ifp->if_oerrors++;
 			continue;
 		}
	}
}
1987 
/*
 * ifq start routine for egre(4): wrap each queued ethernet frame in
 * GRE and transmit it over the tunnel.
 */
static void
egre_start(struct ifnet *ifp)
{
	struct egre_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* gre processing can be administratively disabled (gre_allow) */
	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* the encap headers get prepended into this empty mbuf */
		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap(&sc->sc_tunnel, m, htons(ETHERTYPE_TRANSETHER),
		    sc->sc_tunnel.t_ttl, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
2030 
/*
 * Encapsulate an l3 payload for transmission to dst: work out the
 * GRE protocol, the inner tos (or MPLS EXP bits) and where the
 * payload's ttl lives so it can optionally be copied into the outer
 * header, then prepend the GRE and outer ip headers.  Returns NULL
 * (with the mbuf freed by the failing primitive) on failure.
 */
static struct mbuf *
gre_l3_encap_dst(const struct gre_tunnel *tunnel, const void *dst,
    struct mbuf *m, sa_family_t af)
{
	uint16_t proto;
	uint8_t ttl, itos, otos;
	int tttl = tunnel->t_ttl;
	int ttloff;

	switch (af) {
	case AF_INET: {
		struct ip *ip;

		m = m_pullup(m, sizeof(*ip));
		if (m == NULL)
			return (NULL);

		ip = mtod(m, struct ip *);
		itos = ip->ip_tos;

		ttloff = offsetof(struct ip, ip_ttl);
		proto = htons(ETHERTYPE_IP);
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;

		m = m_pullup(m, sizeof(*ip6));
		if (m == NULL)
			return (NULL);

		/* the traffic class is bits 20-27 of the flow word */
		ip6 = mtod(m, struct ip6_hdr *);
		itos = (ntohl(ip6->ip6_flow) & 0x0ff00000) >> 20;

		ttloff = offsetof(struct ip6_hdr, ip6_hlim);
		proto = htons(ETHERTYPE_IPV6);
		break;
	}
 #endif
#ifdef MPLS
	case AF_MPLS: {
		uint32_t shim;

		m = m_pullup(m, sizeof(shim));
		if (m == NULL)
			return (NULL);

		/* scale the 3 EXP bits up into the top of a tos byte */
		shim = bemtoh32(mtod(m, uint32_t *)) & MPLS_EXP_MASK;
		itos = (shim >> MPLS_EXP_OFFSET) << 5;

		/* the mpls ttl is the last byte of the 4 byte shim */
		ttloff = 3;

		if (m->m_flags & (M_BCAST | M_MCAST))
			proto = htons(ETHERTYPE_MPLS_MCAST);
		else
			proto = htons(ETHERTYPE_MPLS);
		break;
	}
#endif
	default:
		unhandled_af(af);
	}

	/* a tunnel ttl of -1 means copy the payload's ttl */
	if (tttl == -1) {
		KASSERT(m->m_len > ttloff); /* m_pullup has happened */

		ttl = *(m->m_data + ttloff);
	} else
		ttl = tttl;

	itos = gre_l3_tos(tunnel, m, itos);
	ip_ecn_ingress(tunnel->t_ecn, &otos, &itos);

	return (gre_encap_dst(tunnel, dst, m, proto, ttl, otos));
}
2107 
/*
 * Prepend the GRE header (and the optional key field) to a packet,
 * then hand it on to have the outer ip header added.  Returns NULL
 * if the mbuf prepend fails.
 */
static struct mbuf *
gre_encap_dst(const struct gre_tunnel *tunnel, const union gre_addr *dst,
    struct mbuf *m, uint16_t proto, uint8_t ttl, uint8_t tos)
{
	struct gre_header *gh;
	struct gre_h_key *gkh;
	int hlen;

	hlen = sizeof(*gh);
	if (tunnel->t_key_mask != GRE_KEY_NONE)
		hlen += sizeof(*gkh);

	m = m_prepend(m, hlen, M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	gh = mtod(m, struct gre_header *);
	gh->gre_flags = GRE_VERS_0;
	gh->gre_proto = proto;
	if (tunnel->t_key_mask != GRE_KEY_NONE) {
		gh->gre_flags |= htons(GRE_KP);

		gkh = (struct gre_h_key *)(gh + 1);
		gkh->gre_key = tunnel->t_key;

		/* fold the packet's flow id into the key's entropy bits */
		if (tunnel->t_key_mask == GRE_KEY_ENTROPY &&
		    ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID)) {
			gkh->gre_key |= htonl(~GRE_KEY_ENTROPY &
			    (m->m_pkthdr.ph_flowid & M_FLOWID_MASK));
		}
	}

	return (gre_encap_dst_ip(tunnel, dst, m, ttl, tos));
}
2142 
/*
 * Prepend the outer IPv4/IPv6 header for the tunnel.  The caller
 * supplies the (possibly per-packet) destination, ttl and tos.
 * Returns NULL if the mbuf prepend fails.
 */
static struct mbuf *
gre_encap_dst_ip(const struct gre_tunnel *tunnel, const union gre_addr *dst,
    struct mbuf *m, uint8_t ttl, uint8_t tos)
{
	switch (tunnel->t_af) {
	case AF_INET: {
		struct ip *ip;

		m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
		if (m == NULL)
			return (NULL);

		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_off = tunnel->t_df;	/* DF bit per tunnel config */
		ip->ip_tos = tos;
		ip->ip_len = htons(m->m_pkthdr.len);
		ip->ip_ttl = ttl;
		ip->ip_p = IPPROTO_GRE;
		ip->ip_src = tunnel->t_src4;
		ip->ip_dst = dst->in4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;
		int len = m->m_pkthdr.len;

		m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
		if (m == NULL)
			return (NULL);

		/* seed the flow label from the packet's flow id if set */
		ip6 = mtod(m, struct ip6_hdr *);
		ip6->ip6_flow = ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID) ?
		    htonl(m->m_pkthdr.ph_flowid & M_FLOWID_MASK) : 0;
		ip6->ip6_vfc |= IPV6_VERSION;
		ip6->ip6_flow |= htonl((uint32_t)tos << 20);
		ip6->ip6_plen = htons(len);
		ip6->ip6_nxt = IPPROTO_GRE;
		ip6->ip6_hlim = ttl;
		ip6->ip6_src = tunnel->t_src6;
		ip6->ip6_dst = dst->in6;

		/* ipv6 has no DF bit; ask the stack not to fragment */
		if (tunnel->t_df)
			SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);

		break;
	}
#endif /* INET6 */
	default:
		panic("%s: unsupported af %d in %p", __func__, tunnel->t_af,
		    tunnel);
	}

	return (m);
}
2200 
/*
 * Hand a fully encapsulated packet to the outer ip stack, in the
 * routing table the tunnel is configured to transmit in.  Always
 * returns 0.
 */
static int
gre_ip_output(const struct gre_tunnel *tunnel, struct mbuf *m)
{
	/* the encapsulated packet is unicast from the tunnel's view */
	m->m_flags &= ~(M_BCAST|M_MCAST);
	m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;

#if NPF > 0
	/* the packet crossed into a new address space; reset pf state */
	pf_pkt_addr_changed(m);
#endif

	switch (tunnel->t_af) {
	case AF_INET:
		ip_send(m);
		break;
#ifdef INET6
	case AF_INET6:
		ip6_send(m);
		break;
#endif
	default:
		panic("%s: unsupported af %d in %p", __func__, tunnel->t_af,
		    tunnel);
	}

	return (0);
}
2227 
/*
 * Handle the ioctls shared by the gre-based interfaces: mtu,
 * vnetid/key, tunnel endpoint addresses, outer routing table and
 * the DF bit.  Returns ENOTTY for anything it does not know so the
 * caller can keep dispatching.
 */
static int
gre_tunnel_ioctl(struct ifnet *ifp, struct gre_tunnel *tunnel,
    u_long cmd, void *data)
{
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFMTU:
		/* 576 is presumably the historical ipv4 minimum - confirm */
		if (ifr->ifr_mtu < 576) {
			error = EINVAL;
			break;
		}
		ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	case SIOCSVNETID:
		error = gre_set_vnetid(tunnel, ifr);
		break;

	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;
	case SIOCDVNETID:
		error = gre_del_vnetid(tunnel);
		break;

	case SIOCSVNETFLOWID:
		error = gre_set_vnetflowid(tunnel, ifr);
		break;

	case SIOCGVNETFLOWID:
		error = gre_get_vnetflowid(tunnel, ifr);
		break;

	case SIOCSLIFPHYADDR:
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		error = gre_del_tunnel(tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		/* the outer rtable must exist before we commit to it */
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ? 1 : 0;
		break;

	default:
		error = ENOTTY;
		break;
	}

	return (error);
}
2304 
2305 static uint8_t
2306 gre_l2_tos(const struct gre_tunnel *t, const struct mbuf *m)
2307 {
2308 	uint8_t prio;
2309 
2310 	switch (t->t_txhprio) {
2311 	case IF_HDRPRIO_PACKET:
2312 		prio = m->m_pkthdr.pf.prio;
2313 		break;
2314 	default:
2315 		prio = t->t_txhprio;
2316 		break;
2317 	}
2318 
2319 	return (IFQ_PRIO2TOS(prio));
2320 }
2321 
2322 static uint8_t
2323 gre_l3_tos(const struct gre_tunnel *t, const struct mbuf *m, uint8_t tos)
2324 {
2325 	uint8_t prio;
2326 
2327 	switch (t->t_txhprio) {
2328 	case IF_HDRPRIO_PAYLOAD:
2329 		return (tos);
2330 	case IF_HDRPRIO_PACKET:
2331 		prio = m->m_pkthdr.pf.prio;
2332 		break;
2333 	default:
2334 		prio = t->t_txhprio;
2335 		break;
2336 	}
2337 
2338 	return (IFQ_PRIO2TOS(prio) | (tos & IPTOS_ECN_MASK));
2339 }
2340 
/*
 * if_ioctl for gre(4): interface up/down, keepalive configuration,
 * ttl/ecn/priority settings; everything else is delegated to the
 * common tunnel ioctl handler.
 */
static int
gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct gre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		/* FALLTHROUGH */
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_down(sc);
		}
		break;
	case SIOCSIFRDOMAIN:
		/* let if_rdomain do its thing */
		error = ENOTTY;
		break;

	case SIOCSETKALIVE:
		/*
		 * timeout up to a day, count up to 256; timeout and
		 * count must be both zero (off) or both nonzero (on).
		 */
		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256 ||
		    (ikar->ikar_timeo == 0) != (ikar->ikar_cnt == 0))
			return (EINVAL);

		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;

			/* fresh key and bias so old keepalives are invalid */
			arc4random_buf(&sc->sc_ka_key, sizeof(sc->sc_ka_key));
			sc->sc_ka_bias = arc4random();
			sc->sc_ka_holdmax = sc->sc_ka_count;

			sc->sc_ka_recvtm = ticks - hz;
			timeout_add(&sc->sc_ka_send, 1);
			timeout_add_sec(&sc->sc_ka_hold,
			    sc->sc_ka_timeo * sc->sc_ka_count);
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSLIFPHYTTL:
		/* -1 means copy the payload ttl into the outer header */
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}
2452 
/*
 * Handle ioctl requests on a multipoint mgre(4) interface.  The tunnel
 * endpoint and identity settings (vnetid, rdomain, local address) may
 * only be changed while the interface is not running, since they key
 * the lookup tree the interface is inserted into by mgre_up().
 */
static int
mgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* bring the interface up or down to match IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		/* -1 means "copy the payload TTL" */
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = mgre_set_tunnel(sc, (struct if_laddrreq *)data);
		break;
	case SIOCGLIFPHYADDR:
		error = mgre_get_tunnel(sc, (struct if_laddrreq *)data);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		/* these change our identity in the lookup tree */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
 		break;
 	}

	return (error);
}
2548 
/*
 * Set the local endpoint of a multipoint mgre(4) tunnel.  Only the
 * source address may be given (destinations are resolved per-packet
 * via the routing table), so the dstaddr in the request must be
 * AF_UNSPEC.  Validation happens fully before anything is committed.
 */
static int
mgre_set_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct sockaddr *addr = (struct sockaddr *)&req->addr;
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *addr4;
#ifdef INET6
	struct sockaddr_in6 *addr6;
	int error;
#endif

	/* multipoint: no fixed destination allowed */
	if (dstaddr->sa_family != AF_UNSPEC)
		return (EINVAL);

	/* validate */
	switch (addr->sa_family) {
	case AF_INET:
		if (addr->sa_len != sizeof(*addr4))
			return (EINVAL);

		addr4 = (struct sockaddr_in *)addr;
		/* the local address must be a real unicast host address */
		if (in_nullhost(addr4->sin_addr) ||
		    IN_MULTICAST(addr4->sin_addr.s_addr))
			return (EINVAL);

		tunnel->t_src4 = addr4->sin_addr;
		tunnel->t_dst4.s_addr = INADDR_ANY;

		break;
#ifdef INET6
	case AF_INET6:
		if (addr->sa_len != sizeof(*addr6))
			return (EINVAL);

		addr6 = (struct sockaddr_in6 *)addr;
		if (IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&addr6->sin6_addr))
			return (EINVAL);

		/* fold the scope id into the stored address */
		error = in6_embedscope(&tunnel->t_src6, addr6, NULL);
		if (error != 0)
			return (error);

		memset(&tunnel->t_dst6, 0, sizeof(tunnel->t_dst6));

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* commit */
	tunnel->t_af = addr->sa_family;

	return (0);
}
2606 
/*
 * Report the local endpoint of a multipoint mgre(4) tunnel.  The
 * destination is always returned as AF_UNSPEC since mgre resolves
 * destinations per-packet.
 */
static int
mgre_get_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif

	switch (tunnel->t_af) {
	case AF_UNSPEC:
		/* no endpoint configured yet */
		return (EADDRNOTAVAIL);
	case AF_INET:
		sin = (struct sockaddr_in *)&req->addr;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_src4;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&req->addr;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		/* pull the embedded scope id back out for userland */
		in6_recoverscope(sin6, &tunnel->t_src6);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	/* minimal AF_UNSPEC sockaddr: just len + family */
	dstaddr->sa_len = 2;
	dstaddr->sa_family = AF_UNSPEC;

	return (0);
}
2646 
/*
 * Handle ioctl requests on an egre(4) (Ethernet-over-GRE) interface.
 * Tunnel identity settings may only change while the interface is down;
 * anything unhandled falls through to the generic tunnel ioctls and
 * then to ether_ioctl().
 */
static int
egre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct egre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* bring the interface up or down to match IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		/* unlike gre(4), an L2 tunnel can't inherit a payload TTL */
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSLIFPHYADDR:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		/* these change our identity in the lookup tree */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		if (error == ENOTTY)
			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
2731 
/*
 * Handle ioctl requests on an nvgre(4) interface.  On top of the usual
 * tunnel settings, nvgre has a parent interface (for multicast flooding)
 * and a learned Ethernet address map that behaves like a bridge cache;
 * any change to the tunnel identity flushes that map.  Identity changes
 * are refused while the interface is running.
 */
static int
nvgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct nvgre_softc *sc = ifp->if_softc;
	struct gre_tunnel *tunnel = &sc->sc_tunnel;

	struct ifreq *ifr = (struct ifreq *)data;
	struct if_parent *parent = (struct if_parent *)data;
	struct ifbrparam *bparam = (struct ifbrparam *)data;
	struct ifnet *ifp0;

	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* bring the interface up or down to match IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_up(sc);
			else
				error = ENETRESET;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_down(sc);
		}
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 0);
		if (error == 0)
			nvgre_flush_map(sc);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = gre_del_tunnel(tunnel);
		if (error == 0)
			nvgre_flush_map(sc);
		break;

	case SIOCSIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = nvgre_set_parent(sc, parent->ifp_parent);
		if (error == 0)
			nvgre_flush_map(sc);
		break;
	case SIOCGIFPARENT:
		ifp0 = if_get(sc->sc_ifp0);
		if (ifp0 == NULL)
			error = EADDRNOTAVAIL;
		else {
			memcpy(parent->ifp_parent, ifp0->if_xname,
			    sizeof(parent->ifp_parent));
		}
		if_put(ifp0);
		break;
	case SIOCDIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* commit */
		sc->sc_ifp0 = 0;
		nvgre_flush_map(sc);
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* nvgre always carries entropy, so only 24 bits of vnetid */
		if (ifr->ifr_vnetid < GRE_KEY_ENTROPY_MIN ||
		    ifr->ifr_vnetid > GRE_KEY_ENTROPY_MAX) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_key = htonl(ifr->ifr_vnetid << GRE_KEY_ENTROPY_SHIFT);
		nvgre_flush_map(sc);
		break;
	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		nvgre_flush_map(sc);
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ? 1 : 0;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = tunnel->t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCBRDGSCACHE:
		/* limit on the number of learned Ethernet addresses */
		if (bparam->ifbrp_csize < 1) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_ether_max = bparam->ifbrp_csize;
		break;
	case SIOCBRDGGCACHE:
		bparam->ifbrp_csize = sc->sc_ether_max;
		break;

	case SIOCBRDGSTO:
		/* learned address timeout; kept internally in ticks */
		if (bparam->ifbrp_ctime < 0 ||
		    bparam->ifbrp_ctime > INT_MAX / hz) {
			error = EINVAL;
			break;
		}
		sc->sc_ether_tmo = bparam->ifbrp_ctime * hz;
		break;
	case SIOCBRDGGTO:
		bparam->ifbrp_ctime = sc->sc_ether_tmo / hz;
		break;

	case SIOCBRDGRTS:
		error = nvgre_rtfind(sc, (struct ifbaconf *)data);
		break;
	case SIOCBRDGFLUSH:
		nvgre_flush_map(sc);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
2936 
/*
 * Handle ioctl requests on an eoip(4) (MikroTik Ethernet-over-IP)
 * interface.  The 16 bit tunnel id doubles as the GRE key for tunnel
 * lookup.  Identity settings may only change while the interface is
 * down; unhandled requests go to ether_ioctl().
 */
static int
eoip_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* bring the interface up or down to match IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_down(sc);
		}
		break;

	case SIOCSETKALIVE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* period is capped at one day, count at 256 */
		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256)
			return (EINVAL);

		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			/* disable keepalives */
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* the eoip tunnel id is only 16 bits wide */
		if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffff)
			return (EINVAL);

		sc->sc_tunnel.t_key = htole16(ifr->ifr_vnetid); /* for cmp */
		sc->sc_tunnel_id = htole16(ifr->ifr_vnetid);
		break;

	case SIOCGVNETID:
		ifr->ifr_vnetid = letoh16(sc->sc_tunnel_id);
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* eoip is point-to-point: unicast destination only */
		error = gre_set_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		error = gre_del_tunnel(&sc->sc_tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
		break;

	case SIOCSLIFPHYTTL:
		/* an L2 tunnel can't inherit a payload TTL */
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;
	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		sc->sc_tunnel.t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = sc->sc_tunnel.t_df ? 1 : 0;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
3101 
/*
 * Bring a gre(4) interface up: mark it running and, if keepalives are
 * configured, send the first probe immediately.
 */
static int
gre_up(struct gre_softc *sc)
{
	NET_ASSERT_LOCKED();
	SET(sc->sc_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE)
		gre_keepalive_send(sc);

	return (0);
}
3113 
/*
 * Bring a gre(4) interface down: clear IFF_RUNNING, wait for any
 * pending keepalive timeouts to finish, and report the keepalive
 * state as down.
 */
static int
gre_down(struct gre_softc *sc)
{
	NET_ASSERT_LOCKED();
	CLR(sc->sc_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		/* barrier: the timeouts check IFF_RUNNING before rearming */
		timeout_del_barrier(&sc->sc_ka_hold);
		timeout_del_barrier(&sc->sc_ka_send);

		sc->sc_ka_state = GRE_KA_DOWN;
		gre_link_state(&sc->sc_if, sc->sc_ka_state);
	}

	return (0);
}
3130 
3131 static void
3132 gre_link_state(struct ifnet *ifp, unsigned int state)
3133 {
3134 	int link_state = LINK_STATE_UNKNOWN;
3135 
3136 	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
3137 		switch (state) {
3138 		case GRE_KA_NONE:
3139 			/* maybe up? or down? it's unknown, really */
3140 			break;
3141 		case GRE_KA_UP:
3142 			link_state = LINK_STATE_UP;
3143 			break;
3144 		default:
3145 			link_state = LINK_STATE_KALIVE_DOWN;
3146 			break;
3147 		}
3148 	}
3149 
3150 	if (ifp->if_link_state != link_state) {
3151 		ifp->if_link_state = link_state;
3152 		if_link_state_change(ifp);
3153 	}
3154 }
3155 
/*
 * Periodic keepalive transmit.  Builds a small authenticated keepalive
 * packet, encapsulates it twice - once addressed from the peer back to
 * us (so the peer can simply forward its payload to return it), then
 * once in the real tunnel towards the peer - and sends it.  Reschedules
 * itself every sc_ka_timeo seconds.
 */
static void
gre_keepalive_send(void *arg)
{
	struct gre_tunnel t;
	struct gre_softc *sc = arg;
	struct mbuf *m;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	int linkhdr, len;
	uint16_t proto;
	uint8_t ttl;
	uint8_t tos;

	/*
	 * re-schedule immediately, so we deal with incomplete configuation
	 * or temporary errors.
	 */
	if (sc->sc_ka_timeo)
		timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);

	/* nothing useful to send while down or unconfigured */
	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_af == AF_UNSPEC ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#endif
	len = linkhdr + sizeof(*gk);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	if (len > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	/* reserve headroom for the headers prepended by gre_encap() */
	m->m_pkthdr.len = m->m_len = len;
	m_adj(m, linkhdr);

	/*
	 * build the inside packet
	 */
	gk = mtod(m, struct gre_keepalive *);
	htobem32(&gk->gk_uptime, sc->sc_ka_bias + ticks);
	htobem32(&gk->gk_random, arc4random());

	/* sign uptime+random so replies can be authenticated on receive */
	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(gk->gk_digest, &ctx);

	ttl = sc->sc_tunnel.t_ttl == -1 ? ip_defttl : sc->sc_tunnel.t_ttl;

	m->m_pkthdr.pf.prio = sc->sc_if.if_llprio;
	tos = gre_l3_tos(&sc->sc_tunnel, m, IFQ_PRIO2TOS(m->m_pkthdr.pf.prio));

	/* inner encapsulation: the reverse tunnel, peer back to us */
	t.t_af = sc->sc_tunnel.t_af;
	t.t_df = sc->sc_tunnel.t_df;
	t.t_src = sc->sc_tunnel.t_dst;
	t.t_dst = sc->sc_tunnel.t_src;
	t.t_key = sc->sc_tunnel.t_key;
	t.t_key_mask = sc->sc_tunnel.t_key_mask;

	m = gre_encap(&t, m, htons(0), ttl, tos);
	if (m == NULL)
		return;

	switch (sc->sc_tunnel.t_af) {
	case AF_INET: {
		struct ip *ip;

		/* finish the inner IP header by hand */
		ip = mtod(m, struct ip *);
		ip->ip_id = htons(ip_randomid());
		ip->ip_sum = 0;
		ip->ip_sum = in_cksum(m, sizeof(*ip));

		proto = htons(ETHERTYPE_IP);
		break;
	}
#ifdef INET6
	case AF_INET6:
		proto = htons(ETHERTYPE_IPV6);
		break;
#endif
	default:
		m_freem(m);
		return;
	}

	/*
	 * put it in the tunnel
	 */
	m = gre_encap(&sc->sc_tunnel, m, proto, ttl, tos);
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);
}
3266 
/*
 * Keepalive hold timer expiry: no valid keepalive reply arrived within
 * the hold period, so declare the tunnel peer down.
 */
static void
gre_keepalive_hold(void *arg)
{
	struct gre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    sc->sc_ka_state == GRE_KA_NONE)
		return;

	NET_LOCK();
	sc->sc_ka_state = GRE_KA_DOWN;
	gre_link_state(ifp, sc->sc_ka_state);
	NET_UNLOCK();
}
3282 
/*
 * Configure both endpoints of a tunnel.  With ucast set the destination
 * must be unicast (eoip); otherwise it must be multicast (nvgre).  All
 * validation happens before anything is committed, so a failing request
 * leaves the tunnel untouched.
 */
static int
gre_set_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req, int ucast)
{
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	int error;
#endif

	/* sa_family and sa_len must be equal */
	if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len)
		return (EINVAL);

	/* validate */
	switch (dst->sa_family) {
	case AF_INET:
		if (dst->sa_len != sizeof(*dst4))
			return (EINVAL);

		/* the local address must be a real unicast host address */
		src4 = (struct sockaddr_in *)src;
		if (in_nullhost(src4->sin_addr) ||
		    IN_MULTICAST(src4->sin_addr.s_addr))
			return (EINVAL);

		/* destination multicast-ness must match the tunnel type */
		dst4 = (struct sockaddr_in *)dst;
		if (in_nullhost(dst4->sin_addr) ||
		    (IN_MULTICAST(dst4->sin_addr.s_addr) != !ucast))
			return (EINVAL);

		tunnel->t_src4 = src4->sin_addr;
		tunnel->t_dst4 = dst4->sin_addr;

		break;
#ifdef INET6
	case AF_INET6:
		if (dst->sa_len != sizeof(*dst6))
			return (EINVAL);

		src6 = (struct sockaddr_in6 *)src;
		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
			return (EINVAL);

		dst6 = (struct sockaddr_in6 *)dst;
		if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) != !ucast)
			return (EINVAL);

		/* both ends must be in the same scope */
		if (src6->sin6_scope_id != dst6->sin6_scope_id)
			return (EINVAL);

		/* fold the scope ids into the stored addresses */
		error = in6_embedscope(&tunnel->t_src6, src6, NULL);
		if (error != 0)
			return (error);

		error = in6_embedscope(&tunnel->t_dst6, dst6, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* commit */
	tunnel->t_af = dst->sa_family;

	return (0);
}
3355 
/*
 * Report both configured endpoints of a tunnel into an if_laddrreq.
 * Fails with EADDRNOTAVAIL if no endpoints have been set yet.
 */
static int
gre_get_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req)
{
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6 /* ifconfig already embeds the scopeid */
	struct sockaddr_in6 *sin6;
#endif

	switch (tunnel->t_af) {
	case AF_UNSPEC:
		return (EADDRNOTAVAIL);
	case AF_INET:
		sin = (struct sockaddr_in *)src;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_src4;

		sin = (struct sockaddr_in *)dst;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_dst4;

		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)src;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		/* pull the embedded scope id back out for userland */
		in6_recoverscope(sin6, &tunnel->t_src6);

		sin6 = (struct sockaddr_in6 *)dst;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		in6_recoverscope(sin6, &tunnel->t_dst6);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	return (0);
}
3406 
3407 static int
3408 gre_del_tunnel(struct gre_tunnel *tunnel)
3409 {
3410 	/* commit */
3411 	tunnel->t_af = AF_UNSPEC;
3412 
3413 	return (0);
3414 }
3415 
3416 static int
3417 gre_set_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3418 {
3419 	uint32_t key;
3420 	uint32_t min = GRE_KEY_MIN;
3421 	uint32_t max = GRE_KEY_MAX;
3422 	unsigned int shift = GRE_KEY_SHIFT;
3423 	uint32_t mask = GRE_KEY_MASK;
3424 
3425 	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
3426 		min = GRE_KEY_ENTROPY_MIN;
3427 		max = GRE_KEY_ENTROPY_MAX;
3428 		shift = GRE_KEY_ENTROPY_SHIFT;
3429 		mask = GRE_KEY_ENTROPY;
3430 	}
3431 
3432 	if (ifr->ifr_vnetid < min || ifr->ifr_vnetid > max)
3433 		return (EINVAL);
3434 
3435 	key = htonl(ifr->ifr_vnetid << shift);
3436 
3437 	/* commit */
3438 	tunnel->t_key_mask = mask;
3439 	tunnel->t_key = key;
3440 
3441 	return (0);
3442 }
3443 
3444 static int
3445 gre_get_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3446 {
3447 	int shift;
3448 
3449 	switch (tunnel->t_key_mask) {
3450 	case GRE_KEY_NONE:
3451 		return (EADDRNOTAVAIL);
3452 	case GRE_KEY_ENTROPY:
3453 		shift = GRE_KEY_ENTROPY_SHIFT;
3454 		break;
3455 	case GRE_KEY_MASK:
3456 		shift = GRE_KEY_SHIFT;
3457 		break;
3458 	}
3459 
3460 	ifr->ifr_vnetid = ntohl(tunnel->t_key) >> shift;
3461 
3462 	return (0);
3463 }
3464 
3465 static int
3466 gre_del_vnetid(struct gre_tunnel *tunnel)
3467 {
3468 	tunnel->t_key_mask = GRE_KEY_NONE;
3469 
3470 	return (0);
3471 }
3472 
/*
 * Enable or disable use of the low GRE key bits as flow entropy.  The
 * existing vnetid is re-shifted to keep its value, which can fail with
 * ERANGE if it does not fit in the narrower entropy layout.  Requires
 * a key to be configured already.
 */
static int
gre_set_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
{
	uint32_t mask, key;

	if (tunnel->t_key_mask == GRE_KEY_NONE)
		return (EADDRNOTAVAIL);

	mask = ifr->ifr_vnetid ? GRE_KEY_ENTROPY : GRE_KEY_MASK;
	if (tunnel->t_key_mask == mask) {
		/* nop */
		return (0);
	}

	key = ntohl(tunnel->t_key);
	if (mask == GRE_KEY_ENTROPY) {
		/* vnetid shrinks to 24 bits; make sure it still fits */
		if (key > GRE_KEY_ENTROPY_MAX)
			return (ERANGE);

		key = htonl(key << GRE_KEY_ENTROPY_SHIFT);
	} else
		key = htonl(key >> GRE_KEY_ENTROPY_SHIFT);

	/* commit */
	tunnel->t_key_mask = mask;
	tunnel->t_key = key;

	return (0);
}
3502 
3503 static int
3504 gre_get_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3505 {
3506 	if (tunnel->t_key_mask == GRE_KEY_NONE)
3507 		return (EADDRNOTAVAIL);
3508 
3509 	ifr->ifr_vnetid = tunnel->t_key_mask == GRE_KEY_ENTROPY;
3510 
3511 	return (0);
3512 }
3513 
/*
 * Bring an mgre(4) interface up: compute the encapsulation header
 * length for the configured address family and insert the interface
 * into the global mgre lookup tree.  Fails if no endpoint is
 * configured or another mgre interface already claims the same
 * identity.
 */
static int
mgre_up(struct mgre_softc *sc)
{
	unsigned int hlen;

	switch (sc->sc_tunnel.t_af) {
	case AF_UNSPEC:
		return (EDESTADDRREQ);
	case AF_INET:
		hlen = sizeof(struct ip);
		break;
#ifdef INET6
	case AF_INET6:
		hlen = sizeof(struct ip6_hdr);
		break;
#endif /* INET6 */
	default:
		unhandled_af(sc->sc_tunnel.t_af);
	}

	hlen += sizeof(struct gre_header);
	if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE)
		hlen += sizeof(struct gre_h_key);

	NET_ASSERT_LOCKED();

	/* a colliding entry means the identity is already taken */
	if (RBT_INSERT(mgre_tree, &mgre_tree, sc) != NULL)
		return (EADDRINUSE);

	sc->sc_if.if_hdrlen = hlen;
	SET(sc->sc_if.if_flags, IFF_RUNNING);

	return (0);
}
3548 
/*
 * Bring an mgre(4) interface down: stop it and remove it from the
 * lookup tree so input processing no longer finds it.
 */
static int
mgre_down(struct mgre_softc *sc)
{
	NET_ASSERT_LOCKED();

	CLR(sc->sc_if.if_flags, IFF_RUNNING);
	sc->sc_if.if_hdrlen = GRE_HDRLEN; /* symmetry */

	RBT_REMOVE(mgre_tree, &mgre_tree, sc);

	/* barrier? */

	return (0);
}
3563 
/*
 * Bring an egre(4) interface up: insert it into the egre lookup tree
 * so incoming packets can be matched to it.  Fails if no tunnel
 * endpoint is configured or the identity is already taken.
 */
static int
egre_up(struct egre_softc *sc)
{
	if (sc->sc_tunnel.t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(egre_tree, &egre_tree, sc) != NULL)
		return (EADDRINUSE);

	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	return (0);
}
3579 
/*
 * Bring an egre(4) interface down: stop it and remove it from the
 * lookup tree so input processing no longer finds it.
 */
static int
egre_down(struct egre_softc *sc)
{
	NET_ASSERT_LOCKED();

	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	RBT_REMOVE(egre_tree, &egre_tree, sc);

	/* barrier? */

	return (0);
}
3593 
/* egre(4) media is virtual; changing it is not supported. */
static int
egre_media_change(struct ifnet *ifp)
{
	return (ENOTTY);
}
3599 
/* Report fixed virtual media: Ethernet autoselect, always active. */
static void
egre_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	imr->ifm_active = IFM_ETHER | IFM_AUTO;
	imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
}
3606 
/*
 * Bring an nvgre(4) interface up.  Requires a configured multicast
 * tunnel endpoint and a multicast-capable parent interface.  Inserts
 * the interface into both the multicast and unicast lookup trees,
 * joins the flood group on the parent, and hooks parent link-state and
 * detach events.  On any failure everything done so far is unwound in
 * reverse order via the goto chain.
 */
static int
nvgre_up(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp0;
	void *inm;
	int error;

	if (tunnel->t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 == NULL)
		return (ENXIO);
	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		error = ENODEV;
		goto put;
	}

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(nvgre_mcast_tree, &nvgre_mcast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto put;
	}
	if (RBT_INSERT(nvgre_ucast_tree, &nvgre_ucast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto remove_mcast;
	}

	/* join the flood group on the parent interface */
	switch (tunnel->t_af) {
	case AF_INET:
		inm = in_addmulti(&tunnel->t_dst4, ifp0);
		if (inm == NULL) {
			error = ECONNABORTED;
			goto remove_ucast;
		}
		break;
#ifdef INET6
	case AF_INET6:
		inm = in6_addmulti(&tunnel->t_dst6, ifp0, &error);
		if (inm == NULL) {
			/* error is already set */
			goto remove_ucast;
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(tunnel->t_af);
	}

	/* follow the parent's link state and detach events */
	sc->sc_lhcookie = hook_establish(ifp0->if_linkstatehooks, 0,
	    nvgre_link_change, sc);
	if (sc->sc_lhcookie == NULL) {
		error = ENOMEM;
		goto delmulti;
	}

	sc->sc_dhcookie = hook_establish(ifp0->if_detachhooks, 0,
	    nvgre_detach, sc);
	if (sc->sc_dhcookie == NULL) {
		error = ENOMEM;
		goto dislh;
	}

	if_put(ifp0);

	sc->sc_inm = inm;
	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	/* start aging the learned Ethernet address map */
	timeout_add_sec(&sc->sc_ether_age, NVGRE_AGE_TMO);

	return (0);

dislh:
	hook_disestablish(ifp0->if_linkstatehooks, sc->sc_lhcookie);
delmulti:
	switch (tunnel->t_af) {
	case AF_INET:
		in_delmulti(inm);
		break;
#ifdef INET6
	case AF_INET6:
		in6_delmulti(inm);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}
remove_ucast:
	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
remove_mcast:
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);
put:
	if_put(ifp0);
	return (error);
}
3704 
/*
 * Take an nvgre(4) interface down.  The net lock is dropped while
 * waiting for the aging timeout, pending output, and the send task to
 * drain; afterwards the parent hooks, the multicast membership, and
 * the lookup tree entries acquired in nvgre_up() are released.
 */
static int
nvgre_down(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct taskq *softnet = net_tq(ifp->if_index);
	struct ifnet *ifp0;

	NET_ASSERT_LOCKED();

	CLR(ifp->if_flags, IFF_RUNNING);

	/* wait for anything that saw IFF_RUNNING to finish */
	NET_UNLOCK();
	timeout_del_barrier(&sc->sc_ether_age);
	ifq_barrier(&ifp->if_snd);
	if (!task_del(softnet, &sc->sc_send_task))
		taskq_barrier(softnet);
	NET_LOCK();

	/* nothing is transmitting any more; flush what is still queued */
	mq_purge(&sc->sc_send_list);

	/* the parent may be gone already; if_get copes with that */
	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 != NULL) {
		hook_disestablish(ifp0->if_detachhooks, sc->sc_dhcookie);
		hook_disestablish(ifp0->if_linkstatehooks, sc->sc_lhcookie);
	}
	if_put(ifp0);

	/* leave the multicast group joined in nvgre_up() */
	switch (tunnel->t_af) {
	case AF_INET:
		in_delmulti(sc->sc_inm);
		break;

#ifdef INET6
	case AF_INET6:
		in6_delmulti(sc->sc_inm);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);

	return (0);
}
3752 
/*
 * Hook run when the parent interface's link state changes; registered
 * on the parent's linkstatehooks in nvgre_up().  Deliberately a no-op.
 */
static void
nvgre_link_change(void *arg)
{
	/* nop */
}
3758 
/*
 * Hook run when the parent interface is detached: take the nvgre
 * interface down and forget the parent's index.
 */
static void
nvgre_detach(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		nvgre_down(sc);
		if_down(ifp);
	}

	/* the parent is gone; clear the stale interface index */
	sc->sc_ifp0 = 0;
}
3772 
3773 static int
3774 nvgre_set_parent(struct nvgre_softc *sc, const char *parent)
3775 {
3776 	struct ifnet *ifp0;
3777 
3778 	ifp0 = ifunit(parent); /* doesn't need an if_put */
3779 	if (ifp0 == NULL)
3780 		return (EINVAL);
3781 
3782 	if (!ISSET(ifp0->if_flags, IFF_MULTICAST))
3783 		return (EPROTONOSUPPORT);
3784 
3785 	/* commit */
3786 	sc->sc_ifp0 = ifp0->if_index;
3787 
3788 	return (0);
3789 }
3790 
3791 static void
3792 nvgre_age(void *arg)
3793 {
3794 	struct nvgre_softc *sc = arg;
3795 	struct nvgre_entry *nv, *nnv;
3796 	int tmo = sc->sc_ether_tmo * 2;
3797 	int diff;
3798 
3799 	if (!ISSET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING))
3800 		return;
3801 
3802 	rw_enter_write(&sc->sc_ether_lock); /* XXX */
3803 	RBT_FOREACH_SAFE(nv, nvgre_map, &sc->sc_ether_map, nnv) {
3804 		if (nv->nv_type != NVGRE_ENTRY_DYNAMIC)
3805 			continue;
3806 
3807 		diff = ticks - nv->nv_age;
3808 		if (diff < tmo)
3809 			continue;
3810 
3811 		sc->sc_ether_num--;
3812 		RBT_REMOVE(nvgre_map, &sc->sc_ether_map, nv);
3813 		if (refcnt_rele(&nv->nv_refs))
3814 			pool_put(&nvgre_pool, nv);
3815 	}
3816 	rw_exit_write(&sc->sc_ether_lock);
3817 
3818 	timeout_add_sec(&sc->sc_ether_age, NVGRE_AGE_TMO);
3819 }
3820 
3821 static inline int
3822 nvgre_entry_valid(struct nvgre_softc *sc, const struct nvgre_entry *nv)
3823 {
3824 	int diff;
3825 
3826 	if (nv == NULL)
3827 		return (0);
3828 
3829 	if (nv->nv_type == NVGRE_ENTRY_STATIC)
3830 		return (1);
3831 
3832 	diff = ticks - nv->nv_age;
3833 	if (diff < sc->sc_ether_tmo)
3834 		return (1);
3835 
3836 	return (0);
3837 }
3838 
/*
 * ifq start routine for nvgre(4).  For each frame, pick the tunnel
 * endpoint from the learned address map (falling back to the
 * configured multicast destination for broadcast and unknown
 * destinations), encapsulate, and batch the results onto sc_send_list
 * for the nvgre_send() task.
 */
static void
nvgre_start(struct ifnet *ifp)
{
	struct nvgre_softc *sc = ifp->if_softc;
	const struct gre_tunnel *tunnel = &sc->sc_tunnel;
	union gre_addr gateway;
	struct nvgre_entry *nv, key;
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct ether_header *eh;
	struct mbuf *m, *m0;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* drop everything while gre is administratively disabled */
	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* pick the tunnel endpoint for this destination */
		eh = mtod(m0, struct ether_header *);
		if (ether_isbcast(eh->ether_dhost))
			gateway = tunnel->t_dst;
		else {
			memcpy(&key.nv_dst, eh->ether_dhost,
			    sizeof(key.nv_dst));

			rw_enter_read(&sc->sc_ether_lock);
			nv = RBT_FIND(nvgre_map, &sc->sc_ether_map, &key);
			if (nvgre_entry_valid(sc, nv))
				gateway = nv->nv_gateway;
			else {
				/* "flood" to unknown hosts */
				gateway = tunnel->t_dst;
			}
			rw_exit_read(&sc->sc_ether_lock);
		}

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* start the headers at the end of the empty mbuf */
		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap_dst(tunnel, &gateway, m,
		    htons(ETHERTYPE_TRANSETHER),
		    tunnel->t_ttl, gre_l2_tos(tunnel, m));
		if (m == NULL)
			continue;

		m->m_flags &= ~(M_BCAST|M_MCAST);
		m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;

#if NPF > 0
		pf_pkt_addr_changed(m);
#endif

		ml_enqueue(&ml, m);
	}

	/* hand the whole batch to the send task in one operation */
	if (!ml_empty(&ml)) {
		if (mq_enlist(&sc->sc_send_list, &ml) == 0)
			task_add(net_tq(ifp->if_index), &sc->sc_send_task);
		/* else set OACTIVE? */
	}
}
3918 
3919 static uint64_t
3920 nvgre_send4(struct nvgre_softc *sc, struct mbuf_list *ml)
3921 {
3922 	struct ip_moptions imo;
3923 	struct mbuf *m;
3924 	uint64_t oerrors = 0;
3925 
3926 	imo.imo_ifidx = sc->sc_ifp0;
3927 	imo.imo_ttl = sc->sc_tunnel.t_ttl;
3928 	imo.imo_loop = 0;
3929 
3930 	NET_RLOCK();
3931 	while ((m = ml_dequeue(ml)) != NULL) {
3932 		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0)
3933 			oerrors++;
3934 	}
3935 	NET_RUNLOCK();
3936 
3937 	return (oerrors);
3938 }
3939 
#ifdef INET6
/*
 * Transmit a list of encapsulated nvgre packets over IPv6, sending
 * via the parent interface with the configured tunnel hop limit.
 * Returns the number of packets ip6_output() rejected.
 */
static uint64_t
nvgre_send6(struct nvgre_softc *sc, struct mbuf_list *ml)
{
	struct ip6_moptions im6o;
	struct mbuf *m;
	uint64_t oerrors = 0;

	im6o.im6o_ifidx = sc->sc_ifp0;
	im6o.im6o_hlim = sc->sc_tunnel.t_ttl;
	im6o.im6o_loop = 0;

	NET_RLOCK();
	for (m = ml_dequeue(ml); m != NULL; m = ml_dequeue(ml)) {
		if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0)
			oerrors++;
	}
	NET_RUNLOCK();

	return (oerrors);
}
#endif /* INET6 */
3962 
3963 static void
3964 nvgre_send(void *arg)
3965 {
3966 	struct nvgre_softc *sc = arg;
3967 	struct ifnet *ifp = &sc->sc_ac.ac_if;
3968 	sa_family_t af = sc->sc_tunnel.t_af;
3969 	struct mbuf_list ml;
3970 	uint64_t oerrors;
3971 
3972 	if (!ISSET(ifp->if_flags, IFF_RUNNING))
3973 		return;
3974 
3975 	mq_delist(&sc->sc_send_list, &ml);
3976 	if (ml_empty(&ml))
3977 		return;
3978 
3979 	switch (af) {
3980 	case AF_INET:
3981 		oerrors = nvgre_send4(sc, &ml);
3982 		break;
3983 #ifdef INET6
3984 	case AF_INET6:
3985 		oerrors = nvgre_send6(sc, &ml);
3986 		break;
3987 #endif
3988 	default:
3989 		unhandled_af(af);
3990 		/* NOTREACHED */
3991 	}
3992 
3993 	ifp->if_oerrors += oerrors; /* XXX should be ifq_oerrors */
3994 }
3995 
3996 static int
3997 eoip_up(struct eoip_softc *sc)
3998 {
3999 	if (sc->sc_tunnel.t_af == AF_UNSPEC)
4000 		return (EDESTADDRREQ);
4001 
4002 	NET_ASSERT_LOCKED();
4003 
4004 	if (RBT_INSERT(eoip_tree, &eoip_tree, sc) != NULL)
4005 		return (EADDRINUSE);
4006 
4007 	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
4008 
4009 	if (sc->sc_ka_state != GRE_KA_NONE) {
4010 		sc->sc_ka_holdmax = sc->sc_ka_count;
4011 		eoip_keepalive_send(sc);
4012 	}
4013 
4014 	return (0);
4015 }
4016 
4017 static int
4018 eoip_down(struct eoip_softc *sc)
4019 {
4020 	NET_ASSERT_LOCKED();
4021 	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
4022 
4023 	if (sc->sc_ka_state != GRE_KA_NONE) {
4024 		timeout_del_barrier(&sc->sc_ka_hold);
4025 		timeout_del_barrier(&sc->sc_ka_send);
4026 
4027 		sc->sc_ka_state = GRE_KA_DOWN;
4028 		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
4029 	}
4030 
4031 	RBT_REMOVE(eoip_tree, &eoip_tree, sc);
4032 
4033 	return (0);
4034 }
4035 
/*
 * ifq start routine for eoip(4): prepend the EoIP/GRE headers to each
 * frame and transmit it over the configured tunnel.
 */
static void
eoip_start(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* drop everything while gre is administratively disabled */
	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* start the headers at the end of the empty mbuf */
		m_align(m, 0);
		m->m_len = 0;

		m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
4077 
/*
 * Prepend the GRE and EoIP headers to m and wrap it in an outer IP
 * header for the tunnel.  The payload length before the headers were
 * added is carried in the EoIP header.  Returns the encapsulated mbuf
 * or NULL on allocation failure.
 */
static struct mbuf *
eoip_encap(struct eoip_softc *sc, struct mbuf *m, uint8_t tos)
{
	struct gre_header *gh;
	struct gre_h_key_eoip *eoiph;
	int len = m->m_pkthdr.len; /* payload length before the headers */

	m = m_prepend(m, sizeof(*gh) + sizeof(*eoiph), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	/* EoIP is GRE version 1 with the key present flag set */
	gh = mtod(m, struct gre_header *);
	gh->gre_flags = htons(GRE_VERS_1 | GRE_KP);
	gh->gre_proto = htons(GRE_EOIP);

	/* the key field carries the payload length and the tunnel id */
	eoiph = (struct gre_h_key_eoip *)(gh + 1);
	htobem16(&eoiph->eoip_len, len);
	eoiph->eoip_tunnel_id = sc->sc_tunnel_id;

	return (gre_encap_ip(&sc->sc_tunnel, m, sc->sc_tunnel.t_ttl, tos));
}
4099 
/*
 * Timeout handler that transmits an EoIP keepalive: a packet with a
 * zero-length payload (see eoip_input()).  Reschedules itself while
 * the interface is running.
 */
static void
eoip_keepalive_send(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *m;
	int linkhdr;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#endif
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	if (linkhdr > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, linkhdr);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	/*
	 * mark the mbuf as linkhdr bytes long, then trim them all off
	 * the front again: this leaves an empty packet with enough
	 * leading space for all the headers to be prepended in place.
	 */
	m->m_pkthdr.pf.prio = ifp->if_llprio;
	m->m_pkthdr.len = m->m_len = linkhdr;
	m_adj(m, linkhdr);

	m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);

	timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);
}
4143 
4144 static void
4145 eoip_keepalive_hold(void *arg)
4146 {
4147 	struct eoip_softc *sc = arg;
4148 	struct ifnet *ifp = &sc->sc_ac.ac_if;
4149 
4150 	if (!ISSET(ifp->if_flags, IFF_RUNNING))
4151 		return;
4152 
4153 	NET_LOCK();
4154 	sc->sc_ka_state = GRE_KA_DOWN;
4155 	gre_link_state(ifp, sc->sc_ka_state);
4156 	NET_UNLOCK();
4157 }
4158 
/*
 * A keepalive arrived from the peer; run the keepalive state machine.
 * From DOWN the link enters HOLD, where sc_ka_holdcnt consecutive
 * keepalives must be seen before the link is reported up.  Each
 * DOWN-to-HOLD transition doubles the hold requirement (capped at 16
 * times the configured count); while UP it decays back towards the
 * configured count.  Every keepalive pushes the hold timeout back.
 */
static void
eoip_keepalive_recv(struct eoip_softc *sc)
{
	switch (sc->sc_ka_state) {
	case GRE_KA_NONE:
		/* keepalives are not enabled on this interface */
		return;
	case GRE_KA_DOWN:
		sc->sc_ka_state = GRE_KA_HOLD;
		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
		    16 * sc->sc_ka_count);
		break;
	case GRE_KA_HOLD:
		if (--sc->sc_ka_holdcnt > 0)
			break;

		sc->sc_ka_state = GRE_KA_UP;
		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
		break;

	case GRE_KA_UP:
		sc->sc_ka_holdmax--;
		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
		break;
	}

	/* push the hold (link down) timeout back */
	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);
}
4187 
/*
 * Attempt to receive a packet as EoIP.  The tunnel id from the EoIP
 * header is added to key before looking the interface up in eoip_tree.
 * Returns m unchanged if the packet is not EoIP or not for us, so the
 * caller can offer it to other consumers, or NULL once the packet has
 * been consumed or freed.
 */
static struct mbuf *
eoip_input(struct gre_tunnel *key, struct mbuf *m,
    const struct gre_header *gh, uint8_t otos, int iphlen)
{
	struct eoip_softc *sc;
	struct gre_h_key_eoip *eoiph;
	int hlen, len;
	caddr_t buf;

	/* EoIP is exactly GRE version 1 with only the key flag set */
	if (gh->gre_flags != htons(GRE_KP | GRE_VERS_1))
		goto decline;

	hlen = iphlen + sizeof(*gh) + sizeof(*eoiph);
	if (m->m_pkthdr.len < hlen)
		goto decline;

	m = m_pullup(m, hlen);
	if (m == NULL)
		return (NULL);

	/* m_pullup may have moved the data; relocate the headers */
	buf = mtod(m, caddr_t);
	gh = (struct gre_header *)(buf + iphlen);
	eoiph = (struct gre_h_key_eoip *)(gh + 1);

	key->t_key = eoiph->eoip_tunnel_id;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(eoip_tree, &eoip_tree, (const struct eoip_softc *)key);
	if (sc == NULL)
		goto decline;

	/* it's ours now */
	len = bemtoh16(&eoiph->eoip_len);
	if (len == 0) {
		/* a zero-length payload is a keepalive */
		eoip_keepalive_recv(sc);
		goto drop;
	}

	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (NULL);

	/* the packet must contain at least the advertised payload */
	if (m->m_pkthdr.len < len)
		goto drop;
	if (m->m_pkthdr.len != len)
		m_adj(m, len - m->m_pkthdr.len); /* trim trailing bytes */

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	m->m_flags &= ~(M_MCAST|M_BCAST);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	if_vinput(&sc->sc_ac.ac_if, m);

	return (NULL);

decline:
	return (m);
drop:
	m_freem(m);
	return (NULL);
}
4253 
4254 int
4255 gre_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
4256     size_t newlen)
4257 {
4258 	int error;
4259 
4260 	/* All sysctl names at this level are terminal. */
4261 	if (namelen != 1)
4262 		return (ENOTDIR);
4263 
4264 	switch (name[0]) {
4265 	case GRECTL_ALLOW:
4266 		NET_LOCK();
4267 		error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_allow);
4268 		NET_UNLOCK();
4269 		return (error);
4270 	case GRECTL_WCCP:
4271 		NET_LOCK();
4272 		error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_wccp);
4273 		NET_UNLOCK();
4274 		return (error);
4275 	default:
4276 		return (ENOPROTOOPT);
4277 	}
4278 	/* NOTREACHED */
4279 }
4280 
4281 static inline int
4282 gre_ip_cmp(int af, const union gre_addr *a, const union gre_addr *b)
4283 {
4284 	switch (af) {
4285 #ifdef INET6
4286 	case AF_INET6:
4287 		return (memcmp(&a->in6, &b->in6, sizeof(a->in6)));
4288 #endif /* INET6 */
4289 	case AF_INET:
4290 		return (memcmp(&a->in4, &b->in4, sizeof(a->in4)));
4291 	default:
4292 		panic("%s: unsupported af %d\n", __func__, af);
4293 	}
4294 
4295 	return (0);
4296 }
4297 
4298 static int
4299 gre_cmp_src(const struct gre_tunnel *a, const struct gre_tunnel *b)
4300 {
4301 	uint32_t ka, kb;
4302 	uint32_t mask;
4303 	int rv;
4304 
4305 	/* is K set at all? */
4306 	ka = a->t_key_mask & GRE_KEY_ENTROPY;
4307 	kb = b->t_key_mask & GRE_KEY_ENTROPY;
4308 
4309 	/* sort by whether K is set */
4310 	if (ka > kb)
4311 		return (1);
4312 	if (ka < kb)
4313 		return (-1);
4314 
4315 	/* is K set on both? */
4316 	if (ka != GRE_KEY_NONE) {
4317 		/* get common prefix */
4318 		mask = a->t_key_mask & b->t_key_mask;
4319 
4320 		ka = a->t_key & mask;
4321 		kb = b->t_key & mask;
4322 
4323 		/* sort by common prefix */
4324 		if (ka > kb)
4325 			return (1);
4326 		if (ka < kb)
4327 			return (-1);
4328 	}
4329 
4330 	/* sort by routing table */
4331 	if (a->t_rtableid > b->t_rtableid)
4332 		return (1);
4333 	if (a->t_rtableid < b->t_rtableid)
4334 		return (-1);
4335 
4336 	/* sort by address */
4337 	if (a->t_af > b->t_af)
4338 		return (1);
4339 	if (a->t_af < b->t_af)
4340 		return (-1);
4341 
4342 	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
4343 	if (rv != 0)
4344 		return (rv);
4345 
4346 	return (0);
4347 }
4348 
4349 static int
4350 gre_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b)
4351 {
4352 	int rv;
4353 
4354 	rv = gre_cmp_src(a, b);
4355 	if (rv != 0)
4356 		return (rv);
4357 
4358 	return (gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst));
4359 }
4360 
/*
 * mgre(4) interfaces compare on the local half of the tunnel only
 * (key, rtable, af, source address; see gre_cmp_src()).
 */
static inline int
mgre_cmp(const struct mgre_softc *a, const struct mgre_softc *b)
{
	return (gre_cmp_src(&a->sc_tunnel, &b->sc_tunnel));
}

RBT_GENERATE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);
4368 
/*
 * egre(4) interfaces compare on the whole tunnel, including both
 * addresses (see gre_cmp()).
 */
static inline int
egre_cmp(const struct egre_softc *a, const struct egre_softc *b)
{
	return (gre_cmp(&a->sc_tunnel, &b->sc_tunnel));
}

RBT_GENERATE(egre_tree, egre_softc, sc_entry, egre_cmp);
4376 
/*
 * Entries in the learned address map compare on the destination
 * address bytes alone.
 */
static inline int
nvgre_entry_cmp(const struct nvgre_entry *a, const struct nvgre_entry *b)
{
	return (memcmp(&a->nv_dst, &b->nv_dst, sizeof(a->nv_dst)));
}

RBT_GENERATE(nvgre_map, nvgre_entry, nv_entry, nvgre_entry_cmp);
4384 
4385 static int
4386 nvgre_cmp_tunnel(const struct gre_tunnel *a, const struct gre_tunnel *b)
4387 {
4388 	uint32_t ka, kb;
4389 
4390 	ka = a->t_key & GRE_KEY_ENTROPY;
4391 	kb = b->t_key & GRE_KEY_ENTROPY;
4392 
4393 	/* sort by common prefix */
4394 	if (ka > kb)
4395 		return (1);
4396 	if (ka < kb)
4397 		return (-1);
4398 
4399 	/* sort by routing table */
4400 	if (a->t_rtableid > b->t_rtableid)
4401 		return (1);
4402 	if (a->t_rtableid < b->t_rtableid)
4403 		return (-1);
4404 
4405 	/* sort by address */
4406 	if (a->t_af > b->t_af)
4407 		return (1);
4408 	if (a->t_af < b->t_af)
4409 		return (-1);
4410 
4411 	return (0);
4412 }
4413 
4414 static inline int
4415 nvgre_cmp_ucast(const struct nvgre_softc *na, const struct nvgre_softc *nb)
4416 {
4417 	const struct gre_tunnel *a = &na->sc_tunnel;
4418 	const struct gre_tunnel *b = &nb->sc_tunnel;
4419 	int rv;
4420 
4421 	rv = nvgre_cmp_tunnel(a, b);
4422 	if (rv != 0)
4423 		return (rv);
4424 
4425 	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
4426 	if (rv != 0)
4427 		return (rv);
4428 
4429 	return (0);
4430 }
4431 
4432 static int
4433 nvgre_cmp_mcast(const struct gre_tunnel *a, const union gre_addr *aa,
4434     unsigned int if0idxa, const struct gre_tunnel *b,
4435     const union gre_addr *ab,unsigned int if0idxb)
4436 {
4437 	int rv;
4438 
4439 	rv = nvgre_cmp_tunnel(a, b);
4440 	if (rv != 0)
4441 		return (rv);
4442 
4443 	rv = gre_ip_cmp(a->t_af, aa, ab);
4444 	if (rv != 0)
4445 		return (rv);
4446 
4447 	if (if0idxa > if0idxb)
4448 		return (1);
4449 	if (if0idxa < if0idxb)
4450 		return (-1);
4451 
4452 	return (0);
4453 }
4454 
/*
 * RBT wrapper around nvgre_cmp_mcast(), comparing two softcs by their
 * tunnel destination address and parent interface index.
 */
static inline int
nvgre_cmp_mcast_sc(const struct nvgre_softc *na, const struct nvgre_softc *nb)
{
	const struct gre_tunnel *a = &na->sc_tunnel;
	const struct gre_tunnel *b = &nb->sc_tunnel;

	return (nvgre_cmp_mcast(a, &a->t_dst, na->sc_ifp0,
	    b, &b->t_dst, nb->sc_ifp0));
}

RBT_GENERATE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
RBT_GENERATE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);
4467 
4468 static inline int
4469 eoip_cmp(const struct eoip_softc *ea, const struct eoip_softc *eb)
4470 {
4471 	const struct gre_tunnel *a = &ea->sc_tunnel;
4472 	const struct gre_tunnel *b = &eb->sc_tunnel;
4473 	int rv;
4474 
4475 	if (a->t_key > b->t_key)
4476 		return (1);
4477 	if (a->t_key < b->t_key)
4478 		return (-1);
4479 
4480 	/* sort by routing table */
4481 	if (a->t_rtableid > b->t_rtableid)
4482 		return (1);
4483 	if (a->t_rtableid < b->t_rtableid)
4484 		return (-1);
4485 
4486 	/* sort by address */
4487 	if (a->t_af > b->t_af)
4488 		return (1);
4489 	if (a->t_af < b->t_af)
4490 		return (-1);
4491 
4492 	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
4493 	if (rv != 0)
4494 		return (rv);
4495 
4496 	rv = gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst);
4497 	if (rv != 0)
4498 		return (rv);
4499 
4500 	return (0);
4501 }
4502 
4503 RBT_GENERATE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);
4504