xref: /openbsd-src/sys/net/if_gre.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: if_gre.c,v 1.155 2019/11/10 11:44:10 dlg Exp $ */
2 /*	$NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */
3 
4 /*
5  * Copyright (c) 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Heiko W.Rupp <hwr@pilhuhn.de>
10  *
11  * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * Encapsulate L3 protocols into IP, per RFC 1701 and 1702.
37  * See gre(4) for more details.
38  * Also supported: IP in IP encapsulation (proto 55) per RFC 2004.
39  */
40 
41 #include "bpfilter.h"
42 #include "pf.h"
43 
44 #include <sys/param.h>
45 #include <sys/mbuf.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/kernel.h>
49 #include <sys/systm.h>
50 #include <sys/errno.h>
51 #include <sys/timeout.h>
52 #include <sys/queue.h>
53 #include <sys/tree.h>
54 #include <sys/pool.h>
55 #include <sys/rwlock.h>
56 
57 #include <crypto/siphash.h>
58 
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_types.h>
62 #include <net/if_media.h>
63 #include <net/route.h>
64 
65 #include <netinet/in.h>
66 #include <netinet/in_var.h>
67 #include <netinet/if_ether.h>
68 #include <netinet/ip.h>
69 #include <netinet/ip_var.h>
70 #include <netinet/ip_ecn.h>
71 
72 #ifdef INET6
73 #include <netinet/ip6.h>
74 #include <netinet6/ip6_var.h>
75 #include <netinet6/in6_var.h>
76 #endif
77 
78 #ifdef PIPEX
79 #include <net/pipex.h>
80 #endif
81 
82 #ifdef MPLS
83 #include <netmpls/mpls.h>
84 #endif /* MPLS */
85 
86 #if NBPFILTER > 0
87 #include <net/bpf.h>
88 #endif
89 
90 #if NPF > 0
91 #include <net/pfvar.h>
92 #endif
93 
94 #include <net/if_gre.h>
95 
96 #include <netinet/ip_gre.h>
97 #include <sys/sysctl.h>
98 
99 /* for nvgre bridge shizz */
100 #include <sys/socket.h>
101 #include <net/if_bridge.h>
102 
103 /*
104  * packet formats
105  */
/*
 * Base GRE header (RFC 1701/2784): a flags+version word followed by
 * the ethertype of the encapsulated payload.  Optional fields
 * (checksum, key, sequence) follow when the corresponding flag is set.
 */
struct gre_header {
	uint16_t		gre_flags;
#define GRE_CP				0x8000  /* Checksum Present */
#define GRE_KP				0x2000  /* Key Present */
#define GRE_SP				0x1000  /* Sequence Present */

#define GRE_VERS_MASK			0x0007
#define GRE_VERS_0			0x0000	/* RFC 2784 GRE */
#define GRE_VERS_1			0x0001	/* enhanced GRE (PPTP) */

	uint16_t		gre_proto;	/* payload ethertype */
} __packed __aligned(4);
118 
/* Optional checksum field; present when GRE_CP is set in gre_flags. */
struct gre_h_cksum {
	uint16_t		gre_cksum;
	uint16_t		gre_reserved1;
} __packed __aligned(4);
123 
/* Optional key field; present when GRE_KP is set in gre_flags. */
struct gre_h_key {
	uint32_t		gre_key;
} __packed __aligned(4);
127 
128 #define GRE_EOIP		0x6400
129 
/*
 * MikroTik EoIP reuses the GRE key field as a payload length plus a
 * 16-bit tunnel identifier.  Note the mixed byte orders.
 */
struct gre_h_key_eoip {
	uint16_t		eoip_len;	/* network order */
	uint16_t		eoip_tunnel_id;	/* little endian */
} __packed __aligned(4);
134 
135 #define NVGRE_VSID_RES_MIN	0x000000 /* reserved for future use */
136 #define NVGRE_VSID_RES_MAX	0x000fff
137 #define NVGRE_VSID_NVE2NVE	0xffffff /* vendor specific NVE-to-NVE comms */
138 
/* Optional sequence number field; present when GRE_SP is set. */
struct gre_h_seq {
	uint32_t		gre_seq;
} __packed __aligned(4);
142 
/* WCCPv2 redirect header that may sit between GRE and the IP payload. */
struct gre_h_wccp {
	uint8_t			wccp_flags;
	uint8_t			service_id;
	uint8_t			alt_bucket;
	uint8_t			pri_bucket;
} __packed __aligned(4);
149 
150 #define GRE_WCCP 0x883e
151 
152 #define GRE_HDRLEN (sizeof(struct ip) + sizeof(struct gre_header))
153 
154 /*
155  * GRE tunnel metadata
156  */
157 
158 #define GRE_KA_NONE		0
159 #define GRE_KA_DOWN		1
160 #define GRE_KA_HOLD		2
161 #define GRE_KA_UP		3
162 
/*
 * A tunnel endpoint address; which member is valid is discriminated
 * by a separate address family value (see gre_tunnel's t_af).
 */
union gre_addr {
	struct in_addr		in4;
	struct in6_addr		in6;
};
167 
168 static inline int
169 		gre_ip_cmp(int, const union gre_addr *,
170 		    const union gre_addr *);
171 
172 #define GRE_KEY_MIN		0x00000000U
173 #define GRE_KEY_MAX		0xffffffffU
174 #define GRE_KEY_SHIFT		0
175 
176 #define GRE_KEY_ENTROPY_MIN	0x00000000U
177 #define GRE_KEY_ENTROPY_MAX	0x00ffffffU
178 #define GRE_KEY_ENTROPY_SHIFT	8
179 
/*
 * Common tunnel addressing and configuration state.  Every gre
 * variant softc embeds this as its first member so lookup code can
 * treat a softc pointer as a struct gre_tunnel pointer.
 */
struct gre_tunnel {
	uint32_t		t_key_mask;	/* which key bits matter */
#define GRE_KEY_NONE			htonl(0x00000000U)
#define GRE_KEY_ENTROPY			htonl(0xffffff00U)
#define GRE_KEY_MASK			htonl(0xffffffffU)
	uint32_t		t_key;		/* GRE key, network order */

	u_int			t_rtableid;	/* routing table id */
	union gre_addr		t_src;		/* local endpoint */
#define t_src4	t_src.in4
#define t_src6	t_src.in6
	union gre_addr		t_dst;		/* remote endpoint */
#define t_dst4	t_dst.in4
#define t_dst6	t_dst.in6
	int			t_ttl;		/* outer ttl/hlim */
	int			t_txhprio;	/* outer prio source on tx */
	int			t_rxhprio;	/* inner prio source on rx */
	int			t_ecn;		/* ECN propagation policy */
	uint16_t		t_df;		/* htons(IP_DF) or 0 */
	sa_family_t		t_af;		/* selects t_src/t_dst member */
};
201 
202 static int
203 		gre_cmp_src(const struct gre_tunnel *,
204 		    const struct gre_tunnel *);
205 static int
206 		gre_cmp(const struct gre_tunnel *, const struct gre_tunnel *);
207 
208 static int	gre_set_tunnel(struct gre_tunnel *, struct if_laddrreq *, int);
209 static int	gre_get_tunnel(struct gre_tunnel *, struct if_laddrreq *);
210 static int	gre_del_tunnel(struct gre_tunnel *);
211 
212 static int	gre_set_vnetid(struct gre_tunnel *, struct ifreq *);
213 static int	gre_get_vnetid(struct gre_tunnel *, struct ifreq *);
214 static int	gre_del_vnetid(struct gre_tunnel *);
215 
216 static int	gre_set_vnetflowid(struct gre_tunnel *, struct ifreq *);
217 static int	gre_get_vnetflowid(struct gre_tunnel *, struct ifreq *);
218 
219 static struct mbuf *
220 		gre_encap_dst(const struct gre_tunnel *, const union gre_addr *,
221 		    struct mbuf *, uint16_t, uint8_t, uint8_t);
222 #define gre_encap(_t, _m, _p, _ttl, _tos) \
223 		gre_encap_dst((_t), &(_t)->t_dst, (_m), (_p), (_ttl), (_tos))
224 
225 static struct mbuf *
226 		gre_encap_dst_ip(const struct gre_tunnel *,
227 		    const union gre_addr *, struct mbuf *, uint8_t, uint8_t);
228 #define gre_encap_ip(_t, _m, _ttl, _tos) \
229 		gre_encap_dst_ip((_t), &(_t)->t_dst, (_m), (_ttl), (_tos))
230 
231 static int
232 		gre_ip_output(const struct gre_tunnel *, struct mbuf *);
233 
234 static int	gre_tunnel_ioctl(struct ifnet *, struct gre_tunnel *,
235 		    u_long, void *);
236 
237 static uint8_t	gre_l2_tos(const struct gre_tunnel *, const struct mbuf *);
238 static uint8_t	gre_l3_tos(const struct gre_tunnel *,
239 		    const struct mbuf *, uint8_t);
240 
241 /*
242  * layer 3 GRE tunnels
243  */
244 
/*
 * Softc for a point-to-point gre(4) layer 3 interface.  The tunnel
 * state must stay first so lookups can cast between the two types.
 */
struct gre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	TAILQ_ENTRY(gre_softc)	sc_entry;	/* gre_list linkage */

	struct ifnet		sc_if;

	/* keepalive machinery; see gre_keepalive_send/recv/hold */
	struct timeout		sc_ka_send;	/* periodic transmit */
	struct timeout		sc_ka_hold;	/* expiry -> link down */

	unsigned int		sc_ka_state;	/* GRE_KA_* */
	unsigned int		sc_ka_timeo;
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;

	SIPHASH_KEY		sc_ka_key;	/* authenticates keepalives */
	uint32_t		sc_ka_bias;
	int			sc_ka_recvtm;
};
265 
266 TAILQ_HEAD(gre_list, gre_softc);
267 
/*
 * On-wire gre(4) keepalive payload: an uptime/nonce pair plus a
 * SipHash digest over them (keyed with sc_ka_key).
 */
struct gre_keepalive {
	uint32_t		gk_uptime;
	uint32_t		gk_random;
	uint8_t			gk_digest[SIPHASH_DIGEST_LENGTH];
} __packed __aligned(4);
273 
274 static int	gre_clone_create(struct if_clone *, int);
275 static int	gre_clone_destroy(struct ifnet *);
276 
277 struct if_clone gre_cloner =
278     IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
279 
280 /* protected by NET_LOCK */
281 struct gre_list gre_list = TAILQ_HEAD_INITIALIZER(gre_list);
282 
283 static int	gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
284 		    struct rtentry *);
285 static void	gre_start(struct ifnet *);
286 static int	gre_ioctl(struct ifnet *, u_long, caddr_t);
287 
288 static int	gre_up(struct gre_softc *);
289 static int	gre_down(struct gre_softc *);
290 static void	gre_link_state(struct ifnet *, unsigned int);
291 
292 static int	gre_input_key(struct mbuf **, int *, int, int, uint8_t,
293 		    struct gre_tunnel *);
294 
295 static struct mbuf *
296 		gre_ipv4_patch(const struct gre_tunnel *, struct mbuf *,
297 		    uint8_t *, uint8_t);
298 #ifdef INET6
299 static struct mbuf *
300 		gre_ipv6_patch(const struct gre_tunnel *, struct mbuf *,
301 		    uint8_t *, uint8_t);
302 #endif
303 #ifdef MPLS
304 static struct mbuf *
305 		gre_mpls_patch(const struct gre_tunnel *, struct mbuf *,
306 		    uint8_t *, uint8_t);
307 #endif
308 static void	gre_keepalive_send(void *);
309 static void	gre_keepalive_recv(struct ifnet *ifp, struct mbuf *);
310 static void	gre_keepalive_hold(void *);
311 
312 static struct mbuf *
313 		gre_l3_encap_dst(const struct gre_tunnel *, const void *,
314 		    struct mbuf *m, sa_family_t);
315 
316 #define gre_l3_encap(_t, _m, _af) \
317 		gre_l3_encap_dst((_t), &(_t)->t_dst, (_m), (_af))
318 
/*
 * Softc for a point-to-multipoint mgre(4) interface, keyed in
 * mgre_tree by its tunnel state (which must stay first).
 */
struct mgre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(mgre_softc)	sc_entry;	/* mgre_tree linkage */

	struct ifnet		sc_if;
};
325 
326 RBT_HEAD(mgre_tree, mgre_softc);
327 
328 static inline int
329 		mgre_cmp(const struct mgre_softc *, const struct mgre_softc *);
330 
331 RBT_PROTOTYPE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);
332 
333 static int	mgre_clone_create(struct if_clone *, int);
334 static int	mgre_clone_destroy(struct ifnet *);
335 
336 struct if_clone mgre_cloner =
337     IF_CLONE_INITIALIZER("mgre", mgre_clone_create, mgre_clone_destroy);
338 
339 static void	mgre_rtrequest(struct ifnet *, int, struct rtentry *);
340 static int	mgre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
341 		    struct rtentry *);
342 static void	mgre_start(struct ifnet *);
343 static int	mgre_ioctl(struct ifnet *, u_long, caddr_t);
344 
345 static int	mgre_set_tunnel(struct mgre_softc *, struct if_laddrreq *);
346 static int	mgre_get_tunnel(struct mgre_softc *, struct if_laddrreq *);
347 static int	mgre_up(struct mgre_softc *);
348 static int	mgre_down(struct mgre_softc *);
349 
350 /* protected by NET_LOCK */
351 struct mgre_tree mgre_tree = RBT_INITIALIZER();
352 
353 /*
354  * Ethernet GRE tunnels
355  */
356 
357 static struct mbuf *
358 		gre_ether_align(struct mbuf *, int);
359 
/*
 * Softc for an egre(4) Ethernet-over-GRE interface, keyed in
 * egre_tree by its tunnel state (which must stay first).
 */
struct egre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(egre_softc)	sc_entry;	/* egre_tree linkage */

	struct arpcom		sc_ac;
	struct ifmedia		sc_media;
};
367 
368 RBT_HEAD(egre_tree, egre_softc);
369 
370 static inline int
371 		egre_cmp(const struct egre_softc *, const struct egre_softc *);
372 
373 RBT_PROTOTYPE(egre_tree, egre_softc, sc_entry, egre_cmp);
374 
375 static int	egre_clone_create(struct if_clone *, int);
376 static int	egre_clone_destroy(struct ifnet *);
377 
378 static void	egre_start(struct ifnet *);
379 static int	egre_ioctl(struct ifnet *, u_long, caddr_t);
380 static int	egre_media_change(struct ifnet *);
381 static void	egre_media_status(struct ifnet *, struct ifmediareq *);
382 
383 static int	egre_up(struct egre_softc *);
384 static int	egre_down(struct egre_softc *);
385 
386 static int	egre_input(const struct gre_tunnel *, struct mbuf *, int,
387 		    uint8_t);
388 struct if_clone egre_cloner =
389     IF_CLONE_INITIALIZER("egre", egre_clone_create, egre_clone_destroy);
390 
391 /* protected by NET_LOCK */
392 struct egre_tree egre_tree = RBT_INITIALIZER();
393 
394 /*
395  * Network Virtualisation Using Generic Routing Encapsulation (NVGRE)
396  */
397 
398 #define NVGRE_AGE_TMO		100	/* seconds */
399 
400 struct nvgre_entry {
401 	RB_ENTRY(nvgre_entry)	 nv_entry;
402 	struct ether_addr	 nv_dst;
403 	uint8_t			 nv_type;
404 #define NVGRE_ENTRY_DYNAMIC		0
405 #define NVGRE_ENTRY_STATIC		1
406 	union gre_addr		 nv_gateway;
407 	struct refcnt		 nv_refs;
408 	int			 nv_age;
409 };
410 
411 RBT_HEAD(nvgre_map, nvgre_entry);
412 
413 static inline int
414 		nvgre_entry_cmp(const struct nvgre_entry *,
415 		    const struct nvgre_entry *);
416 
417 RBT_PROTOTYPE(nvgre_map, nvgre_entry, nv_entry, nvgre_entry_cmp);
418 
/*
 * Softc for an nvgre(4) interface.  The tunnel state must stay first;
 * the softc itself is the key in the ucast/mcast lookup trees.
 */
struct nvgre_softc {
	struct gre_tunnel	 sc_tunnel; /* must be first */
	unsigned int		 sc_ifp0;	/* parent ifindex; see nvgre_set_parent() */
	RBT_ENTRY(nvgre_softc)	 sc_uentry;	/* nvgre_ucast_tree linkage */
	RBT_ENTRY(nvgre_softc)	 sc_mentry;	/* nvgre_mcast_tree linkage */

	struct arpcom		 sc_ac;
	struct ifmedia		 sc_media;

	struct mbuf_queue	 sc_send_list;	/* packets for sc_send_task */
	struct task		 sc_send_task;	/* runs nvgre_send() */

	void			*sc_inm;	/* NOTE(review): opaque multicast state; confirm in nvgre_up() */
	struct task		 sc_ltask;	/* runs nvgre_link_change() */
	struct task		 sc_dtask;	/* runs nvgre_detach() */

	/* learned address map, serialised by sc_ether_lock */
	struct rwlock		 sc_ether_lock;
	struct nvgre_map	 sc_ether_map;
	unsigned int		 sc_ether_num;	/* current entry count */
	unsigned int		 sc_ether_max;	/* entry limit */
	int			 sc_ether_tmo;	/* age interval, ticks */
	struct timeout		 sc_ether_age;	/* runs nvgre_age() */
};
442 
443 RBT_HEAD(nvgre_ucast_tree, nvgre_softc);
444 RBT_HEAD(nvgre_mcast_tree, nvgre_softc);
445 
446 static inline int
447 		nvgre_cmp_ucast(const struct nvgre_softc *,
448 		    const struct nvgre_softc *);
449 static int
450 		nvgre_cmp_mcast(const struct gre_tunnel *,
451 		    const union gre_addr *, unsigned int,
452 		    const struct gre_tunnel *, const union gre_addr *,
453 		    unsigned int);
454 static inline int
455 		nvgre_cmp_mcast_sc(const struct nvgre_softc *,
456 		    const struct nvgre_softc *);
457 
458 RBT_PROTOTYPE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
459 RBT_PROTOTYPE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);
460 
461 static int	nvgre_clone_create(struct if_clone *, int);
462 static int	nvgre_clone_destroy(struct ifnet *);
463 
464 static void	nvgre_start(struct ifnet *);
465 static int	nvgre_ioctl(struct ifnet *, u_long, caddr_t);
466 
467 static int	nvgre_up(struct nvgre_softc *);
468 static int	nvgre_down(struct nvgre_softc *);
469 static int	nvgre_set_parent(struct nvgre_softc *, const char *);
470 static void	nvgre_link_change(void *);
471 static void	nvgre_detach(void *);
472 
473 static int	nvgre_input(const struct gre_tunnel *, struct mbuf *, int,
474 		    uint8_t);
475 static void	nvgre_send(void *);
476 
477 static int	nvgre_rtfind(struct nvgre_softc *, struct ifbaconf *);
478 static void	nvgre_flush_map(struct nvgre_softc *);
479 static void	nvgre_input_map(struct nvgre_softc *,
480 		    const struct gre_tunnel *, const struct ether_header *);
481 static void	nvgre_age(void *);
482 
483 struct if_clone nvgre_cloner =
484     IF_CLONE_INITIALIZER("nvgre", nvgre_clone_create, nvgre_clone_destroy);
485 
486 struct pool nvgre_pool;
487 
488 /* protected by NET_LOCK */
489 struct nvgre_ucast_tree nvgre_ucast_tree = RBT_INITIALIZER();
490 struct nvgre_mcast_tree nvgre_mcast_tree = RBT_INITIALIZER();
491 
492 /*
493  * MikroTik Ethernet over IP protocol (eoip)
494  */
495 
/*
 * Softc for a MikroTik Ethernet-over-IP eoip(4) interface, keyed in
 * eoip_tree by its tunnel state (which must stay first) and tunnel id.
 */
struct eoip_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	uint16_t		sc_tunnel_id;	/* EoIP tunnel identifier */
	RBT_ENTRY(eoip_softc)	sc_entry;	/* eoip_tree linkage */

	struct arpcom		sc_ac;
	struct ifmedia		sc_media;

	/* keepalive machinery; see eoip_keepalive_send/recv/hold */
	struct timeout		sc_ka_send;	/* periodic transmit */
	struct timeout		sc_ka_hold;	/* expiry -> link down */

	unsigned int		sc_ka_state;	/* GRE_KA_* */
	unsigned int		sc_ka_timeo;
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;
};
514 
515 RBT_HEAD(eoip_tree, eoip_softc);
516 
517 static inline int
518 		eoip_cmp(const struct eoip_softc *, const struct eoip_softc *);
519 
520 RBT_PROTOTYPE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);
521 
522 static int	eoip_clone_create(struct if_clone *, int);
523 static int	eoip_clone_destroy(struct ifnet *);
524 
525 static void	eoip_start(struct ifnet *);
526 static int	eoip_ioctl(struct ifnet *, u_long, caddr_t);
527 
528 static void	eoip_keepalive_send(void *);
529 static void	eoip_keepalive_recv(struct eoip_softc *);
530 static void	eoip_keepalive_hold(void *);
531 
532 static int	eoip_up(struct eoip_softc *);
533 static int	eoip_down(struct eoip_softc *);
534 
535 static struct mbuf *
536 		eoip_encap(struct eoip_softc *, struct mbuf *, uint8_t);
537 
538 static struct mbuf *
539 		eoip_input(struct gre_tunnel *, struct mbuf *,
540 		    const struct gre_header *, uint8_t, int);
541 struct if_clone eoip_cloner =
542     IF_CLONE_INITIALIZER("eoip", eoip_clone_create, eoip_clone_destroy);
543 
544 /* protected by NET_LOCK */
545 struct eoip_tree eoip_tree = RBT_INITIALIZER();
546 
547 /*
548  * It is not easy to calculate the right value for a GRE MTU.
549  * We leave this task to the admin and use the same default that
550  * other vendors use.
551  */
552 #define GREMTU 1476
553 
554 /*
555  * We can control the acceptance of GRE and MobileIP packets by
556  * altering the sysctl net.inet.gre.allow values
557  * respectively. Zero means drop them, all else is acceptance.  We can also
558  * control acceptance of WCCPv1-style GRE packets through the
559  * net.inet.gre.wccp value, but be aware it depends upon normal GRE being
560  * allowed as well.
561  *
562  */
563 int gre_allow = 0;
564 int gre_wccp = 0;
565 
566 void
567 greattach(int n)
568 {
569 	if_clone_attach(&gre_cloner);
570 	if_clone_attach(&mgre_cloner);
571 	if_clone_attach(&egre_cloner);
572 	if_clone_attach(&nvgre_cloner);
573 	if_clone_attach(&eoip_cloner);
574 }
575 
/*
 * Clone a new gre(4) point-to-point layer 3 tunnel interface.
 * Always succeeds (allocation waits); returns 0.
 */
static int
gre_clone_create(struct if_clone *ifc, int unit)
{
	struct gre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d",
	    ifc->ifc_name, unit);

	ifp = &sc->sc_if;
	ifp->if_softc = sc;
	ifp->if_type = IFT_TUNNEL;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_output = gre_output;
	ifp->if_start = gre_start;
	ifp->if_ioctl = gre_ioctl;
	ifp->if_rtrequest = p2p_rtrequest;

	/* tunnel defaults: system ttl, DF clear, ECN allowed */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	/* keepalives start disabled (GRE_KA_NONE) */
	timeout_set(&sc->sc_ka_send, gre_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, gre_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_NONE;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	ifp->if_llprio = IFQ_TOS2PRIO(IPTOS_PREC_INTERNETCONTROL);

	/* publish on the global list used by gre_find() */
	NET_LOCK();
	TAILQ_INSERT_TAIL(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	return (0);
}
624 
/*
 * Destroy a gre(4) interface: bring it down if running, unlink it
 * from the global list under NET_LOCK, then detach and free.
 */
static int
gre_clone_destroy(struct ifnet *ifp)
{
	struct gre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		gre_down(sc);

	TAILQ_REMOVE(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
643 
/*
 * Clone a new mgre(4) point-to-multipoint tunnel interface.  Unlike
 * gre(4) it routes per-neighbour via mgre_rtrequest/mgre_output and
 * is only entered into mgre_tree when brought up.
 */
static int
mgre_clone_create(struct if_clone *ifc, int unit)
{
	struct mgre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_type = IFT_L3IPVLAN;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_MULTICAST|IFF_SIMPLEX;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_rtrequest = mgre_rtrequest;
	ifp->if_output = mgre_output;
	ifp->if_start = mgre_start;
	ifp->if_ioctl = mgre_ioctl;

	/* tunnel defaults: system ttl, DF clear, ECN allowed */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	return (0);
}
683 
/*
 * Destroy an mgre(4) interface: take it down if running (which
 * removes it from mgre_tree), then detach and free.
 */
static int
mgre_clone_destroy(struct ifnet *ifp)
{
	struct mgre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		mgre_down(sc);
	NET_UNLOCK();

	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
700 
/*
 * Clone a new egre(4) Ethernet-over-GRE interface with a fake MAC
 * address and a fixed auto-select media.
 */
static int
egre_clone_create(struct if_clone *ifc, int unit)
{
	struct egre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = egre_ioctl;
	ifp->if_start = egre_start;
	ifp->if_xflags = IFXF_CLONED;
	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	/* tunnel defaults: system ttl, tx prio 0, DF clear */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}
737 
/*
 * Destroy an egre(4) interface: take it down if running, release the
 * media instance, then detach the ethernet layer and the interface.
 */
static int
egre_clone_destroy(struct ifnet *ifp)
{
	struct egre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		egre_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
756 
757 static int
758 nvgre_clone_create(struct if_clone *ifc, int unit)
759 {
760 	struct nvgre_softc *sc;
761 	struct ifnet *ifp;
762 	struct gre_tunnel *tunnel;
763 
764 	if (nvgre_pool.pr_size == 0) {
765 		pool_init(&nvgre_pool, sizeof(struct nvgre_entry), 0,
766 		    IPL_SOFTNET, 0, "nvgren", NULL);
767 	}
768 
769 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
770 	ifp = &sc->sc_ac.ac_if;
771 
772 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
773 	    ifc->ifc_name, unit);
774 
775 	ifp->if_softc = sc;
776 	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
777 	ifp->if_ioctl = nvgre_ioctl;
778 	ifp->if_start = nvgre_start;
779 	ifp->if_xflags = IFXF_CLONED;
780 	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
781 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
782 	ether_fakeaddr(ifp);
783 
784 	tunnel = &sc->sc_tunnel;
785 	tunnel->t_ttl = IP_DEFAULT_MULTICAST_TTL;
786 	tunnel->t_txhprio = 0;
787 	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
788 	tunnel->t_df = htons(IP_DF);
789 	tunnel->t_key_mask = GRE_KEY_ENTROPY;
790 	tunnel->t_key = htonl((NVGRE_VSID_RES_MAX + 1) <<
791 	    GRE_KEY_ENTROPY_SHIFT);
792 
793 	mq_init(&sc->sc_send_list, IFQ_MAXLEN * 2, IPL_SOFTNET);
794 	task_set(&sc->sc_send_task, nvgre_send, sc);
795 	task_set(&sc->sc_ltask, nvgre_link_change, sc);
796 	task_set(&sc->sc_dtask, nvgre_detach, sc);
797 
798 	rw_init(&sc->sc_ether_lock, "nvgrelk");
799 	RBT_INIT(nvgre_map, &sc->sc_ether_map);
800 	sc->sc_ether_num = 0;
801 	sc->sc_ether_max = 100;
802 	sc->sc_ether_tmo = 240 * hz;
803 	timeout_set_proc(&sc->sc_ether_age, nvgre_age, sc); /* ugh */
804 
805 	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
806 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
807 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
808 
809 	if_counters_alloc(ifp);
810 	if_attach(ifp);
811 	ether_ifattach(ifp);
812 
813 	return (0);
814 }
815 
/*
 * Destroy an nvgre(4) interface: take it down if running, release the
 * media instance, then detach the ethernet layer and the interface.
 */
static int
nvgre_clone_destroy(struct ifnet *ifp)
{
	struct nvgre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		nvgre_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
834 
/*
 * Clone a new eoip(4) interface.  Unlike gre(4), the keepalive state
 * machine starts in GRE_KA_DOWN rather than disabled.
 */
static int
eoip_clone_create(struct if_clone *ifc, int unit)
{
	struct eoip_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = eoip_ioctl;
	ifp->if_start = eoip_start;
	ifp->if_xflags = IFXF_CLONED;
	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	/* tunnel defaults: system ttl, tx prio 0, DF clear */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);

	/* default keepalive parameters */
	sc->sc_ka_timeo = 10;
	sc->sc_ka_count = 10;

	timeout_set(&sc->sc_ka_send, eoip_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, eoip_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_DOWN;

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}
878 
/*
 * Destroy an eoip(4) interface: take it down if running, release the
 * media instance, then detach the ethernet layer and the interface.
 */
static int
eoip_clone_destroy(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		eoip_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
897 
/*
 * IPPROTO_GRE input handler for IPv4.  Build a tunnel lookup key from
 * the outer header and hand the packet to gre_input_key(); packets no
 * tunnel claims fall through to raw IP input.
 */
int
gre_input(struct mbuf **mp, int *offp, int type, int af)
{
	struct mbuf *m = *mp;
	struct gre_tunnel key;
	struct ip *ip;

	ip = mtod(m, struct ip *);

	/* XXX check if ip_src is sane for nvgre? */

	key.t_af = AF_INET;
	/* swap to the local perspective: our source is their destination */
	key.t_src4 = ip->ip_dst;
	key.t_dst4 = ip->ip_src;

	if (gre_input_key(mp, offp, type, af, ip->ip_tos, &key) == -1)
		return (rip_input(mp, offp, type, af));

	return (IPPROTO_DONE);
}
918 
919 #ifdef INET6
/*
 * IPPROTO_GRE input handler for IPv6.  Mirrors gre_input(), passing
 * the traffic class as the outer tos; unclaimed packets fall through
 * to raw IPv6 input.
 */
int
gre_input6(struct mbuf **mp, int *offp, int type, int af)
{
	struct mbuf *m = *mp;
	struct gre_tunnel key;
	struct ip6_hdr *ip6;
	uint32_t flow;

	ip6 = mtod(m, struct ip6_hdr *);

	/* XXX check if ip6_src is sane for nvgre? */

	key.t_af = AF_INET6;
	/* swap to the local perspective: our source is their destination */
	key.t_src6 = ip6->ip6_dst;
	key.t_dst6 = ip6->ip6_src;

	flow = bemtoh32(&ip6->ip6_flow);

	/* flow >> 20 keeps version+tclass; the uint8_t parameter of
	 * gre_input_key() truncates that to just the traffic class */
	if (gre_input_key(mp, offp, type, af, flow >> 20, &key) == -1)
		return (rip6_input(mp, offp, type, af));

	return (IPPROTO_DONE);
}
943 #endif /* INET6 */
944 
945 static inline struct ifnet *
946 gre_find(const struct gre_tunnel *key)
947 {
948 	struct gre_softc *sc;
949 
950 	TAILQ_FOREACH(sc, &gre_list, sc_entry) {
951 		if (gre_cmp(key, &sc->sc_tunnel) != 0)
952 			continue;
953 
954 		if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
955 			continue;
956 
957 		return (&sc->sc_if);
958 	}
959 
960 	return (NULL);
961 }
962 
963 static inline struct ifnet *
964 mgre_find(const struct gre_tunnel *key)
965 {
966 	struct mgre_softc *sc;
967 
968 	NET_ASSERT_LOCKED();
969 	sc = RBT_FIND(mgre_tree, &mgre_tree, (const struct mgre_softc *)key);
970 	if (sc != NULL)
971 		return (&sc->sc_if);
972 
973 	return (NULL);
974 }
975 
976 static struct mbuf *
977 gre_input_1(struct gre_tunnel *key, struct mbuf *m,
978     const struct gre_header *gh, uint8_t otos, int iphlen)
979 {
980 	switch (gh->gre_proto) {
981 	case htons(ETHERTYPE_PPP):
982 #ifdef PIPEX
983 		if (pipex_enable) {
984 			struct pipex_session *session;
985 
986 			session = pipex_pptp_lookup_session(m);
987 			if (session != NULL &&
988 			    pipex_pptp_input(m, session) == NULL)
989 				return (NULL);
990 		}
991 #endif
992 		break;
993 	case htons(GRE_EOIP):
994 		return (eoip_input(key, m, gh, otos, iphlen));
995 		break;
996 	}
997 
998 	return (m);
999 }
1000 
1001 static int
1002 gre_input_key(struct mbuf **mp, int *offp, int type, int af, uint8_t otos,
1003     struct gre_tunnel *key)
1004 {
1005 	struct mbuf *m = *mp;
1006 	int iphlen = *offp, hlen, rxprio;
1007 	struct ifnet *ifp;
1008 	const struct gre_tunnel *tunnel;
1009 	caddr_t buf;
1010 	struct gre_header *gh;
1011 	struct gre_h_key *gkh;
1012 	void (*input)(struct ifnet *, struct mbuf *);
1013 	struct mbuf *(*patch)(const struct gre_tunnel *, struct mbuf *,
1014 	    uint8_t *, uint8_t);
1015 	int bpf_af = AF_UNSPEC; /* bpf */
1016 	int mcast = 0;
1017 	uint8_t itos;
1018 
1019 	if (!gre_allow)
1020 		goto decline;
1021 
1022 	key->t_rtableid = m->m_pkthdr.ph_rtableid;
1023 
1024 	hlen = iphlen + sizeof(*gh);
1025 	if (m->m_pkthdr.len < hlen)
1026 		goto decline;
1027 
1028 	m = m_pullup(m, hlen);
1029 	if (m == NULL)
1030 		return (IPPROTO_DONE);
1031 
1032 	buf = mtod(m, caddr_t);
1033 	gh = (struct gre_header *)(buf + iphlen);
1034 
1035 	/* check the version */
1036 	switch (gh->gre_flags & htons(GRE_VERS_MASK)) {
1037 	case htons(GRE_VERS_0):
1038 		break;
1039 
1040 	case htons(GRE_VERS_1):
1041 		m = gre_input_1(key, m, gh, otos, iphlen);
1042 		if (m == NULL)
1043 			return (IPPROTO_DONE);
1044 		/* FALLTHROUGH */
1045 	default:
1046 		goto decline;
1047 	}
1048 
1049 	/* the only optional bit in the header is K flag */
1050 	if ((gh->gre_flags & htons(~(GRE_KP|GRE_VERS_MASK))) != htons(0))
1051 		goto decline;
1052 
1053 	if (gh->gre_flags & htons(GRE_KP)) {
1054 		hlen += sizeof(*gkh);
1055 		if (m->m_pkthdr.len < hlen)
1056 			goto decline;
1057 
1058 		m = m_pullup(m, hlen);
1059 		if (m == NULL)
1060 			return (IPPROTO_DONE);
1061 
1062 		buf = mtod(m, caddr_t);
1063 		gh = (struct gre_header *)(buf + iphlen);
1064 		gkh = (struct gre_h_key *)(gh + 1);
1065 
1066 		key->t_key_mask = GRE_KEY_MASK;
1067 		key->t_key = gkh->gre_key;
1068 	} else
1069 		key->t_key_mask = GRE_KEY_NONE;
1070 
1071 	if (gh->gre_proto == htons(ETHERTYPE_TRANSETHER)) {
1072 		if (egre_input(key, m, hlen, otos) == -1 &&
1073 		    nvgre_input(key, m, hlen, otos) == -1)
1074 			goto decline;
1075 
1076 		return (IPPROTO_DONE);
1077 	}
1078 
1079 	ifp = gre_find(key);
1080 	if (ifp == NULL) {
1081 		ifp = mgre_find(key);
1082 		if (ifp == NULL)
1083 			goto decline;
1084 	}
1085 
1086 	switch (gh->gre_proto) {
1087 	case htons(GRE_WCCP): {
1088 		struct mbuf *n;
1089 		int off;
1090 
1091 		/* WCCP/GRE:
1092 		 *   So far as I can see (and test) it seems that Cisco's WCCP
1093 		 *   GRE tunnel is precisely a IP-in-GRE tunnel that differs
1094 		 *   only in its protocol number.  At least, it works for me.
1095 		 *
1096 		 *   The Internet Drafts can be found if you look for
1097 		 *   the following:
1098 		 *     draft-forster-wrec-wccp-v1-00.txt
1099 		 *     draft-wilson-wrec-wccp-v2-01.txt
1100 		 */
1101 
1102 		if (!gre_wccp && !ISSET(ifp->if_flags, IFF_LINK0))
1103 			goto decline;
1104 
1105 		/*
1106 		 * If the first nibble of the payload does not look like
1107 		 * IPv4, assume it is WCCP v2.
1108 		 */
1109 		n = m_getptr(m, hlen, &off);
1110 		if (n == NULL)
1111 			goto decline;
1112 		if (n->m_data[off] >> 4 != IPVERSION)
1113 			hlen += sizeof(gre_wccp);
1114 
1115 		/* FALLTHROUGH */
1116 	}
1117 	case htons(ETHERTYPE_IP):
1118 #if NBPFILTER > 0
1119 		bpf_af = AF_INET;
1120 #endif
1121 		patch = gre_ipv4_patch;
1122 		input = ipv4_input;
1123 		break;
1124 #ifdef INET6
1125 	case htons(ETHERTYPE_IPV6):
1126 #if NBPFILTER > 0
1127 		bpf_af = AF_INET6;
1128 #endif
1129 		patch = gre_ipv6_patch;
1130 		input = ipv6_input;
1131 		break;
1132 #endif
1133 #ifdef MPLS
1134 	case htons(ETHERTYPE_MPLS_MCAST):
1135 		mcast = M_MCAST|M_BCAST;
1136 		/* fallthrough */
1137 	case htons(ETHERTYPE_MPLS):
1138 #if NBPFILTER > 0
1139 		bpf_af = AF_MPLS;
1140 #endif
1141 		patch = gre_mpls_patch;
1142 		input = mpls_input;
1143 		break;
1144 #endif
1145 	case htons(0):
1146 		if (ifp->if_type != IFT_TUNNEL) {
1147 			/* keepalives dont make sense for mgre */
1148 			goto decline;
1149 		}
1150 
1151 		m_adj(m, hlen);
1152 		gre_keepalive_recv(ifp, m);
1153 		return (IPPROTO_DONE);
1154 
1155 	default:
1156 		goto decline;
1157 	}
1158 
1159 	/* it's ours now */
1160 
1161 	m_adj(m, hlen);
1162 
1163 	tunnel = ifp->if_softc; /* gre and mgre tunnel info is at the front */
1164 
1165 	m = (*patch)(tunnel, m, &itos, otos);
1166 	if (m == NULL)
1167 		return (IPPROTO_DONE);
1168 
1169 	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
1170 		m->m_pkthdr.ph_flowid = M_FLOWID_VALID |
1171 		    (bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY);
1172 	}
1173 
1174 	rxprio = tunnel->t_rxhprio;
1175 	switch (rxprio) {
1176 	case IF_HDRPRIO_PACKET:
1177 		/* nop */
1178 		break;
1179 	case IF_HDRPRIO_OUTER:
1180 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos);
1181 		break;
1182 	case IF_HDRPRIO_PAYLOAD:
1183 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(itos);
1184 		break;
1185 	default:
1186 		m->m_pkthdr.pf.prio = rxprio;
1187 		break;
1188 	}
1189 
1190 	m->m_flags &= ~(M_MCAST|M_BCAST);
1191 	m->m_flags |= mcast;
1192 	m->m_pkthdr.ph_ifidx = ifp->if_index;
1193 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
1194 
1195 #if NPF > 0
1196 	pf_pkt_addr_changed(m);
1197 #endif
1198 
1199 	counters_pkt(ifp->if_counters,
1200 	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);
1201 
1202 #if NBPFILTER > 0
1203 	if (ifp->if_bpf)
1204 		bpf_mtap_af(ifp->if_bpf, bpf_af, m, BPF_DIRECTION_IN);
1205 #endif
1206 
1207 	(*input)(ifp, m);
1208 	return (IPPROTO_DONE);
1209 decline:
1210 	*mp = m;
1211 	return (-1);
1212 }
1213 
1214 static struct mbuf *
1215 gre_ipv4_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1216     uint8_t *itosp, uint8_t otos)
1217 {
1218 	struct ip *ip;
1219 	uint8_t itos;
1220 
1221 	m = m_pullup(m, sizeof(*ip));
1222 	if (m == NULL)
1223 		return (NULL);
1224 
1225 	ip = mtod(m, struct ip *);
1226 
1227 	itos = ip->ip_tos;
1228 	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1229 		m_freem(m);
1230 		return (NULL);
1231 	}
1232 	if (itos != ip->ip_tos)
1233 		ip_tos_patch(ip, itos);
1234 
1235 	*itosp = itos;
1236 
1237 	return (m);
1238 }
1239 
1240 #ifdef INET6
1241 static struct mbuf *
1242 gre_ipv6_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1243     uint8_t *itosp, uint8_t otos)
1244 {
1245 	struct ip6_hdr *ip6;
1246 	uint32_t flow;
1247 	uint8_t itos;
1248 
1249 	m = m_pullup(m, sizeof(*ip6));
1250 	if (m == NULL)
1251 		return (NULL);
1252 
1253 	ip6 = mtod(m, struct ip6_hdr *);
1254 
1255 	flow = bemtoh32(&ip6->ip6_flow);
1256 	itos = flow >> 20;
1257 	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1258 		m_freem(m);
1259 		return (NULL);
1260 	}
1261 
1262 	CLR(flow, 0xff << 20);
1263 	SET(flow, itos << 20);
1264 	htobem32(&ip6->ip6_flow, flow);
1265 
1266 	*itosp = itos;
1267 
1268 	return (m);
1269 }
1270 #endif
1271 
1272 #ifdef MPLS
1273 static struct mbuf *
1274 gre_mpls_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1275     uint8_t *itosp, uint8_t otos)
1276 {
1277 	uint8_t itos;
1278 	uint32_t shim;
1279 
1280 	m = m_pullup(m, sizeof(shim));
1281 	if (m == NULL)
1282 		return (NULL);
1283 
1284 	shim = *mtod(m, uint32_t *);
1285 	itos = (ntohl(shim & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET) << 5;
1286 
1287 	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1288 		m_freem(m);
1289 		return (NULL);
1290 	}
1291 
1292 	*itosp = itos;
1293 
1294 	return (m);
1295 }
1296 #endif
1297 
/*
 * gre_l2_prio: set the pf priority of mbuf _m carrying a layer 2
 * payload according to tunnel _t's receive header prio setting:
 * keep the packet's current prio, derive it from the outer tos
 * _otos, or force the configured value.
 */
#define gre_l2_prio(_t, _m, _otos) do {					\
	int rxprio = (_t)->t_rxhprio;					\
	switch (rxprio) {						\
	case IF_HDRPRIO_PACKET:						\
		/* nop */						\
		break;							\
	case IF_HDRPRIO_OUTER:						\
		(_m)->m_pkthdr.pf.prio = IFQ_TOS2PRIO((_otos));		\
		break;							\
	default:							\
		(_m)->m_pkthdr.pf.prio = rxprio;			\
		break;							\
	}								\
} while (0)
1312 
/*
 * egre_input: deliver a GRE encapsulated Ethernet frame to the
 * egre(4) interface matching the tunnel key. Returns -1 when no
 * interface claims the packet (the mbuf is left untouched for the
 * caller to try elsewhere), 0 when the packet was consumed.
 */
static int
egre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen, uint8_t otos)
{
	struct egre_softc *sc;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(egre_tree, &egre_tree, (const struct egre_softc *)key);
	if (sc == NULL)
		return (-1);

	/* it's ours now */
	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (0);

	/* use the low bits of the GRE key as a flow id if configured */
	if (sc->sc_tunnel.t_key_mask == GRE_KEY_ENTROPY) {
		m->m_pkthdr.ph_flowid = M_FLOWID_VALID |
		    (bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY);
	}

	m->m_flags &= ~(M_MCAST|M_BCAST);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	if_vinput(&sc->sc_ac.ac_if, m);

	return (0);
}
1345 
/*
 * nvgre_rtfind: copy the learned Ethernet address table of an
 * nvgre(4) interface out to userland (bridge-style rt ioctl).
 * A request with a zero-length buffer only reports the space a
 * full dump would need.
 */
static int
nvgre_rtfind(struct nvgre_softc *sc, struct ifbaconf *baconf)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct nvgre_entry *nv;
	struct ifbareq bareq;
	caddr_t uaddr, end;
	int error;
	int age;

	if (baconf->ifbac_len == 0) {
		/* single read is atomic */
		baconf->ifbac_len = sc->sc_ether_num * sizeof(bareq);
		return (0);
	}

	uaddr = baconf->ifbac_buf;
	end = uaddr + baconf->ifbac_len;

	/*
	 * Walk the map under the read lock, copying entries out one
	 * at a time until the user buffer is full or we run out.
	 */
	rw_enter_read(&sc->sc_ether_lock);
	RBT_FOREACH(nv, nvgre_map, &sc->sc_ether_map) {
		if (uaddr >= end)
			break;

		memcpy(bareq.ifba_name, ifp->if_xname,
		    sizeof(bareq.ifba_name));
		memcpy(bareq.ifba_ifsname, ifp->if_xname,
		    sizeof(bareq.ifba_ifsname));
		memcpy(&bareq.ifba_dst, &nv->nv_dst,
		    sizeof(bareq.ifba_dst));

		/* report the tunnel endpoint the entry points at */
		memset(&bareq.ifba_dstsa, 0, sizeof(bareq.ifba_dstsa));
		switch (sc->sc_tunnel.t_af) {
		case AF_INET: {
			struct sockaddr_in *sin;

			sin = (struct sockaddr_in *)&bareq.ifba_dstsa;
			sin->sin_len = sizeof(*sin);
			sin->sin_family = AF_INET;
			sin->sin_addr = nv->nv_gateway.in4;

			break;
		}
#ifdef INET6
		case AF_INET6: {
			struct sockaddr_in6 *sin6;

			sin6 = (struct sockaddr_in6 *)&bareq.ifba_dstsa;
			sin6->sin6_len = sizeof(*sin6);
			sin6->sin6_family = AF_INET6;
			sin6->sin6_addr = nv->nv_gateway.in6;

			break;
		}
#endif /* INET6 */
		default:
			unhandled_af(sc->sc_tunnel.t_af);
		}

		switch (nv->nv_type) {
		case NVGRE_ENTRY_DYNAMIC:
			/* age in seconds, clamped to what the field holds */
			age = (ticks - nv->nv_age) / hz;
			bareq.ifba_age = MIN(age, 0xff);
			bareq.ifba_flags = IFBAF_DYNAMIC;
			break;
		case NVGRE_ENTRY_STATIC:
			bareq.ifba_age = 0;
			bareq.ifba_flags = IFBAF_STATIC;
			break;
		}

		error = copyout(&bareq, uaddr, sizeof(bareq));
		if (error != 0) {
			rw_exit_read(&sc->sc_ether_lock);
			return (error);
		}

		uaddr += sizeof(bareq);
	}
	/* report the full table size, even if the dump was truncated */
	baconf->ifbac_len = sc->sc_ether_num * sizeof(bareq);
	rw_exit_read(&sc->sc_ether_lock);

	return (0);
}
1430 
/*
 * nvgre_flush_map: forget all learned Ethernet addresses. The whole
 * tree is stolen by copying its root under the write lock, so the
 * entries can be torn down without holding the lock.
 */
static void
nvgre_flush_map(struct nvgre_softc *sc)
{
	struct nvgre_map map;
	struct nvgre_entry *nv, *nnv;

	rw_enter_write(&sc->sc_ether_lock);
	map = sc->sc_ether_map;
	RBT_INIT(nvgre_map, &sc->sc_ether_map);
	sc->sc_ether_num = 0;
	rw_exit_write(&sc->sc_ether_lock);

	RBT_FOREACH_SAFE(nv, nvgre_map, &map, nnv) {
		RBT_REMOVE(nvgre_map, &map, nv);
		/* drop the map's reference; free on the last one */
		if (refcnt_rele(&nv->nv_refs))
			pool_put(&nvgre_pool, nv);
	}
}
1449 
/*
 * nvgre_input_map: learn which tunnel endpoint the inner source MAC
 * address of a received packet lives behind, like address learning
 * on a bridge port. Unknown addresses get a new dynamic entry;
 * known dynamic entries have their age refreshed.
 */
static void
nvgre_input_map(struct nvgre_softc *sc, const struct gre_tunnel *key,
    const struct ether_header *eh)
{
	struct nvgre_entry *nv, nkey;
	int new = 0;

	/* never learn broadcast or multicast source addresses */
	if (ETHER_IS_BROADCAST(eh->ether_shost) ||
	    ETHER_IS_MULTICAST(eh->ether_shost))
		return;

	memcpy(&nkey.nv_dst, eh->ether_shost, ETHER_ADDR_LEN);

	/* remember where it came from */
	rw_enter_read(&sc->sc_ether_lock);
	nv = RBT_FIND(nvgre_map, &sc->sc_ether_map, &nkey);
	if (nv == NULL)
		new = 1;
	else {
		nv->nv_age = ticks;

		/*
		 * Only dynamic entries are refreshed; a reference is
		 * taken so the gateway can be (re)written below after
		 * upgrading to the write lock.
		 * NOTE(review): nv is kept only when gre_ip_cmp()
		 * returns 0 for this packet's endpoint - confirm
		 * gre_ip_cmp()'s return convention matches the intent
		 * here (refresh vs. move detection).
		 */
		if (nv->nv_type != NVGRE_ENTRY_DYNAMIC ||
		    gre_ip_cmp(key->t_af, &key->t_dst, &nv->nv_gateway))
			nv = NULL;
		else
			refcnt_take(&nv->nv_refs);
	}
	rw_exit_read(&sc->sc_ether_lock);

	if (new) {
		struct nvgre_entry *onv;
		unsigned int num;

		nv = pool_get(&nvgre_pool, PR_NOWAIT);
		if (nv == NULL) {
			/* oh well */
			return;
		}

		memcpy(&nv->nv_dst, eh->ether_shost, ETHER_ADDR_LEN);
		nv->nv_type = NVGRE_ENTRY_DYNAMIC;
		nv->nv_gateway = key->t_dst;
		refcnt_init(&nv->nv_refs);
		nv->nv_age = ticks;

		rw_enter_write(&sc->sc_ether_lock);
		num = sc->sc_ether_num;
		if (++num > sc->sc_ether_max)
			onv = nv;	/* table is full; drop the entry */
		else {
			/* try to give the ref to the map */
			onv = RBT_INSERT(nvgre_map, &sc->sc_ether_map, nv);
			if (onv == NULL) {
				/* count the successful insert */
				sc->sc_ether_num = num;
			}
		}
		rw_exit_write(&sc->sc_ether_lock);

		/* lost an insert race, or the table was full */
		if (onv != NULL)
			pool_put(&nvgre_pool, nv);
	} else if (nv != NULL) {
		rw_enter_write(&sc->sc_ether_lock);
		nv->nv_gateway = key->t_dst;
		rw_exit_write(&sc->sc_ether_lock);

		if (refcnt_rele(&nv->nv_refs)) {
			/* ioctl may have deleted the entry */
			pool_put(&nvgre_pool, nv);
		}
	}
}
1522 
/*
 * nvgre_mcast_find: look up the nvgre(4) interface that matches a
 * multicast tunnel key and the index of the interface the packet
 * arrived on. Returns NULL when nothing matches.
 */
static inline struct nvgre_softc *
nvgre_mcast_find(const struct gre_tunnel *key, unsigned int if0idx)
{
	struct nvgre_softc *sc;
	int rv;

	/*
	 * building an nvgre_softc to use with RBT_FIND is expensive, and
	 * would need to swap the src and dst addresses in the key. so do the
	 * find by hand.
	 */

	NET_ASSERT_LOCKED();
	sc = RBT_ROOT(nvgre_mcast_tree, &nvgre_mcast_tree);
	while (sc != NULL) {
		/* note: the packet's src is compared to the tunnel's dst */
		rv = nvgre_cmp_mcast(key, &key->t_src, if0idx,
		    &sc->sc_tunnel, &sc->sc_tunnel.t_dst, sc->sc_ifp0);
		if (rv == 0)
			return (sc);
		if (rv < 0)
			sc = RBT_LEFT(nvgre_mcast_tree, sc);
		else
			sc = RBT_RIGHT(nvgre_mcast_tree, sc);
	}

	return (NULL);
}
1550 
1551 static inline struct nvgre_softc *
1552 nvgre_ucast_find(const struct gre_tunnel *key)
1553 {
1554 	NET_ASSERT_LOCKED();
1555 	return (RBT_FIND(nvgre_ucast_tree, &nvgre_ucast_tree,
1556 	    (struct nvgre_softc *)key));
1557 }
1558 
/*
 * nvgre_input: deliver a GRE encapsulated Ethernet frame to the
 * matching nvgre(4) interface. Multicast/broadcast (flooded) and
 * unicast packets are looked up in separate trees. Returns -1 when
 * the packet is not claimed (mbuf untouched), 0 when consumed.
 */
static int
nvgre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen,
    uint8_t otos)
{
	struct nvgre_softc *sc;

	if (ISSET(m->m_flags, M_MCAST|M_BCAST))
		sc = nvgre_mcast_find(key, m->m_pkthdr.ph_ifidx);
	else
		sc = nvgre_ucast_find(key);

	if (sc == NULL)
		return (-1);

	/* it's ours now */
	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (0);

	/* learn the inner source MAC against the outer endpoint */
	nvgre_input_map(sc, key, mtod(m, struct ether_header *));

	/* use the low bits of the key as a flow id */
	m->m_pkthdr.ph_flowid = M_FLOWID_VALID |
	    (bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY);

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	m->m_flags &= ~(M_MCAST|M_BCAST);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	if_vinput(&sc->sc_ac.ac_if, m);

	return (0);
}
1595 
1596 static struct mbuf *
1597 gre_ether_align(struct mbuf *m, int hlen)
1598 {
1599 	struct mbuf *n;
1600 	int off;
1601 
1602 	m_adj(m, hlen);
1603 
1604 	if (m->m_pkthdr.len < sizeof(struct ether_header)) {
1605 		m_freem(m);
1606 		return (NULL);
1607 	}
1608 
1609 	m = m_pullup(m, sizeof(struct ether_header));
1610 	if (m == NULL)
1611 		return (NULL);
1612 
1613 	n = m_getptr(m, sizeof(struct ether_header), &off);
1614 	if (n == NULL) {
1615 		m_freem(m);
1616 		return (NULL);
1617 	}
1618 
1619 	if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
1620 		n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
1621 		m_freem(m);
1622 		if (n == NULL)
1623 			return (NULL);
1624 		m = n;
1625 	}
1626 
1627 	return (m);
1628 }
1629 
/*
 * gre_keepalive_recv: handle a keepalive packet that came back over
 * the tunnel. The SipHash digest is computed with our own key, so a
 * valid packet must be one we generated and the peer echoed back.
 * Drives the keepalive state machine towards link up. Always
 * consumes the mbuf.
 */
static void
gre_keepalive_recv(struct ifnet *ifp, struct mbuf *m)
{
	struct gre_softc *sc = ifp->if_softc;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	uint8_t digest[SIPHASH_DIGEST_LENGTH];
	int uptime, delta;
	int tick = ticks;

	if (sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		goto drop;

	if (m->m_pkthdr.len < sizeof(*gk))
		goto drop;
	m = m_pullup(m, sizeof(*gk));
	if (m == NULL)
		return;	/* m_pullup already freed the chain */

	/* reject packets whose timestamp is in the future or too old */
	gk = mtod(m, struct gre_keepalive *);
	uptime = bemtoh32(&gk->gk_uptime) - sc->sc_ka_bias;
	delta = tick - uptime;
	if (delta < 0)
		goto drop;
	if (delta > hz * 10) /* magic */
		goto drop;

	/* avoid too much siphash work */
	delta = tick - sc->sc_ka_recvtm;
	if (delta > 0 && delta < (hz / 10))
		goto drop;

	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(digest, &ctx);

	if (memcmp(digest, gk->gk_digest, sizeof(digest)) != 0)
		goto drop;

	sc->sc_ka_recvtm = tick;

	switch (sc->sc_ka_state) {
	case GRE_KA_DOWN:
		/* first keepalive after an outage: hold before going up */
		sc->sc_ka_state = GRE_KA_HOLD;
		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
		    16 * sc->sc_ka_count);
		break;
	case GRE_KA_HOLD:
		/* wait out the hold count before declaring the link up */
		if (--sc->sc_ka_holdcnt > 0)
			break;

		sc->sc_ka_state = GRE_KA_UP;
		gre_link_state(&sc->sc_if, sc->sc_ka_state);
		break;

	case GRE_KA_UP:
		/* slowly decay the hold requirement while stable */
		sc->sc_ka_holdmax--;
		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
		break;
	}

	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);

drop:
	/* the keepalive never goes further up the stack */
	m_freem(m);
}
1699 
/*
 * gre_output: if_output handler for gre(4). Validates the address
 * family, tags the packet against encapsulation loops, and queues it
 * on the interface send queue; the actual GRE encapsulation happens
 * later in gre_start().
 */
static int
gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	struct m_tag *mtag;
	int error = 0;

	/* gre can be disabled globally via the gre_allow switch */
	if (!gre_allow) {
		error = EACCES;
		goto drop;
	}

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = ENETDOWN;
		goto drop;
	}

	switch (dst->sa_family) {
	case AF_INET:
#ifdef INET6
	case AF_INET6:
#endif
#ifdef MPLS
	case AF_MPLS:
#endif
		break;
	default:
		error = EAFNOSUPPORT;
		goto drop;
	}

	/* Try to limit infinite recursion through misconfiguration. */
	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
		    sizeof(ifp->if_index)) == 0) {
			m_freem(m);
			error = EIO;
			goto end;
		}
	}

	/* tag the packet with our index so we can spot it coming back */
	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL) {
		m_freem(m);
		error = ENOBUFS;
		goto end;
	}
	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
	m_tag_prepend(m, mtag);

	/* remember the payload family for gre_start() */
	m->m_pkthdr.ph_family = dst->sa_family;

	error = if_enqueue(ifp, m);
end:
	if (error)
		ifp->if_oerrors++;
	return (error);

drop:
	m_freem(m);
	return (error);
}
1763 
1764 void
1765 gre_start(struct ifnet *ifp)
1766 {
1767 	struct gre_softc *sc = ifp->if_softc;
1768 	struct mbuf *m;
1769 	int af;
1770 #if NBPFILTER > 0
1771 	caddr_t if_bpf;
1772 #endif
1773 
1774 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
1775 		af = m->m_pkthdr.ph_family;
1776 
1777 #if NBPFILTER > 0
1778 		if_bpf = ifp->if_bpf;
1779 		if (if_bpf)
1780 			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
1781 #endif
1782 
1783 		m = gre_l3_encap(&sc->sc_tunnel, m, af);
1784 		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
1785 			ifp->if_oerrors++;
1786 			continue;
1787 		}
1788 	}
1789 }
1790 
/*
 * mgre_rtrequest: rtrequest hook for mgre(4). When a local route is
 * added for one of the interface's own addresses, clear RTF_LLINFO
 * on it, provided the rdomain's loopback interface has an address in
 * the same family.
 */
void
mgre_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* find the interface address the route is for */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/* does lo0 in this rdomain have a same-family address? */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		/* lo0ifa is only compared against NULL after this */
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1833 
/*
 * mgre_output: if_output handler for mgre(4), the point-to-multipoint
 * gre interface. The tunnel destination for each packet is taken from
 * the gateway of the host route the packet was routed over, so the
 * encapsulation can happen here rather than in mgre_start().
 */
static int
mgre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dest,
    struct rtentry *rt0)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct sockaddr *gate;
	struct rtentry *rt;
	struct m_tag *mtag;
	int error = 0;
	sa_family_t af;
	const void *addr;

	/* gre can be disabled globally via the gre_allow switch */
	if (!gre_allow) {
		error = EACCES;
		goto drop;
	}

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = ENETDOWN;
		goto drop;
	}

	switch (dest->sa_family) {
	case AF_INET:
#ifdef INET6
	case AF_INET6:
#endif
#ifdef MPLS
	case AF_MPLS:
#endif
		break;
	default:
		error = EAFNOSUPPORT;
		goto drop;
	}

	if (ISSET(m->m_flags, M_MCAST|M_BCAST)) {
		error = ENETUNREACH;
		goto drop;
	}

	rt = rt_getll(rt0);

	/* check rt_expire? */
	if (ISSET(rt->rt_flags, RTF_REJECT)) {
		error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
		goto drop;
	}
	/* only a host route with a plain gateway address is usable */
	if (!ISSET(rt->rt_flags, RTF_HOST)) {
		error = EHOSTUNREACH;
		goto drop;
	}
	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		error = EINVAL;
		goto drop;
	}

	/* the gateway must match the tunnel's configured outer family */
	gate = rt->rt_gateway;
	af = gate->sa_family;
	if (af != sc->sc_tunnel.t_af) {
		error = EAGAIN;
		goto drop;
	}

	/* Try to limit infinite recursion through misconfiguration. */
	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
		    sizeof(ifp->if_index)) == 0) {
			error = EIO;
			goto drop;
		}
	}

	/* tag the packet with our index so we can spot it coming back */
	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL) {
		error = ENOBUFS;
		goto drop;
	}
	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
	m_tag_prepend(m, mtag);

	/* pull the tunnel destination out of the gateway sockaddr */
	switch (af) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)gate;
		addr = &sin->sin_addr;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)gate;
		addr = &sin6->sin6_addr;
		break;
	}
 #endif
	default:
		unhandled_af(af);
		/* NOTREACHED */
	}

	m = gre_l3_encap_dst(&sc->sc_tunnel, addr, m, dest->sa_family);
	if (m == NULL) {
		ifp->if_oerrors++;
		return (ENOBUFS);
	}

	m->m_pkthdr.ph_family = dest->sa_family;

	error = if_enqueue(ifp, m);
	if (error)
		ifp->if_oerrors++;
	return (error);

drop:
	m_freem(m);
	return (error);
}
1951 
/*
 * mgre_start: ifq start routine for mgre(4). Packets were already
 * encapsulated in mgre_output(), so this only taps bpf and pushes
 * them out the tunnel.
 */
static void
mgre_start(struct ifnet *ifp)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct mbuf *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf) {
			struct m_hdr mh;
			struct mbuf *n;
			int off;

			/*
			 * Build a throwaway mbuf header that starts
			 * if_hdrlen bytes in, so bpf sees the inner
			 * packet rather than the encapsulation.
			 */
			n = m_getptr(m, ifp->if_hdrlen, &off);
			KASSERT(n != NULL);

			mh.mh_flags = 0;
			mh.mh_next = n->m_next;
			mh.mh_len = n->m_len - off;
			mh.mh_data = n->m_data + off;

			bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family,
			    (struct mbuf *)&mh, BPF_DIRECTION_OUT);
		}
#endif

		if (gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
1988 
/*
 * egre_start: ifq start routine for egre(4). Encapsulates queued
 * Ethernet frames in GRE and sends them out the tunnel.
 */
static void
egre_start(struct ifnet *ifp)
{
	struct egre_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* gre can be disabled globally via the gre_allow switch */
	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		/* move the packet header onto the new empty front mbuf */
		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap(&sc->sc_tunnel, m, htons(ETHERTYPE_TRANSETHER),
		    sc->sc_tunnel.t_ttl, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
2031 
/*
 * gre_l3_encap_dst: wrap a layer 3 payload in a GRE header addressed
 * to dst. The payload's tos and ttl are extracted first so the outer
 * header can be built according to the tunnel's ttl/ecn/txprio
 * configuration.
 */
static struct mbuf *
gre_l3_encap_dst(const struct gre_tunnel *tunnel, const void *dst,
    struct mbuf *m, sa_family_t af)
{
	uint16_t proto;
	uint8_t ttl, itos, otos;
	int tttl = tunnel->t_ttl;
	int ttloff;

	switch (af) {
	case AF_INET: {
		struct ip *ip;

		m = m_pullup(m, sizeof(*ip));
		if (m == NULL)
			return (NULL);

		ip = mtod(m, struct ip *);
		itos = ip->ip_tos;

		ttloff = offsetof(struct ip, ip_ttl);
		proto = htons(ETHERTYPE_IP);
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;

		m = m_pullup(m, sizeof(*ip6));
		if (m == NULL)
			return (NULL);

		/* traffic class lives in bits 27:20 of the flow word */
		ip6 = mtod(m, struct ip6_hdr *);
		itos = (ntohl(ip6->ip6_flow) & 0x0ff00000) >> 20;

		ttloff = offsetof(struct ip6_hdr, ip6_hlim);
		proto = htons(ETHERTYPE_IPV6);
		break;
	}
 #endif
#ifdef MPLS
	case AF_MPLS: {
		uint32_t shim;

		m = m_pullup(m, sizeof(shim));
		if (m == NULL)
			return (NULL);

		/* scale the 3 EXP bits up into the top of a tos byte */
		shim = bemtoh32(mtod(m, uint32_t *)) & MPLS_EXP_MASK;
		itos = (shim >> MPLS_EXP_OFFSET) << 5;

		/* the ttl is the low byte of the (big endian) shim word */
		ttloff = 3;

		if (m->m_flags & (M_BCAST | M_MCAST))
			proto = htons(ETHERTYPE_MPLS_MCAST);
		else
			proto = htons(ETHERTYPE_MPLS);
		break;
	}
#endif
	default:
		unhandled_af(af);
	}

	/* a tunnel ttl of -1 means copy it from the payload */
	if (tttl == -1) {
		KASSERT(m->m_len > ttloff); /* m_pullup has happened */

		ttl = *(m->m_data + ttloff);
	} else
		ttl = tttl;

	itos = gre_l3_tos(tunnel, m, itos);
	ip_ecn_ingress(tunnel->t_ecn, &otos, &itos);

	return (gre_encap_dst(tunnel, dst, m, proto, ttl, otos));
}
2108 
/*
 * gre_encap_dst: prepend the GRE header (and the optional key field)
 * to m, then hand it to gre_encap_dst_ip() for the outer IP header.
 */
static struct mbuf *
gre_encap_dst(const struct gre_tunnel *tunnel, const union gre_addr *dst,
    struct mbuf *m, uint16_t proto, uint8_t ttl, uint8_t tos)
{
	struct gre_header *gh;
	struct gre_h_key *gkh;
	int hlen;

	hlen = sizeof(*gh);
	if (tunnel->t_key_mask != GRE_KEY_NONE)
		hlen += sizeof(*gkh);

	m = m_prepend(m, hlen, M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	gh = mtod(m, struct gre_header *);
	gh->gre_flags = GRE_VERS_0;
	gh->gre_proto = proto;
	if (tunnel->t_key_mask != GRE_KEY_NONE) {
		gh->gre_flags |= htons(GRE_KP);

		gkh = (struct gre_h_key *)(gh + 1);
		gkh->gre_key = tunnel->t_key;

		/* borrow the low bits of the key for flow entropy */
		if (tunnel->t_key_mask == GRE_KEY_ENTROPY &&
		    ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID)) {
			gkh->gre_key |= htonl(~GRE_KEY_ENTROPY &
			    (m->m_pkthdr.ph_flowid & M_FLOWID_MASK));
		}
	}

	return (gre_encap_dst_ip(tunnel, dst, m, ttl, tos));
}
2143 
/*
 * gre_encap_dst_ip: prepend the outer IPv4 or IPv6 header, addressed
 * from the tunnel source to dst, in the tunnel's configured family.
 */
static struct mbuf *
gre_encap_dst_ip(const struct gre_tunnel *tunnel, const union gre_addr *dst,
    struct mbuf *m, uint8_t ttl, uint8_t tos)
{
	switch (tunnel->t_af) {
	case AF_UNSPEC:
		/* packets may arrive before tunnel is set up */
		m_freem(m);
		return (NULL);
	case AF_INET: {
		struct ip *ip;

		m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
		if (m == NULL)
			return (NULL);

		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_off = tunnel->t_df;
		ip->ip_tos = tos;
		ip->ip_len = htons(m->m_pkthdr.len);
		ip->ip_ttl = ttl;
		ip->ip_p = IPPROTO_GRE;
		ip->ip_src = tunnel->t_src4;
		ip->ip_dst = dst->in4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;
		int len = m->m_pkthdr.len;

		m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
		if (m == NULL)
			return (NULL);

		/* seed the flow label from the packet's flow id if set */
		ip6 = mtod(m, struct ip6_hdr *);
		ip6->ip6_flow = ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID) ?
		    htonl(m->m_pkthdr.ph_flowid & M_FLOWID_MASK) : 0;
		ip6->ip6_vfc |= IPV6_VERSION;
		ip6->ip6_flow |= htonl((uint32_t)tos << 20);
		ip6->ip6_plen = htons(len);
		ip6->ip6_nxt = IPPROTO_GRE;
		ip6->ip6_hlim = ttl;
		ip6->ip6_src = tunnel->t_src6;
		ip6->ip6_dst = dst->in6;

		/* IPv6 has no DF bit; ask the stack not to fragment */
		if (tunnel->t_df)
			SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);

		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(tunnel->t_af);
	}

	return (m);
}
2204 
2205 static int
2206 gre_ip_output(const struct gre_tunnel *tunnel, struct mbuf *m)
2207 {
2208 	m->m_flags &= ~(M_BCAST|M_MCAST);
2209 	m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;
2210 
2211 #if NPF > 0
2212 	pf_pkt_addr_changed(m);
2213 #endif
2214 
2215 	switch (tunnel->t_af) {
2216 	case AF_INET:
2217 		ip_send(m);
2218 		break;
2219 #ifdef INET6
2220 	case AF_INET6:
2221 		ip6_send(m);
2222 		break;
2223 #endif
2224 	default:
2225 		unhandled_af(tunnel->t_af);
2226 	}
2227 
2228 	return (0);
2229 }
2230 
/*
 * gre_tunnel_ioctl: ioctl handling shared by the gre-ish interfaces
 * for configuring tunnel endpoints, vnetid, routing domain, mtu,
 * and the outer DF bit.
 */
static int
gre_tunnel_ioctl(struct ifnet *ifp, struct gre_tunnel *tunnel,
    u_long cmd, void *data)
{
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFMTU:
		/* 576 is the traditional minimum IPv4 datagram size */
		if (ifr->ifr_mtu < 576) {
			error = EINVAL;
			break;
		}
		ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	case SIOCSVNETID:
		error = gre_set_vnetid(tunnel, ifr);
		break;

	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;
	case SIOCDVNETID:
		error = gre_del_vnetid(tunnel);
		break;

	case SIOCSVNETFLOWID:
		error = gre_set_vnetflowid(tunnel, ifr);
		break;

	case SIOCGVNETFLOWID:
		error = gre_get_vnetflowid(tunnel, ifr);
		break;

	case SIOCSLIFPHYADDR:
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		error = gre_del_tunnel(tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		/* the rdomain must exist before we will send over it */
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ? 1 : 0;
		break;

	default:
		error = ENOTTY;
		break;
	}

	return (error);
}
2307 
2308 static uint8_t
2309 gre_l2_tos(const struct gre_tunnel *t, const struct mbuf *m)
2310 {
2311 	uint8_t prio;
2312 
2313 	switch (t->t_txhprio) {
2314 	case IF_HDRPRIO_PACKET:
2315 		prio = m->m_pkthdr.pf.prio;
2316 		break;
2317 	default:
2318 		prio = t->t_txhprio;
2319 		break;
2320 	}
2321 
2322 	return (IFQ_PRIO2TOS(prio));
2323 }
2324 
2325 static uint8_t
2326 gre_l3_tos(const struct gre_tunnel *t, const struct mbuf *m, uint8_t tos)
2327 {
2328 	uint8_t prio;
2329 
2330 	switch (t->t_txhprio) {
2331 	case IF_HDRPRIO_PAYLOAD:
2332 		return (tos);
2333 	case IF_HDRPRIO_PACKET:
2334 		prio = m->m_pkthdr.pf.prio;
2335 		break;
2336 	default:
2337 		prio = t->t_txhprio;
2338 		break;
2339 	}
2340 
2341 	return (IFQ_PRIO2TOS(prio) | (tos & IPTOS_ECN_MASK));
2342 }
2343 
/*
 * gre_ioctl: ioctl handler for gre(4). Handles interface up/down,
 * keepalives, ttl/ecn and header prio settings; everything else is
 * deferred to the shared gre_tunnel_ioctl().
 */
static int
gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct gre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		/* FALLTHROUGH */
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_down(sc);
		}
		break;
	case SIOCSIFRDOMAIN:
		/* let if_rdomain do its thing */
		error = ENOTTY;
		break;

	case SIOCSETKALIVE:
		/* timeout and count must both be set, or both be zero */
		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256 ||
		    (ikar->ikar_timeo == 0) != (ikar->ikar_cnt == 0))
			return (EINVAL);

		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			/* disable keepalives */
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;

			/* fresh digest key and timestamp bias per config */
			arc4random_buf(&sc->sc_ka_key, sizeof(sc->sc_ka_key));
			sc->sc_ka_bias = arc4random();
			sc->sc_ka_holdmax = sc->sc_ka_count;

			sc->sc_ka_recvtm = ticks - hz;
			timeout_add(&sc->sc_ka_send, 1);
			timeout_add_sec(&sc->sc_ka_hold,
			    sc->sc_ka_timeo * sc->sc_ka_count);
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSLIFPHYTTL:
		/* -1 means copy the ttl from the payload */
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}
2455 
/*
 * mgre_ioctl: ioctl handler for point-to-multipoint mgre(4) interfaces.
 *
 * Tunnel endpoint, vnetid, and rtable changes are refused while the
 * interface is running because they key the mgre_tree lookup; unknown
 * commands fall through to gre_tunnel_ioctl().
 */
static int
mgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* bring the interface up or down to match IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		/* -1 means "copy the payload TTL", else 1..255 */
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSLIFPHYADDR:
		/* the local address keys the running interface lookup */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = mgre_set_tunnel(sc, (struct if_laddrreq *)data);
		break;
	case SIOCGLIFPHYADDR:
		error = mgre_get_tunnel(sc, (struct if_laddrreq *)data);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		/* these also affect the lookup key; refuse while running */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}
2551 
2552 static int
2553 mgre_set_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
2554 {
2555 	struct gre_tunnel *tunnel = &sc->sc_tunnel;
2556 	struct sockaddr *addr = (struct sockaddr *)&req->addr;
2557 	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
2558 	struct sockaddr_in *addr4;
2559 #ifdef INET6
2560 	struct sockaddr_in6 *addr6;
2561 	int error;
2562 #endif
2563 
2564 	if (dstaddr->sa_family != AF_UNSPEC)
2565 		return (EINVAL);
2566 
2567 	/* validate */
2568 	switch (addr->sa_family) {
2569 	case AF_INET:
2570 		if (addr->sa_len != sizeof(*addr4))
2571 			return (EINVAL);
2572 
2573 		addr4 = (struct sockaddr_in *)addr;
2574 		if (in_nullhost(addr4->sin_addr) ||
2575 		    IN_MULTICAST(addr4->sin_addr.s_addr))
2576 			return (EINVAL);
2577 
2578 		tunnel->t_src4 = addr4->sin_addr;
2579 		tunnel->t_dst4.s_addr = INADDR_ANY;
2580 
2581 		break;
2582 #ifdef INET6
2583 	case AF_INET6:
2584 		if (addr->sa_len != sizeof(*addr6))
2585 			return (EINVAL);
2586 
2587 		addr6 = (struct sockaddr_in6 *)addr;
2588 		if (IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr) ||
2589 		    IN6_IS_ADDR_MULTICAST(&addr6->sin6_addr))
2590 			return (EINVAL);
2591 
2592 		error = in6_embedscope(&tunnel->t_src6, addr6, NULL);
2593 		if (error != 0)
2594 			return (error);
2595 
2596 		memset(&tunnel->t_dst6, 0, sizeof(tunnel->t_dst6));
2597 
2598 		break;
2599 #endif
2600 	default:
2601 		return (EAFNOSUPPORT);
2602 	}
2603 
2604 	/* commit */
2605 	tunnel->t_af = addr->sa_family;
2606 
2607 	return (0);
2608 }
2609 
2610 static int
2611 mgre_get_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
2612 {
2613 	struct gre_tunnel *tunnel = &sc->sc_tunnel;
2614 	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
2615 	struct sockaddr_in *sin;
2616 #ifdef INET6
2617 	struct sockaddr_in6 *sin6;
2618 #endif
2619 
2620 	switch (tunnel->t_af) {
2621 	case AF_UNSPEC:
2622 		return (EADDRNOTAVAIL);
2623 	case AF_INET:
2624 		sin = (struct sockaddr_in *)&req->addr;
2625 		memset(sin, 0, sizeof(*sin));
2626 		sin->sin_family = AF_INET;
2627 		sin->sin_len = sizeof(*sin);
2628 		sin->sin_addr = tunnel->t_src4;
2629 		break;
2630 
2631 #ifdef INET6
2632 	case AF_INET6:
2633 		sin6 = (struct sockaddr_in6 *)&req->addr;
2634 		memset(sin6, 0, sizeof(*sin6));
2635 		sin6->sin6_family = AF_INET6;
2636 		sin6->sin6_len = sizeof(*sin6);
2637 		in6_recoverscope(sin6, &tunnel->t_src6);
2638 		break;
2639 #endif
2640 	default:
2641 		unhandled_af(tunnel->t_af);
2642 	}
2643 
2644 	dstaddr->sa_len = 2;
2645 	dstaddr->sa_family = AF_UNSPEC;
2646 
2647 	return (0);
2648 }
2649 
/*
 * egre_ioctl: ioctl handler for Ethernet-over-GRE egre(4) interfaces.
 *
 * Tunnel-key material (vnetid, endpoints, rtable) may only be changed
 * while the interface is down since it keys the egre_tree lookup.
 * Commands neither this function nor gre_tunnel_ioctl() understand are
 * handed to ether_ioctl().
 */
static int
egre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct egre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* bring the interface up or down to match IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		/* no "copy payload TTL" for l2 tunnels; just 1..255 */
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSLIFPHYADDR:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		/* these key the egre_tree lookup; refuse while running */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		if (error == ENOTTY)
			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
2734 
/*
 * nvgre_ioctl: ioctl handler for nvgre(4) interfaces.
 *
 * On top of the usual tunnel knobs, nvgre carries a parent interface
 * (for multicast) and a learned MAC address table; configuration
 * changes that affect where packets come from or go to flush that
 * table via nvgre_flush_map().  Bridge-style ioctls manage the table's
 * size and timeout.
 */
static int
nvgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct nvgre_softc *sc = ifp->if_softc;
	struct gre_tunnel *tunnel = &sc->sc_tunnel;

	struct ifreq *ifr = (struct ifreq *)data;
	struct if_parent *parent = (struct if_parent *)data;
	struct ifbrparam *bparam = (struct ifbrparam *)data;
	struct ifnet *ifp0;

	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* bring the interface up or down to match IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_up(sc);
			else
				error = ENETRESET;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_down(sc);
		}
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 0);
		if (error == 0)
			nvgre_flush_map(sc);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = gre_del_tunnel(tunnel);
		if (error == 0)
			nvgre_flush_map(sc);
		break;

	case SIOCSIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = nvgre_set_parent(sc, parent->ifp_parent);
		if (error == 0)
			nvgre_flush_map(sc);
		break;
	case SIOCGIFPARENT:
		ifp0 = if_get(sc->sc_ifp0);
		if (ifp0 == NULL)
			error = EADDRNOTAVAIL;
		else {
			memcpy(parent->ifp_parent, ifp0->if_xname,
			    sizeof(parent->ifp_parent));
		}
		if_put(ifp0);
		break;
	case SIOCDIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* commit */
		sc->sc_ifp0 = 0;
		nvgre_flush_map(sc);
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* nvgre always uses entropy keys: vnetid sits in the top bits */
		if (ifr->ifr_vnetid < GRE_KEY_ENTROPY_MIN ||
		    ifr->ifr_vnetid > GRE_KEY_ENTROPY_MAX) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_key = htonl(ifr->ifr_vnetid << GRE_KEY_ENTROPY_SHIFT);
		nvgre_flush_map(sc);
		break;
	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		nvgre_flush_map(sc);
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ? 1 : 0;
		break;

	case SIOCSLIFPHYTTL:
		/* no "copy payload TTL" for l2 tunnels; just 1..255 */
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = tunnel->t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCBRDGSCACHE:
		/* maximum number of learned MAC entries */
		if (bparam->ifbrp_csize < 1) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_ether_max = bparam->ifbrp_csize;
		break;
	case SIOCBRDGGCACHE:
		bparam->ifbrp_csize = sc->sc_ether_max;
		break;

	case SIOCBRDGSTO:
		/* learned-entry timeout; stored in ticks, reported in secs */
		if (bparam->ifbrp_ctime < 0 ||
		    bparam->ifbrp_ctime > INT_MAX / hz) {
			error = EINVAL;
			break;
		}
		sc->sc_ether_tmo = bparam->ifbrp_ctime * hz;
		break;
	case SIOCBRDGGTO:
		bparam->ifbrp_ctime = sc->sc_ether_tmo / hz;
		break;

	case SIOCBRDGRTS:
		error = nvgre_rtfind(sc, (struct ifbaconf *)data);
		break;
	case SIOCBRDGFLUSH:
		nvgre_flush_map(sc);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
2939 
/*
 * eoip_ioctl: ioctl handler for MikroTik Ethernet-over-IP eoip(4)
 * interfaces.
 *
 * The tunnel id is 16 bits and stored little-endian (MikroTik wire
 * format).  Most configuration is refused while the interface is
 * running; leftovers go to ether_ioctl().
 */
static int
eoip_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* bring the interface up or down to match IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_down(sc);
		}
		break;

	case SIOCSETKALIVE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* timeout in seconds (at most a day), count of probes */
		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256)
			return (EINVAL);

		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* the eoip tunnel id is a 16 bit field */
		if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffff)
			return (EINVAL);

		sc->sc_tunnel.t_key = htole16(ifr->ifr_vnetid); /* for cmp */
		sc->sc_tunnel_id = htole16(ifr->ifr_vnetid);
		break;

	case SIOCGVNETID:
		ifr->ifr_vnetid = letoh16(sc->sc_tunnel_id);
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* eoip is point-to-point: unicast destinations only */
		error = gre_set_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		error = gre_del_tunnel(&sc->sc_tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
		break;

	case SIOCSLIFPHYTTL:
		/* no "copy payload TTL" for l2 tunnels; just 1..255 */
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;
	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		sc->sc_tunnel.t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = sc->sc_tunnel.t_df ? 1 : 0;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
3104 
3105 static int
3106 gre_up(struct gre_softc *sc)
3107 {
3108 	NET_ASSERT_LOCKED();
3109 	SET(sc->sc_if.if_flags, IFF_RUNNING);
3110 
3111 	if (sc->sc_ka_state != GRE_KA_NONE)
3112 		gre_keepalive_send(sc);
3113 
3114 	return (0);
3115 }
3116 
3117 static int
3118 gre_down(struct gre_softc *sc)
3119 {
3120 	NET_ASSERT_LOCKED();
3121 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
3122 
3123 	if (sc->sc_ka_state != GRE_KA_NONE) {
3124 		timeout_del_barrier(&sc->sc_ka_hold);
3125 		timeout_del_barrier(&sc->sc_ka_send);
3126 
3127 		sc->sc_ka_state = GRE_KA_DOWN;
3128 		gre_link_state(&sc->sc_if, sc->sc_ka_state);
3129 	}
3130 
3131 	return (0);
3132 }
3133 
3134 static void
3135 gre_link_state(struct ifnet *ifp, unsigned int state)
3136 {
3137 	int link_state = LINK_STATE_UNKNOWN;
3138 
3139 	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
3140 		switch (state) {
3141 		case GRE_KA_NONE:
3142 			/* maybe up? or down? it's unknown, really */
3143 			break;
3144 		case GRE_KA_UP:
3145 			link_state = LINK_STATE_UP;
3146 			break;
3147 		default:
3148 			link_state = LINK_STATE_KALIVE_DOWN;
3149 			break;
3150 		}
3151 	}
3152 
3153 	if (ifp->if_link_state != link_state) {
3154 		ifp->if_link_state = link_state;
3155 		if_link_state_change(ifp);
3156 	}
3157 }
3158 
/*
 * gre_keepalive_send: periodic timeout that emits one keepalive probe.
 *
 * A keepalive is a small SipHash-signed payload that is encapsulated
 * twice: the inner header is addressed from the peer back to us, so
 * the far end can simply decapsulate the outer header and forward the
 * rest straight back through the tunnel.
 */
static void
gre_keepalive_send(void *arg)
{
	struct gre_tunnel t;
	struct gre_softc *sc = arg;
	struct mbuf *m;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	int linkhdr, len;
	uint16_t proto;
	uint8_t ttl;
	uint8_t tos;

	/*
	 * re-schedule immediately, so we deal with incomplete configuation
	 * or temporary errors.
	 */
	if (sc->sc_ka_timeo)
		timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);

	/* nothing to do unless the tunnel is up and fully configured */
	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_af == AF_UNSPEC ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#endif
	len = linkhdr + sizeof(*gk);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	if (len > MHLEN) {
		/* won't fit in the mbuf header; get a cluster */
		MCLGETI(m, M_DONTWAIT, NULL, len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	/* reserve headroom for all the headers prepended below */
	m->m_pkthdr.len = m->m_len = len;
	m_adj(m, linkhdr);

	/*
	 * build the inside packet
	 */
	gk = mtod(m, struct gre_keepalive *);
	htobem32(&gk->gk_uptime, sc->sc_ka_bias + ticks);
	htobem32(&gk->gk_random, arc4random());

	/* sign the payload so replies can be authenticated on receive */
	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(gk->gk_digest, &ctx);

	ttl = sc->sc_tunnel.t_ttl == -1 ? ip_defttl : sc->sc_tunnel.t_ttl;

	m->m_pkthdr.pf.prio = sc->sc_if.if_llprio;
	tos = gre_l3_tos(&sc->sc_tunnel, m, IFQ_PRIO2TOS(m->m_pkthdr.pf.prio));

	/* inner tunnel: swap src/dst so the peer loops the probe back */
	t.t_af = sc->sc_tunnel.t_af;
	t.t_df = sc->sc_tunnel.t_df;
	t.t_src = sc->sc_tunnel.t_dst;
	t.t_dst = sc->sc_tunnel.t_src;
	t.t_key = sc->sc_tunnel.t_key;
	t.t_key_mask = sc->sc_tunnel.t_key_mask;

	m = gre_encap(&t, m, htons(0), ttl, tos);
	if (m == NULL)
		return;

	switch (sc->sc_tunnel.t_af) {
	case AF_INET: {
		struct ip *ip;

		/* finish the inner IP header by hand */
		ip = mtod(m, struct ip *);
		ip->ip_id = htons(ip_randomid());
		ip->ip_sum = 0;
		ip->ip_sum = in_cksum(m, sizeof(*ip));

		proto = htons(ETHERTYPE_IP);
		break;
	}
#ifdef INET6
	case AF_INET6:
		proto = htons(ETHERTYPE_IPV6);
		break;
#endif
	default:
		m_freem(m);
		return;
	}

	/*
	 * put it in the tunnel
	 */
	m = gre_encap(&sc->sc_tunnel, m, proto, ttl, tos);
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);
}
3269 
3270 static void
3271 gre_keepalive_hold(void *arg)
3272 {
3273 	struct gre_softc *sc = arg;
3274 	struct ifnet *ifp = &sc->sc_if;
3275 
3276 	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
3277 	    sc->sc_ka_state == GRE_KA_NONE)
3278 		return;
3279 
3280 	NET_LOCK();
3281 	sc->sc_ka_state = GRE_KA_DOWN;
3282 	gre_link_state(ifp, sc->sc_ka_state);
3283 	NET_UNLOCK();
3284 }
3285 
/*
 * gre_set_tunnel: validate and install tunnel endpoint addresses.
 *
 * With ucast set, the destination must be a unicast address (eoip);
 * with ucast clear, it must be a multicast group (nvgre).  The address
 * family is only committed once everything has validated.
 */
static int
gre_set_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req, int ucast)
{
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	int error;
#endif

	/* sa_family and sa_len must be equal */
	if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len)
		return (EINVAL);

	/* validate */
	switch (dst->sa_family) {
	case AF_INET:
		if (dst->sa_len != sizeof(*dst4))
			return (EINVAL);

		/* source must be a real unicast address */
		src4 = (struct sockaddr_in *)src;
		if (in_nullhost(src4->sin_addr) ||
		    IN_MULTICAST(src4->sin_addr.s_addr))
			return (EINVAL);

		/* destination's multicast-ness must match the ucast flag */
		dst4 = (struct sockaddr_in *)dst;
		if (in_nullhost(dst4->sin_addr) ||
		    (IN_MULTICAST(dst4->sin_addr.s_addr) != !ucast))
			return (EINVAL);

		tunnel->t_src4 = src4->sin_addr;
		tunnel->t_dst4 = dst4->sin_addr;

		break;
#ifdef INET6
	case AF_INET6:
		if (dst->sa_len != sizeof(*dst6))
			return (EINVAL);

		/* source must be a real unicast address */
		src6 = (struct sockaddr_in6 *)src;
		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
			return (EINVAL);

		/* destination's multicast-ness must match the ucast flag */
		dst6 = (struct sockaddr_in6 *)dst;
		if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) != !ucast)
			return (EINVAL);

		if (src6->sin6_scope_id != dst6->sin6_scope_id)
			return (EINVAL);

		error = in6_embedscope(&tunnel->t_src6, src6, NULL);
		if (error != 0)
			return (error);

		error = in6_embedscope(&tunnel->t_dst6, dst6, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* commit */
	tunnel->t_af = dst->sa_family;

	return (0);
}
3358 
3359 static int
3360 gre_get_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req)
3361 {
3362 	struct sockaddr *src = (struct sockaddr *)&req->addr;
3363 	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
3364 	struct sockaddr_in *sin;
3365 #ifdef INET6 /* ifconfig already embeds the scopeid */
3366 	struct sockaddr_in6 *sin6;
3367 #endif
3368 
3369 	switch (tunnel->t_af) {
3370 	case AF_UNSPEC:
3371 		return (EADDRNOTAVAIL);
3372 	case AF_INET:
3373 		sin = (struct sockaddr_in *)src;
3374 		memset(sin, 0, sizeof(*sin));
3375 		sin->sin_family = AF_INET;
3376 		sin->sin_len = sizeof(*sin);
3377 		sin->sin_addr = tunnel->t_src4;
3378 
3379 		sin = (struct sockaddr_in *)dst;
3380 		memset(sin, 0, sizeof(*sin));
3381 		sin->sin_family = AF_INET;
3382 		sin->sin_len = sizeof(*sin);
3383 		sin->sin_addr = tunnel->t_dst4;
3384 
3385 		break;
3386 
3387 #ifdef INET6
3388 	case AF_INET6:
3389 		sin6 = (struct sockaddr_in6 *)src;
3390 		memset(sin6, 0, sizeof(*sin6));
3391 		sin6->sin6_family = AF_INET6;
3392 		sin6->sin6_len = sizeof(*sin6);
3393 		in6_recoverscope(sin6, &tunnel->t_src6);
3394 
3395 		sin6 = (struct sockaddr_in6 *)dst;
3396 		memset(sin6, 0, sizeof(*sin6));
3397 		sin6->sin6_family = AF_INET6;
3398 		sin6->sin6_len = sizeof(*sin6);
3399 		in6_recoverscope(sin6, &tunnel->t_dst6);
3400 
3401 		break;
3402 #endif
3403 	default:
3404 		return (EAFNOSUPPORT);
3405 	}
3406 
3407 	return (0);
3408 }
3409 
3410 static int
3411 gre_del_tunnel(struct gre_tunnel *tunnel)
3412 {
3413 	/* commit */
3414 	tunnel->t_af = AF_UNSPEC;
3415 
3416 	return (0);
3417 }
3418 
3419 static int
3420 gre_set_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3421 {
3422 	uint32_t key;
3423 	uint32_t min = GRE_KEY_MIN;
3424 	uint32_t max = GRE_KEY_MAX;
3425 	unsigned int shift = GRE_KEY_SHIFT;
3426 	uint32_t mask = GRE_KEY_MASK;
3427 
3428 	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
3429 		min = GRE_KEY_ENTROPY_MIN;
3430 		max = GRE_KEY_ENTROPY_MAX;
3431 		shift = GRE_KEY_ENTROPY_SHIFT;
3432 		mask = GRE_KEY_ENTROPY;
3433 	}
3434 
3435 	if (ifr->ifr_vnetid < min || ifr->ifr_vnetid > max)
3436 		return (EINVAL);
3437 
3438 	key = htonl(ifr->ifr_vnetid << shift);
3439 
3440 	/* commit */
3441 	tunnel->t_key_mask = mask;
3442 	tunnel->t_key = key;
3443 
3444 	return (0);
3445 }
3446 
3447 static int
3448 gre_get_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3449 {
3450 	int shift;
3451 
3452 	switch (tunnel->t_key_mask) {
3453 	case GRE_KEY_NONE:
3454 		return (EADDRNOTAVAIL);
3455 	case GRE_KEY_ENTROPY:
3456 		shift = GRE_KEY_ENTROPY_SHIFT;
3457 		break;
3458 	case GRE_KEY_MASK:
3459 		shift = GRE_KEY_SHIFT;
3460 		break;
3461 	}
3462 
3463 	ifr->ifr_vnetid = ntohl(tunnel->t_key) >> shift;
3464 
3465 	return (0);
3466 }
3467 
3468 static int
3469 gre_del_vnetid(struct gre_tunnel *tunnel)
3470 {
3471 	tunnel->t_key_mask = GRE_KEY_NONE;
3472 
3473 	return (0);
3474 }
3475 
3476 static int
3477 gre_set_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3478 {
3479 	uint32_t mask, key;
3480 
3481 	if (tunnel->t_key_mask == GRE_KEY_NONE)
3482 		return (EADDRNOTAVAIL);
3483 
3484 	mask = ifr->ifr_vnetid ? GRE_KEY_ENTROPY : GRE_KEY_MASK;
3485 	if (tunnel->t_key_mask == mask) {
3486 		/* nop */
3487 		return (0);
3488 	}
3489 
3490 	key = ntohl(tunnel->t_key);
3491 	if (mask == GRE_KEY_ENTROPY) {
3492 		if (key > GRE_KEY_ENTROPY_MAX)
3493 			return (ERANGE);
3494 
3495 		key = htonl(key << GRE_KEY_ENTROPY_SHIFT);
3496 	} else
3497 		key = htonl(key >> GRE_KEY_ENTROPY_SHIFT);
3498 
3499 	/* commit */
3500 	tunnel->t_key_mask = mask;
3501 	tunnel->t_key = key;
3502 
3503 	return (0);
3504 }
3505 
3506 static int
3507 gre_get_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3508 {
3509 	if (tunnel->t_key_mask == GRE_KEY_NONE)
3510 		return (EADDRNOTAVAIL);
3511 
3512 	ifr->ifr_vnetid = tunnel->t_key_mask == GRE_KEY_ENTROPY;
3513 
3514 	return (0);
3515 }
3516 
3517 static int
3518 mgre_up(struct mgre_softc *sc)
3519 {
3520 	unsigned int hlen;
3521 
3522 	switch (sc->sc_tunnel.t_af) {
3523 	case AF_UNSPEC:
3524 		return (EDESTADDRREQ);
3525 	case AF_INET:
3526 		hlen = sizeof(struct ip);
3527 		break;
3528 #ifdef INET6
3529 	case AF_INET6:
3530 		hlen = sizeof(struct ip6_hdr);
3531 		break;
3532 #endif /* INET6 */
3533 	default:
3534 		unhandled_af(sc->sc_tunnel.t_af);
3535 	}
3536 
3537 	hlen += sizeof(struct gre_header);
3538 	if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE)
3539 		hlen += sizeof(struct gre_h_key);
3540 
3541 	NET_ASSERT_LOCKED();
3542 
3543 	if (RBT_INSERT(mgre_tree, &mgre_tree, sc) != NULL)
3544 		return (EADDRINUSE);
3545 
3546 	sc->sc_if.if_hdrlen = hlen;
3547 	SET(sc->sc_if.if_flags, IFF_RUNNING);
3548 
3549 	return (0);
3550 }
3551 
3552 static int
3553 mgre_down(struct mgre_softc *sc)
3554 {
3555 	NET_ASSERT_LOCKED();
3556 
3557 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
3558 	sc->sc_if.if_hdrlen = GRE_HDRLEN; /* symmetry */
3559 
3560 	RBT_REMOVE(mgre_tree, &mgre_tree, sc);
3561 
3562 	/* barrier? */
3563 
3564 	return (0);
3565 }
3566 
3567 static int
3568 egre_up(struct egre_softc *sc)
3569 {
3570 	if (sc->sc_tunnel.t_af == AF_UNSPEC)
3571 		return (EDESTADDRREQ);
3572 
3573 	NET_ASSERT_LOCKED();
3574 
3575 	if (RBT_INSERT(egre_tree, &egre_tree, sc) != NULL)
3576 		return (EADDRINUSE);
3577 
3578 	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
3579 
3580 	return (0);
3581 }
3582 
3583 static int
3584 egre_down(struct egre_softc *sc)
3585 {
3586 	NET_ASSERT_LOCKED();
3587 
3588 	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
3589 
3590 	RBT_REMOVE(egre_tree, &egre_tree, sc);
3591 
3592 	/* barrier? */
3593 
3594 	return (0);
3595 }
3596 
3597 static int
3598 egre_media_change(struct ifnet *ifp)
3599 {
3600 	return (ENOTTY);
3601 }
3602 
3603 static void
3604 egre_media_status(struct ifnet *ifp, struct ifmediareq *imr)
3605 {
3606 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
3607 	imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
3608 }
3609 
/*
 * nvgre_up: bring an nvgre(4) interface up.
 *
 * Requires a configured tunnel and a multicast-capable parent
 * interface.  The softc is inserted into both the unicast and
 * multicast input trees and the tunnel destination group is joined on
 * the parent; every step is rolled back via the goto chain on failure.
 */
static int
nvgre_up(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp0;
	void *inm;
	int error;

	if (tunnel->t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 == NULL)
		return (ENXIO);
	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		error = ENODEV;
		goto put;
	}

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(nvgre_mcast_tree, &nvgre_mcast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto put;
	}
	if (RBT_INSERT(nvgre_ucast_tree, &nvgre_ucast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto remove_mcast;
	}

	/* join the tunnel destination group on the parent interface */
	switch (tunnel->t_af) {
	case AF_INET:
		inm = in_addmulti(&tunnel->t_dst4, ifp0);
		if (inm == NULL) {
			error = ECONNABORTED;
			goto remove_ucast;
		}
		break;
#ifdef INET6
	case AF_INET6:
		inm = in6_addmulti(&tunnel->t_dst6, ifp0, &error);
		if (inm == NULL) {
			/* error is already set */
			goto remove_ucast;
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(tunnel->t_af);
	}

	/* follow the parent's link state and detach */
	if_linkstatehook_add(ifp0, &sc->sc_ltask);
	if_detachhook_add(ifp0, &sc->sc_dtask);

	if_put(ifp0);

	sc->sc_inm = inm;
	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	/* start aging the learned MAC address table */
	timeout_add_sec(&sc->sc_ether_age, NVGRE_AGE_TMO);

	return (0);

remove_ucast:
	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
remove_mcast:
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);
put:
	if_put(ifp0);
	return (error);
}
3681 
/*
 * Take an nvgre(4) interface down.  IFF_RUNNING is cleared first,
 * then the net lock is released while waiting for the aging
 * timeout, the send queue, and the send task to drain, since those
 * barriers may sleep.
 */
static int
nvgre_down(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct taskq *softnet = net_tq(ifp->if_index);
	struct ifnet *ifp0;

	NET_ASSERT_LOCKED();

	CLR(ifp->if_flags, IFF_RUNNING);

	NET_UNLOCK();
	timeout_del_barrier(&sc->sc_ether_age);
	ifq_barrier(&ifp->if_snd);
	if (!task_del(softnet, &sc->sc_send_task))
		taskq_barrier(softnet);
	NET_LOCK();

	mq_purge(&sc->sc_send_list);

	/* unhook from the parent interface, if it still exists */
	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 != NULL) {
		if_detachhook_del(ifp0, &sc->sc_dtask);
		if_linkstatehook_del(ifp0, &sc->sc_ltask);
	}
	if_put(ifp0);

	/* leave the multicast group joined in nvgre_up() */
	switch (tunnel->t_af) {
	case AF_INET:
		in_delmulti(sc->sc_inm);
		break;

#ifdef INET6
	case AF_INET6:
		in6_delmulti(sc->sc_inm);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);

	return (0);
}
3729 
/* parent interface link-state hook: nothing to do */
static void
nvgre_link_change(void *arg)
{
	/* nop */
}
3735 
/*
 * Parent interface detach hook: take the nvgre interface down if it
 * is running and forget the parent's interface index.
 */
static void
nvgre_detach(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		nvgre_down(sc);
		if_down(ifp);
	}

	/* the parent is going away; drop the reference to its index */
	sc->sc_ifp0 = 0;
}
3749 
/*
 * Configure the parent interface for an nvgre(4) interface.  Only
 * the interface index is stored, so the parent may later detach
 * without leaving a dangling pointer behind.
 */
static int
nvgre_set_parent(struct nvgre_softc *sc, const char *parent)
{
	struct ifnet *ifp0;

	ifp0 = ifunit(parent); /* doesn't need an if_put */
	if (ifp0 == NULL)
		return (EINVAL);

	/* nvgre floods to unknown hosts via multicast on the parent */
	if (!ISSET(ifp0->if_flags, IFF_MULTICAST))
		return (EPROTONOSUPPORT);

	/* commit */
	sc->sc_ifp0 = ifp0->if_index;

	return (0);
}
3767 
/*
 * Timeout handler that expires dynamically learned ethernet
 * addresses.  Dynamic entries older than twice the configured
 * timeout are removed from the map; the timeout rearms itself for
 * as long as the interface stays running.
 */
static void
nvgre_age(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct nvgre_entry *nv, *nnv;
	int tmo = sc->sc_ether_tmo * 2;
	int diff;

	if (!ISSET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING))
		return;

	rw_enter_write(&sc->sc_ether_lock); /* XXX */
	RBT_FOREACH_SAFE(nv, nvgre_map, &sc->sc_ether_map, nnv) {
		/* static entries never expire */
		if (nv->nv_type != NVGRE_ENTRY_DYNAMIC)
			continue;

		/* subtraction copes with the tick counter wrapping */
		diff = ticks - nv->nv_age;
		if (diff < tmo)
			continue;

		sc->sc_ether_num--;
		RBT_REMOVE(nvgre_map, &sc->sc_ether_map, nv);
		/* only free the entry when the last reference is gone */
		if (refcnt_rele(&nv->nv_refs))
			pool_put(&nvgre_pool, nv);
	}
	rw_exit_write(&sc->sc_ether_lock);

	timeout_add_sec(&sc->sc_ether_age, NVGRE_AGE_TMO);
}
3797 
/*
 * Check whether a learned ethernet map entry may still be used:
 * static entries are always valid, dynamic ones only until they are
 * older than the configured timeout.  Returns 1 if usable, 0 if not.
 */
static inline int
nvgre_entry_valid(struct nvgre_softc *sc, const struct nvgre_entry *nv)
{
	int diff;

	if (nv == NULL)
		return (0);

	if (nv->nv_type == NVGRE_ENTRY_STATIC)
		return (1);

	/* subtraction copes with the tick counter wrapping */
	diff = ticks - nv->nv_age;
	if (diff < sc->sc_ether_tmo)
		return (1);

	return (0);
}
3815 
/*
 * nvgre(4) transmit.  Each ethernet frame from the send queue is
 * mapped to a tunnel gateway: broadcast frames and frames for
 * destinations not in the learned map are "flooded" to the tunnel
 * (multicast) destination, known unicast destinations use their
 * learned gateway.  Encapsulated packets are queued on the send
 * list and handed to the send task rather than transmitted here.
 */
static void
nvgre_start(struct ifnet *ifp)
{
	struct nvgre_softc *sc = ifp->if_softc;
	const struct gre_tunnel *tunnel = &sc->sc_tunnel;
	union gre_addr gateway;
	struct nvgre_entry *nv, key;
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct ether_header *eh;
	struct mbuf *m, *m0;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* drop everything while gre is disabled via sysctl */
	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* pick the tunnel gateway for this destination mac */
		eh = mtod(m0, struct ether_header *);
		if (ETHER_IS_BROADCAST(eh->ether_dhost))
			gateway = tunnel->t_dst;
		else {
			memcpy(&key.nv_dst, eh->ether_dhost,
			    sizeof(key.nv_dst));

			rw_enter_read(&sc->sc_ether_lock);
			nv = RBT_FIND(nvgre_map, &sc->sc_ether_map, &key);
			if (nvgre_entry_valid(sc, nv))
				gateway = nv->nv_gateway;
			else {
				/* "flood" to unknown hosts */
				gateway = tunnel->t_dst;
			}
			rw_exit_read(&sc->sc_ether_lock);
		}

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* empty leading mbuf: all headroom for the encapsulation */
		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap_dst(tunnel, &gateway, m,
		    htons(ETHERTYPE_TRANSETHER),
		    tunnel->t_ttl, gre_l2_tos(tunnel, m));
		if (m == NULL)
			continue;

		m->m_flags &= ~(M_BCAST|M_MCAST);
		m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;

#if NPF > 0
		pf_pkt_addr_changed(m);
#endif

		ml_enqueue(&ml, m);
	}

	/* hand the encapsulated packets to the send task */
	if (!ml_empty(&ml)) {
		if (mq_enlist(&sc->sc_send_list, &ml) == 0)
			task_add(net_tq(ifp->if_index), &sc->sc_send_task);
		/* else set OACTIVE? */
	}
}
3895 
/*
 * Send a list of encapsulated packets as IPv4 multicast out the
 * parent interface.  Returns the number of packets ip_output()
 * failed to send.
 */
static uint64_t
nvgre_send4(struct nvgre_softc *sc, struct mbuf_list *ml)
{
	struct ip_moptions imo;
	struct mbuf *m;
	uint64_t oerrors = 0;

	/* multicast options: out the parent, tunnel ttl, no loopback */
	imo.imo_ifidx = sc->sc_ifp0;
	imo.imo_ttl = sc->sc_tunnel.t_ttl;
	imo.imo_loop = 0;

	NET_RLOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0)
			oerrors++;
	}
	NET_RUNLOCK();

	return (oerrors);
}
3916 
#ifdef INET6
/*
 * Send a list of encapsulated packets as IPv6 multicast out the
 * parent interface.  Returns the number of packets ip6_output()
 * failed to send.
 */
static uint64_t
nvgre_send6(struct nvgre_softc *sc, struct mbuf_list *ml)
{
	struct ip6_moptions im6o;
	struct mbuf *m;
	uint64_t oerrors = 0;

	/* multicast options: out the parent, tunnel hlim, no loopback */
	im6o.im6o_ifidx = sc->sc_ifp0;
	im6o.im6o_hlim = sc->sc_tunnel.t_ttl;
	im6o.im6o_loop = 0;

	NET_RLOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0)
			oerrors++;
	}
	NET_RUNLOCK();

	return (oerrors);
}
#endif /* INET6 */
3939 
/*
 * Send task: drain the queue of pending encapsulated packets with
 * the output routine matching the tunnel's address family and
 * account any output errors on the interface.
 */
static void
nvgre_send(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	sa_family_t af = sc->sc_tunnel.t_af;
	struct mbuf_list ml;
	uint64_t oerrors;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	mq_delist(&sc->sc_send_list, &ml);
	if (ml_empty(&ml))
		return;

	switch (af) {
	case AF_INET:
		oerrors = nvgre_send4(sc, &ml);
		break;
#ifdef INET6
	case AF_INET6:
		oerrors = nvgre_send6(sc, &ml);
		break;
#endif
	default:
		/* unhandled_af() panics, so oerrors cannot be used uninitialized */
		unhandled_af(af);
		/* NOTREACHED */
	}

	ifp->if_oerrors += oerrors; /* XXX should be ifq_oerrors */
}
3972 
/*
 * Bring an eoip(4) interface up: insert the softc into the tree of
 * active eoip tunnels and, if keepalives are configured, start
 * sending them.
 */
static int
eoip_up(struct eoip_softc *sc)
{
	/* a tunnel configuration is required before going up */
	if (sc->sc_tunnel.t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	NET_ASSERT_LOCKED();

	/* the tree rejects duplicate tunnel configurations */
	if (RBT_INSERT(eoip_tree, &eoip_tree, sc) != NULL)
		return (EADDRINUSE);

	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		sc->sc_ka_holdmax = sc->sc_ka_count;
		/* send the first keepalive now; it rearms itself */
		eoip_keepalive_send(sc);
	}

	return (0);
}
3993 
/*
 * Take an eoip(4) interface down: stop the keepalive timeouts and
 * mark the link down before removing the softc from the tree of
 * active eoip tunnels.
 */
static int
eoip_down(struct eoip_softc *sc)
{
	NET_ASSERT_LOCKED();
	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		/* wait until any running keepalive handlers have finished */
		timeout_del_barrier(&sc->sc_ka_hold);
		timeout_del_barrier(&sc->sc_ka_send);

		sc->sc_ka_state = GRE_KA_DOWN;
		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
	}

	RBT_REMOVE(eoip_tree, &eoip_tree, sc);

	return (0);
}
4012 
/*
 * eoip(4) transmit: encapsulate each frame from the send queue in
 * an EoIP/GRE/IP header and send it via the tunnel output routine.
 */
static void
eoip_start(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* drop everything while gre is disabled via sysctl */
	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* empty leading mbuf: all headroom for the encapsulation */
		m_align(m, 0);
		m->m_len = 0;

		m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
4054 
/*
 * Prepend the GRE and EoIP headers to a frame and encapsulate the
 * result in an outer IP header.  The EoIP header carries the
 * original payload length and the configured tunnel id.
 */
static struct mbuf *
eoip_encap(struct eoip_softc *sc, struct mbuf *m, uint8_t tos)
{
	struct gre_header *gh;
	struct gre_h_key_eoip *eoiph;
	int len = m->m_pkthdr.len;

	m = m_prepend(m, sizeof(*gh) + sizeof(*eoiph), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	/* EoIP is GRE version 1 with the key present flag set */
	gh = mtod(m, struct gre_header *);
	gh->gre_flags = htons(GRE_VERS_1 | GRE_KP);
	gh->gre_proto = htons(GRE_EOIP);

	eoiph = (struct gre_h_key_eoip *)(gh + 1);
	htobem16(&eoiph->eoip_len, len);
	eoiph->eoip_tunnel_id = sc->sc_tunnel_id;

	return (gre_encap_ip(&sc->sc_tunnel, m, sc->sc_tunnel.t_ttl, tos));
}
4076 
/*
 * Keepalive send timeout: build and transmit an EoIP packet with a
 * zero length payload, then rearm.  The mbuf is sized for the worst
 * case headers and immediately trimmed back to empty, leaving only
 * headroom for the encapsulation.
 */
static void
eoip_keepalive_send(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *m;
	int linkhdr;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#endif
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	if (linkhdr > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, linkhdr);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	m->m_pkthdr.pf.prio = ifp->if_llprio;
	/* fill the mbuf with header space, then trim it all off again */
	m->m_pkthdr.len = m->m_len = linkhdr;
	m_adj(m, linkhdr);

	m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);

	timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);
}
4120 
/*
 * Keepalive hold timeout: no keepalive arrived in time to rearm the
 * hold timer, so declare the tunnel link down.
 */
static void
eoip_keepalive_hold(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	NET_LOCK();
	sc->sc_ka_state = GRE_KA_DOWN;
	gre_link_state(ifp, sc->sc_ka_state);
	NET_UNLOCK();
}
4135 
/*
 * Handle a received keepalive: advance the keepalive state machine
 * from DOWN through HOLD (which requires several consecutive
 * keepalives) to UP, and rearm the hold timeout.
 */
static void
eoip_keepalive_recv(struct eoip_softc *sc)
{
	switch (sc->sc_ka_state) {
	case GRE_KA_NONE:
		/* keepalives are disabled */
		return;
	case GRE_KA_DOWN:
		/* start holding; demand more keepalives after each flap */
		sc->sc_ka_state = GRE_KA_HOLD;
		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
		    16 * sc->sc_ka_count);
		break;
	case GRE_KA_HOLD:
		if (--sc->sc_ka_holdcnt > 0)
			break;

		/* enough keepalives in a row: declare the link up */
		sc->sc_ka_state = GRE_KA_UP;
		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
		break;

	case GRE_KA_UP:
		/* slowly decay the hold requirement back to the base count */
		sc->sc_ka_holdmax--;
		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
		break;
	}

	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);
}
4164 
/*
 * Attempt to handle an incoming GRE packet as EoIP.  Returns the
 * mbuf if the packet is not EoIP (so another handler may try it),
 * or NULL when it has been consumed.  Zero length payloads are
 * keepalives; everything else is aligned, trimmed to the length
 * advertised in the EoIP header, and fed into the interface as an
 * ethernet frame.
 */
static struct mbuf *
eoip_input(struct gre_tunnel *key, struct mbuf *m,
    const struct gre_header *gh, uint8_t otos, int iphlen)
{
	struct eoip_softc *sc;
	struct gre_h_key_eoip *eoiph;
	int hlen, len;
	caddr_t buf;

	/* EoIP is exactly GRE version 1 with only the key flag set */
	if (gh->gre_flags != htons(GRE_KP | GRE_VERS_1))
		goto decline;

	hlen = iphlen + sizeof(*gh) + sizeof(*eoiph);
	if (m->m_pkthdr.len < hlen)
		goto decline;

	m = m_pullup(m, hlen);
	if (m == NULL)
		return (NULL);

	/* m_pullup() may have moved the data; refetch the headers */
	buf = mtod(m, caddr_t);
	gh = (struct gre_header *)(buf + iphlen);
	eoiph = (struct gre_h_key_eoip *)(gh + 1);

	/* the tunnel id is the lookup key */
	key->t_key = eoiph->eoip_tunnel_id;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(eoip_tree, &eoip_tree, (const struct eoip_softc *)key);
	if (sc == NULL)
		goto decline;

	/* it's ours now */
	len = bemtoh16(&eoiph->eoip_len);
	if (len == 0) {
		/* zero length payloads are keepalives */
		eoip_keepalive_recv(sc);
		goto drop;
	}

	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (NULL);

	/* drop short frames, trim anything past the advertised length */
	if (m->m_pkthdr.len < len)
		goto drop;
	if (m->m_pkthdr.len != len)
		m_adj(m, len - m->m_pkthdr.len);

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	m->m_flags &= ~(M_MCAST|M_BCAST);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	if_vinput(&sc->sc_ac.ac_if, m);

	return (NULL);

decline:
	return (m);
drop:
	m_freem(m);
	return (NULL);
}
4230 
/*
 * Handle the gre sysctl nodes.  The gre_allow and gre_wccp knobs
 * are plain integers read and written under the net lock.
 */
int
gre_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int error;

	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case GRECTL_ALLOW:
		NET_LOCK();
		error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_allow);
		NET_UNLOCK();
		return (error);
	case GRECTL_WCCP:
		NET_LOCK();
		error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_wccp);
		NET_UNLOCK();
		return (error);
	default:
		return (ENOPROTOOPT);
	}
	/* NOTREACHED */
}
4257 
4258 static inline int
4259 gre_ip_cmp(int af, const union gre_addr *a, const union gre_addr *b)
4260 {
4261 	switch (af) {
4262 #ifdef INET6
4263 	case AF_INET6:
4264 		return (memcmp(&a->in6, &b->in6, sizeof(a->in6)));
4265 #endif /* INET6 */
4266 	case AF_INET:
4267 		return (memcmp(&a->in4, &b->in4, sizeof(a->in4)));
4268 	default:
4269 		unhandled_af(af);
4270 	}
4271 
4272 	return (0);
4273 }
4274 
4275 static int
4276 gre_cmp_src(const struct gre_tunnel *a, const struct gre_tunnel *b)
4277 {
4278 	uint32_t ka, kb;
4279 	uint32_t mask;
4280 	int rv;
4281 
4282 	/* is K set at all? */
4283 	ka = a->t_key_mask & GRE_KEY_ENTROPY;
4284 	kb = b->t_key_mask & GRE_KEY_ENTROPY;
4285 
4286 	/* sort by whether K is set */
4287 	if (ka > kb)
4288 		return (1);
4289 	if (ka < kb)
4290 		return (-1);
4291 
4292 	/* is K set on both? */
4293 	if (ka != GRE_KEY_NONE) {
4294 		/* get common prefix */
4295 		mask = a->t_key_mask & b->t_key_mask;
4296 
4297 		ka = a->t_key & mask;
4298 		kb = b->t_key & mask;
4299 
4300 		/* sort by common prefix */
4301 		if (ka > kb)
4302 			return (1);
4303 		if (ka < kb)
4304 			return (-1);
4305 	}
4306 
4307 	/* sort by routing table */
4308 	if (a->t_rtableid > b->t_rtableid)
4309 		return (1);
4310 	if (a->t_rtableid < b->t_rtableid)
4311 		return (-1);
4312 
4313 	/* sort by address */
4314 	if (a->t_af > b->t_af)
4315 		return (1);
4316 	if (a->t_af < b->t_af)
4317 		return (-1);
4318 
4319 	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
4320 	if (rv != 0)
4321 		return (rv);
4322 
4323 	return (0);
4324 }
4325 
/*
 * Compare two point-to-point gre tunnels: order by the source side
 * first, then by the destination address.
 */
static int
gre_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b)
{
	int rv;

	rv = gre_cmp_src(a, b);
	if (rv != 0)
		return (rv);

	return (gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst));
}
4337 
/* mgre(4) interfaces are keyed by their tunnel source side only */
static inline int
mgre_cmp(const struct mgre_softc *a, const struct mgre_softc *b)
{
	return (gre_cmp_src(&a->sc_tunnel, &b->sc_tunnel));
}

RBT_GENERATE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);
4345 
/* egre(4) interfaces are keyed by their full tunnel configuration */
static inline int
egre_cmp(const struct egre_softc *a, const struct egre_softc *b)
{
	return (gre_cmp(&a->sc_tunnel, &b->sc_tunnel));
}

RBT_GENERATE(egre_tree, egre_softc, sc_entry, egre_cmp);
4353 
/* learned ethernet map entries are keyed by destination address */
static inline int
nvgre_entry_cmp(const struct nvgre_entry *a, const struct nvgre_entry *b)
{
	return (memcmp(&a->nv_dst, &b->nv_dst, sizeof(a->nv_dst)));
}

RBT_GENERATE(nvgre_map, nvgre_entry, nv_entry, nvgre_entry_cmp);
4361 
4362 static int
4363 nvgre_cmp_tunnel(const struct gre_tunnel *a, const struct gre_tunnel *b)
4364 {
4365 	uint32_t ka, kb;
4366 
4367 	ka = a->t_key & GRE_KEY_ENTROPY;
4368 	kb = b->t_key & GRE_KEY_ENTROPY;
4369 
4370 	/* sort by common prefix */
4371 	if (ka > kb)
4372 		return (1);
4373 	if (ka < kb)
4374 		return (-1);
4375 
4376 	/* sort by routing table */
4377 	if (a->t_rtableid > b->t_rtableid)
4378 		return (1);
4379 	if (a->t_rtableid < b->t_rtableid)
4380 		return (-1);
4381 
4382 	/* sort by address */
4383 	if (a->t_af > b->t_af)
4384 		return (1);
4385 	if (a->t_af < b->t_af)
4386 		return (-1);
4387 
4388 	return (0);
4389 }
4390 
4391 static inline int
4392 nvgre_cmp_ucast(const struct nvgre_softc *na, const struct nvgre_softc *nb)
4393 {
4394 	const struct gre_tunnel *a = &na->sc_tunnel;
4395 	const struct gre_tunnel *b = &nb->sc_tunnel;
4396 	int rv;
4397 
4398 	rv = nvgre_cmp_tunnel(a, b);
4399 	if (rv != 0)
4400 		return (rv);
4401 
4402 	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
4403 	if (rv != 0)
4404 		return (rv);
4405 
4406 	return (0);
4407 }
4408 
/*
 * Ordering function for nvgre multicast lookups: compare the tunnel
 * configuration, then the supplied address, then the parent
 * interface index.
 */
static int
nvgre_cmp_mcast(const struct gre_tunnel *a, const union gre_addr *aa,
    unsigned int if0idxa, const struct gre_tunnel *b,
    const union gre_addr *ab,unsigned int if0idxb)
{
	int rv;

	rv = nvgre_cmp_tunnel(a, b);
	if (rv != 0)
		return (rv);

	rv = gre_ip_cmp(a->t_af, aa, ab);
	if (rv != 0)
		return (rv);

	/* sort by parent interface index */
	if (if0idxa > if0idxb)
		return (1);
	if (if0idxa < if0idxb)
		return (-1);

	return (0);
}
4431 
/* adapt nvgre_cmp_mcast() to the softc layout used by the tree */
static inline int
nvgre_cmp_mcast_sc(const struct nvgre_softc *na, const struct nvgre_softc *nb)
{
	const struct gre_tunnel *a = &na->sc_tunnel;
	const struct gre_tunnel *b = &nb->sc_tunnel;

	return (nvgre_cmp_mcast(a, &a->t_dst, na->sc_ifp0,
	    b, &b->t_dst, nb->sc_ifp0));
}

RBT_GENERATE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
RBT_GENERATE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);
4444 
4445 static inline int
4446 eoip_cmp(const struct eoip_softc *ea, const struct eoip_softc *eb)
4447 {
4448 	const struct gre_tunnel *a = &ea->sc_tunnel;
4449 	const struct gre_tunnel *b = &eb->sc_tunnel;
4450 	int rv;
4451 
4452 	if (a->t_key > b->t_key)
4453 		return (1);
4454 	if (a->t_key < b->t_key)
4455 		return (-1);
4456 
4457 	/* sort by routing table */
4458 	if (a->t_rtableid > b->t_rtableid)
4459 		return (1);
4460 	if (a->t_rtableid < b->t_rtableid)
4461 		return (-1);
4462 
4463 	/* sort by address */
4464 	if (a->t_af > b->t_af)
4465 		return (1);
4466 	if (a->t_af < b->t_af)
4467 		return (-1);
4468 
4469 	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
4470 	if (rv != 0)
4471 		return (rv);
4472 
4473 	rv = gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst);
4474 	if (rv != 0)
4475 		return (rv);
4476 
4477 	return (0);
4478 }
4479 
4480 RBT_GENERATE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);
4481