1 /* $OpenBSD: if_gre.c,v 1.160 2020/08/28 12:01:48 mvs Exp $ */ 2 /* $NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */ 3 4 /* 5 * Copyright (c) 1998 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Heiko W.Rupp <hwr@pilhuhn.de> 10 * 11 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de> 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 /* 36 * Encapsulate L3 protocols into IP, per RFC 1701 and 1702. 37 * See gre(4) for more details. 38 * Also supported: IP in IP encapsulation (proto 55) per RFC 2004. 
 */

#include "bpfilter.h"
#include "pf.h"

#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/timeout.h>
#include <sys/queue.h>
#include <sys/tree.h>
#include <sys/pool.h>
#include <sys/rwlock.h>

#include <crypto/siphash.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/if_media.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_ecn.h>

#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_var.h>
#endif

#ifdef PIPEX
#include <net/pipex.h>
#endif

#ifdef MPLS
#include <netmpls/mpls.h>
#endif /* MPLS */

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#if NPF > 0
#include <net/pfvar.h>
#endif

#include <net/if_gre.h>

#include <netinet/ip_gre.h>
#include <sys/sysctl.h>

/* for nvgre bridge shizz */
#include <sys/socket.h>
#include <net/if_bridge.h>

/*
 * packet formats
 */

/* Base GRE header: flags/version word followed by the payload ethertype. */
struct gre_header {
	uint16_t		gre_flags;
#define GRE_CP				0x8000  /* Checksum Present */
#define GRE_KP				0x2000  /* Key Present */
#define GRE_SP				0x1000  /* Sequence Present */

#define GRE_VERS_MASK			0x0007
#define GRE_VERS_0			0x0000
#define GRE_VERS_1			0x0001

	uint16_t		gre_proto;
} __packed __aligned(4);

/* Optional checksum field, present when GRE_CP is set in gre_flags. */
struct gre_h_cksum {
	uint16_t		gre_cksum;
	uint16_t		gre_reserved1;
} __packed __aligned(4);

/* Optional key field, present when GRE_KP is set in gre_flags. */
struct gre_h_key {
	uint32_t		gre_key;
} __packed __aligned(4);

/* MikroTik EoIP uses GRE version 1 frames with this protocol id. */
#define GRE_EOIP		0x6400

/* EoIP repurposes the key field as a payload length plus a tunnel id. */
struct gre_h_key_eoip {
	uint16_t		eoip_len;	/* network order */
	uint16_t		eoip_tunnel_id;	/* little endian */
} __packed __aligned(4);

#define NVGRE_VSID_RES_MIN	0x000000 /* reserved for future use */
#define NVGRE_VSID_RES_MAX	0x000fff
#define NVGRE_VSID_NVE2NVE	0xffffff /* vendor specific NVE-to-NVE comms */

/* Optional sequence number field, present when GRE_SP is set. */
struct gre_h_seq {
	uint32_t		gre_seq;
} __packed __aligned(4);

/* WCCPv2 redirect header that may follow the GRE header. */
struct gre_h_wccp {
	uint8_t			wccp_flags;
	uint8_t			service_id;
	uint8_t			alt_bucket;
	uint8_t			pri_bucket;
} __packed __aligned(4);

#define GRE_WCCP 0x883e

/* outer IPv4 header plus the base GRE header */
#define GRE_HDRLEN (sizeof(struct ip) + sizeof(struct gre_header))

/*
 * GRE tunnel metadata
 */

/* keepalive state machine states */
#define GRE_KA_NONE		0	/* keepalives not configured */
#define GRE_KA_DOWN		1	/* no keepalive heard recently */
#define GRE_KA_HOLD		2	/* heard one, holding before up */
#define GRE_KA_UP		3	/* keepalives flowing */

/* tunnel endpoint address; which member is valid is picked by t_af */
union gre_addr {
	struct in_addr		in4;
	struct in6_addr		in6;
};

static inline int
		gre_ip_cmp(int, const union gre_addr *,
		    const union gre_addr *);

#define GRE_KEY_MIN		0x00000000U
#define GRE_KEY_MAX		0xffffffffU
#define GRE_KEY_SHIFT		0

/*
 * With a "vnetflowid" style key the high 24 bits carry the virtual
 * network id and the low 8 bits carry flow entropy (cf. NVGRE).
 */
#define GRE_KEY_ENTROPY_MIN	0x00000000U
#define GRE_KEY_ENTROPY_MAX	0x00ffffffU
#define GRE_KEY_ENTROPY_SHIFT	8

/*
 * Shared per-tunnel state.  Every softc below embeds this as its FIRST
 * member so a softc pointer can be cast to a gre_tunnel pointer (and
 * vice versa) for the tree/list lookups.
 */
struct gre_tunnel {
	uint32_t		t_key_mask;
#define GRE_KEY_NONE		htonl(0x00000000U)
#define GRE_KEY_ENTROPY		htonl(0xffffff00U)
#define GRE_KEY_MASK		htonl(0xffffffffU)
	uint32_t		t_key;		/* network order GRE key */

	u_int			t_rtableid;
	union gre_addr		t_src;		/* local endpoint */
#define t_src4	t_src.in4
#define t_src6	t_src.in6
	union gre_addr		t_dst;		/* remote endpoint */
#define t_dst4	t_dst.in4
#define t_dst6	t_dst.in6
	int			t_ttl;
	int			t_txhprio;	/* prio for the outer header */
	int			t_rxhprio;	/* prio for received packets */
	int			t_ecn;
	uint16_t		t_df;		/* network order DF for outer ip */
	sa_family_t		t_af;		/* outer address family */
};

static int
		gre_cmp_src(const struct gre_tunnel *,
		    const struct gre_tunnel *);
static int
		gre_cmp(const struct gre_tunnel *, const struct gre_tunnel *);

static int	gre_set_tunnel(struct gre_tunnel *, struct if_laddrreq *, int);
static int	gre_get_tunnel(struct gre_tunnel *, struct if_laddrreq *);
static int
		gre_del_tunnel(struct gre_tunnel *);

static int	gre_set_vnetid(struct gre_tunnel *, struct ifreq *);
static int	gre_get_vnetid(struct gre_tunnel *, struct ifreq *);
static int	gre_del_vnetid(struct gre_tunnel *);

static int	gre_set_vnetflowid(struct gre_tunnel *, struct ifreq *);
static int	gre_get_vnetflowid(struct gre_tunnel *, struct ifreq *);

static struct mbuf *
		gre_encap_dst(const struct gre_tunnel *, const union gre_addr *,
		    struct mbuf *, uint16_t, uint8_t, uint8_t);
#define gre_encap(_t, _m, _p, _ttl, _tos) \
		gre_encap_dst((_t), &(_t)->t_dst, (_m), (_p), (_ttl), (_tos))

static struct mbuf *
		gre_encap_dst_ip(const struct gre_tunnel *,
		    const union gre_addr *, struct mbuf *, uint8_t, uint8_t);
#define gre_encap_ip(_t, _m, _ttl, _tos) \
		gre_encap_dst_ip((_t), &(_t)->t_dst, (_m), (_ttl), (_tos))

static int
		gre_ip_output(const struct gre_tunnel *, struct mbuf *);

static int	gre_tunnel_ioctl(struct ifnet *, struct gre_tunnel *,
		    u_long, void *);

static uint8_t	gre_l2_tos(const struct gre_tunnel *, const struct mbuf *);
static uint8_t	gre_l3_tos(const struct gre_tunnel *,
		    const struct mbuf *, uint8_t);

/*
 * layer 3 GRE tunnels
 */

struct gre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	TAILQ_ENTRY(gre_softc)	sc_entry;

	struct ifnet		sc_if;

	/* keepalive machinery, see gre_keepalive_send/recv/hold */
	struct timeout		sc_ka_send;
	struct timeout		sc_ka_hold;

	unsigned int		sc_ka_state;	/* GRE_KA_* */
	unsigned int		sc_ka_timeo;
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;

	SIPHASH_KEY		sc_ka_key;	/* authenticates our probes */
	uint32_t		sc_ka_bias;
	int			sc_ka_recvtm;
};

TAILQ_HEAD(gre_list, gre_softc);

/* on-wire payload of a gre(4) keepalive probe */
struct gre_keepalive {
	uint32_t		gk_uptime;
	uint32_t		gk_random;
	uint8_t			gk_digest[SIPHASH_DIGEST_LENGTH];
} __packed __aligned(4);

static int	gre_clone_create(struct if_clone *, int);
static int	gre_clone_destroy(struct ifnet *);

struct if_clone gre_cloner =
    IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);

/* protected by NET_LOCK */
struct gre_list gre_list = TAILQ_HEAD_INITIALIZER(gre_list);

static int	gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
		    struct rtentry *);
static void	gre_start(struct ifnet *);
static int	gre_ioctl(struct ifnet *, u_long, caddr_t);

static int	gre_up(struct gre_softc *);
static int	gre_down(struct gre_softc *);
static void	gre_link_state(struct ifnet *, unsigned int);

static int	gre_input_key(struct mbuf **, int *, int, int, uint8_t,
		    struct gre_tunnel *);

/*
 * The *_patch helpers run ECN egress processing on the inner header
 * and report the inner TOS back to gre_input_key.
 */
static struct mbuf *
		gre_ipv4_patch(const struct gre_tunnel *, struct mbuf *,
		    uint8_t *, uint8_t);
#ifdef INET6
static struct mbuf *
		gre_ipv6_patch(const struct gre_tunnel *, struct mbuf *,
		    uint8_t *, uint8_t);
#endif
#ifdef MPLS
static struct mbuf *
		gre_mpls_patch(const struct gre_tunnel *, struct mbuf *,
		    uint8_t *, uint8_t);
#endif
static void	gre_keepalive_send(void *);
static void	gre_keepalive_recv(struct ifnet *ifp, struct mbuf *);
static void	gre_keepalive_hold(void *);

static struct mbuf *
		gre_l3_encap_dst(const struct gre_tunnel *, const void *,
		    struct mbuf *m, sa_family_t);

#define gre_l3_encap(_t, _m, _af) \
		gre_l3_encap_dst((_t), &(_t)->t_dst, (_m), (_af))

/* multipoint (NBMA) layer 3 GRE */
struct mgre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(mgre_softc)	sc_entry;

	struct ifnet		sc_if;
};

RBT_HEAD(mgre_tree, mgre_softc);

static inline int
		mgre_cmp(const struct mgre_softc *, const struct mgre_softc *);

RBT_PROTOTYPE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);

static int	mgre_clone_create(struct if_clone *, int);
static int	mgre_clone_destroy(struct ifnet *);

struct if_clone mgre_cloner =
    IF_CLONE_INITIALIZER("mgre", mgre_clone_create, mgre_clone_destroy);

static void	mgre_rtrequest(struct ifnet *, int, struct rtentry *);
static int	mgre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
		    struct rtentry *);
static void	mgre_start(struct ifnet *);
static int	mgre_ioctl(struct ifnet *, u_long, caddr_t);

static int	mgre_set_tunnel(struct mgre_softc *, struct if_laddrreq *);
static int	mgre_get_tunnel(struct mgre_softc *, struct if_laddrreq *);
static int	mgre_up(struct mgre_softc *);
static int	mgre_down(struct mgre_softc *);

/* protected by NET_LOCK */
struct mgre_tree mgre_tree = RBT_INITIALIZER();

/*
 * Ethernet GRE tunnels
 */

static struct mbuf *
		gre_ether_align(struct mbuf *, int);

struct egre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(egre_softc)	sc_entry;

	struct arpcom		sc_ac;
	struct ifmedia		sc_media;
};

RBT_HEAD(egre_tree, egre_softc);

static inline int
		egre_cmp(const struct egre_softc *, const struct egre_softc *);

RBT_PROTOTYPE(egre_tree, egre_softc, sc_entry, egre_cmp);

static int	egre_clone_create(struct if_clone *, int);
static int	egre_clone_destroy(struct ifnet *);

static void	egre_start(struct ifnet *);
static int	egre_ioctl(struct ifnet *, u_long, caddr_t);
static int	egre_media_change(struct ifnet *);
static void	egre_media_status(struct ifnet *, struct ifmediareq *);

static int	egre_up(struct egre_softc *);
static int	egre_down(struct egre_softc *);

static int	egre_input(const struct gre_tunnel *, struct mbuf *, int,
		    uint8_t);
struct if_clone egre_cloner =
    IF_CLONE_INITIALIZER("egre", egre_clone_create, egre_clone_destroy);

/* protected by NET_LOCK */
struct egre_tree egre_tree = RBT_INITIALIZER();

/*
 * Network Virtualisation Using Generic Routing Encapsulation (NVGRE)
 */

#define NVGRE_AGE_TMO	100	/* seconds */

/* learned (or static) MAC-to-gateway entry in the per-interface map */
struct nvgre_entry {
	RB_ENTRY(nvgre_entry)	 nv_entry;
	struct ether_addr	 nv_dst;
	uint8_t			 nv_type;
#define NVGRE_ENTRY_DYNAMIC		0
#define NVGRE_ENTRY_STATIC		1
	union gre_addr		 nv_gateway;
	struct refcnt		 nv_refs;
	int			 nv_age;
};

RBT_HEAD(nvgre_map, nvgre_entry);

static inline int
		nvgre_entry_cmp(const struct nvgre_entry *,
		    const struct nvgre_entry *);

RBT_PROTOTYPE(nvgre_map, nvgre_entry, nv_entry, nvgre_entry_cmp);

struct nvgre_softc {
	struct gre_tunnel	 sc_tunnel; /* must be first */
	unsigned int		 sc_ifp0;	/* parent interface index */
	RBT_ENTRY(nvgre_softc)	 sc_uentry;	/* nvgre_ucast_tree linkage */
	RBT_ENTRY(nvgre_softc)	 sc_mentry;	/* nvgre_mcast_tree linkage */

	struct arpcom		 sc_ac;
	struct ifmedia		 sc_media;

	struct mbuf_queue	 sc_send_list;
	struct task		 sc_send_task;

	void			*sc_inm;	/* multicast membership */
	struct task		 sc_ltask;	/* parent link state hook */
	struct task		 sc_dtask;	/* parent detach hook */

	/* learned ethernet address map, serialised by sc_ether_lock */
	struct rwlock		 sc_ether_lock;
	struct nvgre_map	 sc_ether_map;
	unsigned int		 sc_ether_num;	/* current entries */
	unsigned int		 sc_ether_max;	/* cap on entries */
	int			 sc_ether_tmo;	/* age interval, in ticks */
	struct timeout		 sc_ether_age;
};

RBT_HEAD(nvgre_ucast_tree, nvgre_softc);
RBT_HEAD(nvgre_mcast_tree, nvgre_softc);

static inline int
		nvgre_cmp_ucast(const struct nvgre_softc *,
		    const struct nvgre_softc *);
static int
		nvgre_cmp_mcast(const struct gre_tunnel *,
		    const union gre_addr *, unsigned int,
		    const struct gre_tunnel *, const union gre_addr *,
		    unsigned int);
static inline int
		nvgre_cmp_mcast_sc(const struct nvgre_softc *,
		    const struct nvgre_softc *);

RBT_PROTOTYPE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
RBT_PROTOTYPE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);

static int	nvgre_clone_create(struct if_clone *, int);
static int	nvgre_clone_destroy(struct ifnet *);

static void	nvgre_start(struct ifnet *);
static int	nvgre_ioctl(struct ifnet *, u_long, caddr_t);
static int	nvgre_up(struct nvgre_softc *);
static int	nvgre_down(struct nvgre_softc *);
static int	nvgre_set_parent(struct nvgre_softc *, const char *);
static void	nvgre_link_change(void *);
static void	nvgre_detach(void *);

static int	nvgre_input(const struct gre_tunnel *, struct mbuf *, int,
		    uint8_t);
static void	nvgre_send(void *);

static int	nvgre_rtfind(struct nvgre_softc *, struct ifbaconf *);
static void	nvgre_flush_map(struct nvgre_softc *);
static void	nvgre_input_map(struct nvgre_softc *,
		    const struct gre_tunnel *, const struct ether_header *);
static void	nvgre_age(void *);

struct if_clone nvgre_cloner =
    IF_CLONE_INITIALIZER("nvgre", nvgre_clone_create, nvgre_clone_destroy);

/* backing store for nvgre_entry allocations, set up on first clone */
struct pool nvgre_pool;

/* protected by NET_LOCK */
struct nvgre_ucast_tree nvgre_ucast_tree = RBT_INITIALIZER();
struct nvgre_mcast_tree nvgre_mcast_tree = RBT_INITIALIZER();

/*
 * MikroTik Ethernet over IP protocol (eoip)
 */

struct eoip_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	uint16_t		sc_tunnel_id;
	RBT_ENTRY(eoip_softc)	sc_entry;

	struct arpcom		sc_ac;
	struct ifmedia		sc_media;

	/* keepalive machinery, see eoip_keepalive_send/recv/hold */
	struct timeout		sc_ka_send;
	struct timeout		sc_ka_hold;

	unsigned int		sc_ka_state;	/* GRE_KA_* */
	unsigned int		sc_ka_timeo;
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;
};

RBT_HEAD(eoip_tree, eoip_softc);

static inline int
		eoip_cmp(const struct eoip_softc *, const struct eoip_softc *);

RBT_PROTOTYPE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);

static int	eoip_clone_create(struct if_clone *, int);
static int	eoip_clone_destroy(struct ifnet *);

static void	eoip_start(struct ifnet *);
static int	eoip_ioctl(struct ifnet *, u_long, caddr_t);

static void	eoip_keepalive_send(void *);
static void	eoip_keepalive_recv(struct eoip_softc *);
static void	eoip_keepalive_hold(void *);

static int	eoip_up(struct eoip_softc *);
static int	eoip_down(struct eoip_softc *);

static struct mbuf *
		eoip_encap(struct eoip_softc *, struct mbuf *, uint8_t);

static struct mbuf *
		eoip_input(struct gre_tunnel *, struct mbuf *,
		    const struct gre_header *, uint8_t, int);
struct if_clone eoip_cloner =
    IF_CLONE_INITIALIZER("eoip", eoip_clone_create, eoip_clone_destroy);

/* protected by NET_LOCK */
struct eoip_tree eoip_tree = RBT_INITIALIZER();

/*
 * It is not easy to calculate the right value for a GRE MTU.
 * We leave this task to the admin and use the same default that
 * other vendors use.
 */
#define GREMTU 1476

/*
 * We can control the acceptance of GRE and MobileIP packets by
 * altering the sysctl net.inet.gre.allow values
 * respectively. Zero means drop them, all else is acceptance. We can also
 * control acceptance of WCCPv1-style GRE packets through the
 * net.inet.gre.wccp value, but be aware it depends upon normal GRE being
 * allowed as well.
 *
 */
int gre_allow = 0;
int gre_wccp = 0;

/*
 * greattach: pseudo-device attach hook; registers the cloners for all
 * GRE-family interfaces.  The count argument is unused.
 */
void
greattach(int n)
{
	if_clone_attach(&gre_cloner);
	if_clone_attach(&mgre_cloner);
	if_clone_attach(&egre_cloner);
	if_clone_attach(&nvgre_cloner);
	if_clone_attach(&eoip_cloner);
}

/*
 * gre_clone_create: create a point-to-point layer 3 gre(4) interface.
 * Sets up tunnel defaults (ttl, prio, ECN), the keepalive timeouts
 * (disabled until configured), and links the softc onto gre_list under
 * NET_LOCK.  Always succeeds (M_WAITOK allocation).
 */
static int
gre_clone_create(struct if_clone *ifc, int unit)
{
	struct gre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d",
	    ifc->ifc_name, unit);

	ifp = &sc->sc_if;
	ifp->if_softc = sc;
	ifp->if_type = IFT_TUNNEL;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_output = gre_output;
	ifp->if_start = gre_start;
	ifp->if_ioctl = gre_ioctl;
	ifp->if_rtrequest = p2p_rtrequest;

	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	/* hold runs from process context so it may change link state */
	timeout_set(&sc->sc_ka_send, gre_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, gre_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_NONE;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	ifp->if_llprio = IFQ_TOS2PRIO(IPTOS_PREC_INTERNETCONTROL);

	NET_LOCK();
	TAILQ_INSERT_TAIL(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	return (0);
}

/*
 * gre_clone_destroy: tear down a gre(4) interface.  Brings it down if
 * running and unlinks it from gre_list, both under NET_LOCK, before
 * detaching and freeing the softc.
 */
static int
gre_clone_destroy(struct ifnet *ifp)
{
	struct gre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		gre_down(sc);

	TAILQ_REMOVE(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
/*
 * mgre_clone_create: create a multipoint (NBMA) layer 3 GRE interface.
 * Unlike gre(4) there is no keepalive state; the remote endpoint is
 * chosen per-route via mgre_rtrequest/mgre_output.
 */
static int
mgre_clone_create(struct if_clone *ifc, int unit)
{
	struct mgre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_type = IFT_L3IPVLAN;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_MULTICAST|IFF_SIMPLEX;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_rtrequest = mgre_rtrequest;
	ifp->if_output = mgre_output;
	ifp->if_start = mgre_start;
	ifp->if_ioctl = mgre_ioctl;

	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	return (0);
}

/*
 * mgre_clone_destroy: bring the interface down under NET_LOCK (mgre_down
 * removes it from mgre_tree), then detach and free the softc.
 */
static int
mgre_clone_destroy(struct ifnet *ifp)
{
	struct mgre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		mgre_down(sc);
	NET_UNLOCK();

	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}

/*
 * egre_clone_create: create an Ethernet-over-GRE interface with a
 * generated MAC address and a fixed "auto" media word.
 */
static int
egre_clone_create(struct if_clone *ifc, int unit)
{
	struct egre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = egre_ioctl;
	ifp->if_start = egre_start;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}

/*
 * egre_clone_destroy: down the interface under NET_LOCK, then unwind
 * media, ethernet and interface attachment before freeing the softc.
 */
static int
egre_clone_destroy(struct ifnet *ifp)
{
	struct egre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		egre_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}

/*
 * nvgre_clone_create: create an NVGRE interface.  Initialises the
 * shared nvgre_entry pool on first use, enables key entropy with the
 * default vnetid just above the reserved range, and sets up the send
 * queue, parent hook tasks and the learned-address map with its ager.
 */
static int
nvgre_clone_create(struct if_clone *ifc, int unit)
{
	struct nvgre_softc *sc;
	struct ifnet *ifp;
	struct gre_tunnel *tunnel;

	/* first clone initialises the shared entry pool */
	if (nvgre_pool.pr_size == 0) {
		pool_init(&nvgre_pool, sizeof(struct nvgre_entry), 0,
		    IPL_SOFTNET, 0, "nvgren", NULL);
	}

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = nvgre_ioctl;
	ifp->if_start = nvgre_start;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	tunnel = &sc->sc_tunnel;
	tunnel->t_ttl = IP_DEFAULT_MULTICAST_TTL;
	tunnel->t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	tunnel->t_df = htons(IP_DF);
	/* default vnetid: first value above the reserved 0..0xfff range */
	tunnel->t_key_mask = GRE_KEY_ENTROPY;
	tunnel->t_key = htonl((NVGRE_VSID_RES_MAX + 1) <<
	    GRE_KEY_ENTROPY_SHIFT);

	mq_init(&sc->sc_send_list, IFQ_MAXLEN * 2, IPL_SOFTNET);
	task_set(&sc->sc_send_task, nvgre_send, sc);
	task_set(&sc->sc_ltask, nvgre_link_change, sc);
	task_set(&sc->sc_dtask, nvgre_detach, sc);

	rw_init(&sc->sc_ether_lock, "nvgrelk");
	RBT_INIT(nvgre_map, &sc->sc_ether_map);
	sc->sc_ether_num = 0;
	sc->sc_ether_max = 100;
	sc->sc_ether_tmo = 240 * hz;
	timeout_set_proc(&sc->sc_ether_age, nvgre_age, sc); /* ugh */

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}

/*
 * nvgre_clone_destroy: down the interface under NET_LOCK, then unwind
 * media, ethernet and interface attachment before freeing the softc.
 */
static int
nvgre_clone_destroy(struct ifnet *ifp)
{
	struct nvgre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		nvgre_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}

/*
 * eoip_clone_create: create a MikroTik Ethernet-over-IP interface.
 * Keepalives are always on for eoip: defaults to a 10 second interval,
 * 10 missed probes, starting in the DOWN state.
 */
static int
eoip_clone_create(struct if_clone *ifc, int unit)
{
	struct eoip_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = eoip_ioctl;
	ifp->if_start = eoip_start;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);

	sc->sc_ka_timeo = 10;
	sc->sc_ka_count = 10;

	/* hold runs from process context so it may change link state */
	timeout_set(&sc->sc_ka_send, eoip_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, eoip_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_DOWN;

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}

/*
 * eoip_clone_destroy: down the interface under NET_LOCK, then unwind
 * media, ethernet and interface attachment before freeing the softc.
 */
static int
eoip_clone_destroy(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		eoip_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}

/*
 * gre_input: IPv4 GRE protocol input.  Builds a lookup key from the
 * outer header with src/dst swapped (the key's source is our local
 * address, i.e. the packet's destination).  If no tunnel claims the
 * packet it is handed to raw IP input.
 */
int
gre_input(struct mbuf **mp, int *offp, int type, int af)
{
	struct mbuf *m = *mp;
	struct gre_tunnel key;
	struct ip *ip;

	ip = mtod(m, struct ip *);

	/* XXX check if ip_src is sane for nvgre? */

	key.t_af = AF_INET;
	key.t_src4 = ip->ip_dst;
	key.t_dst4 = ip->ip_src;

	if (gre_input_key(mp, offp, type, af, ip->ip_tos, &key) == -1)
		return (rip_input(mp, offp, type, af));

	return (IPPROTO_DONE);
}

#ifdef INET6
/*
 * gre_input6: IPv6 GRE protocol input; same shape as gre_input.  The
 * outer traffic class is the top 8 bits of the flow label word.
 */
int
gre_input6(struct mbuf **mp, int *offp, int type, int af)
{
	struct mbuf *m = *mp;
	struct gre_tunnel key;
	struct ip6_hdr *ip6;
	uint32_t flow;

	ip6 = mtod(m, struct ip6_hdr *);

	/* XXX check if ip6_src is sane for nvgre?
*/ 928 929 key.t_af = AF_INET6; 930 key.t_src6 = ip6->ip6_dst; 931 key.t_dst6 = ip6->ip6_src; 932 933 flow = bemtoh32(&ip6->ip6_flow); 934 935 if (gre_input_key(mp, offp, type, af, flow >> 20, &key) == -1) 936 return (rip6_input(mp, offp, type, af)); 937 938 return (IPPROTO_DONE); 939 } 940 #endif /* INET6 */ 941 942 static inline struct ifnet * 943 gre_find(const struct gre_tunnel *key) 944 { 945 struct gre_softc *sc; 946 947 TAILQ_FOREACH(sc, &gre_list, sc_entry) { 948 if (gre_cmp(key, &sc->sc_tunnel) != 0) 949 continue; 950 951 if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 952 continue; 953 954 return (&sc->sc_if); 955 } 956 957 return (NULL); 958 } 959 960 static inline struct ifnet * 961 mgre_find(const struct gre_tunnel *key) 962 { 963 struct mgre_softc *sc; 964 965 NET_ASSERT_LOCKED(); 966 sc = RBT_FIND(mgre_tree, &mgre_tree, (const struct mgre_softc *)key); 967 if (sc != NULL) 968 return (&sc->sc_if); 969 970 return (NULL); 971 } 972 973 static struct mbuf * 974 gre_input_1(struct gre_tunnel *key, struct mbuf *m, 975 const struct gre_header *gh, uint8_t otos, int iphlen) 976 { 977 switch (gh->gre_proto) { 978 case htons(ETHERTYPE_PPP): 979 #ifdef PIPEX 980 if (pipex_enable) { 981 struct pipex_session *session; 982 983 session = pipex_pptp_lookup_session(m); 984 if (session != NULL && 985 pipex_pptp_input(m, session) == NULL) 986 return (NULL); 987 } 988 #endif 989 break; 990 case htons(GRE_EOIP): 991 return (eoip_input(key, m, gh, otos, iphlen)); 992 break; 993 } 994 995 return (m); 996 } 997 998 static int 999 gre_input_key(struct mbuf **mp, int *offp, int type, int af, uint8_t otos, 1000 struct gre_tunnel *key) 1001 { 1002 struct mbuf *m = *mp; 1003 int iphlen = *offp, hlen, rxprio; 1004 struct ifnet *ifp; 1005 const struct gre_tunnel *tunnel; 1006 caddr_t buf; 1007 struct gre_header *gh; 1008 struct gre_h_key *gkh; 1009 void (*input)(struct ifnet *, struct mbuf *); 1010 struct mbuf *(*patch)(const struct gre_tunnel *, struct mbuf *, 1011 uint8_t *, 
uint8_t); 1012 #if NBPFILTER > 0 1013 int bpf_af = AF_UNSPEC; /* bpf */ 1014 #endif 1015 int mcast = 0; 1016 uint8_t itos; 1017 1018 if (!gre_allow) 1019 goto decline; 1020 1021 key->t_rtableid = m->m_pkthdr.ph_rtableid; 1022 1023 hlen = iphlen + sizeof(*gh); 1024 if (m->m_pkthdr.len < hlen) 1025 goto decline; 1026 1027 m = m_pullup(m, hlen); 1028 if (m == NULL) 1029 return (IPPROTO_DONE); 1030 1031 buf = mtod(m, caddr_t); 1032 gh = (struct gre_header *)(buf + iphlen); 1033 1034 /* check the version */ 1035 switch (gh->gre_flags & htons(GRE_VERS_MASK)) { 1036 case htons(GRE_VERS_0): 1037 break; 1038 1039 case htons(GRE_VERS_1): 1040 m = gre_input_1(key, m, gh, otos, iphlen); 1041 if (m == NULL) 1042 return (IPPROTO_DONE); 1043 /* FALLTHROUGH */ 1044 default: 1045 goto decline; 1046 } 1047 1048 /* the only optional bit in the header is K flag */ 1049 if ((gh->gre_flags & htons(~(GRE_KP|GRE_VERS_MASK))) != htons(0)) 1050 goto decline; 1051 1052 if (gh->gre_flags & htons(GRE_KP)) { 1053 hlen += sizeof(*gkh); 1054 if (m->m_pkthdr.len < hlen) 1055 goto decline; 1056 1057 m = m_pullup(m, hlen); 1058 if (m == NULL) 1059 return (IPPROTO_DONE); 1060 1061 buf = mtod(m, caddr_t); 1062 gh = (struct gre_header *)(buf + iphlen); 1063 gkh = (struct gre_h_key *)(gh + 1); 1064 1065 key->t_key_mask = GRE_KEY_MASK; 1066 key->t_key = gkh->gre_key; 1067 } else 1068 key->t_key_mask = GRE_KEY_NONE; 1069 1070 if (gh->gre_proto == htons(ETHERTYPE_TRANSETHER)) { 1071 if (egre_input(key, m, hlen, otos) == -1 && 1072 nvgre_input(key, m, hlen, otos) == -1) 1073 goto decline; 1074 1075 return (IPPROTO_DONE); 1076 } 1077 1078 ifp = gre_find(key); 1079 if (ifp == NULL) { 1080 ifp = mgre_find(key); 1081 if (ifp == NULL) 1082 goto decline; 1083 } 1084 1085 switch (gh->gre_proto) { 1086 case htons(GRE_WCCP): { 1087 struct mbuf *n; 1088 int off; 1089 1090 /* WCCP/GRE: 1091 * So far as I can see (and test) it seems that Cisco's WCCP 1092 * GRE tunnel is precisely a IP-in-GRE tunnel that differs 1093 * 
only in its protocol number. At least, it works for me. 1094 * 1095 * The Internet Drafts can be found if you look for 1096 * the following: 1097 * draft-forster-wrec-wccp-v1-00.txt 1098 * draft-wilson-wrec-wccp-v2-01.txt 1099 */ 1100 1101 if (!gre_wccp && !ISSET(ifp->if_flags, IFF_LINK0)) 1102 goto decline; 1103 1104 /* 1105 * If the first nibble of the payload does not look like 1106 * IPv4, assume it is WCCP v2. 1107 */ 1108 n = m_getptr(m, hlen, &off); 1109 if (n == NULL) 1110 goto decline; 1111 if (n->m_data[off] >> 4 != IPVERSION) 1112 hlen += sizeof(gre_wccp); 1113 1114 /* FALLTHROUGH */ 1115 } 1116 case htons(ETHERTYPE_IP): 1117 #if NBPFILTER > 0 1118 bpf_af = AF_INET; 1119 #endif 1120 patch = gre_ipv4_patch; 1121 input = ipv4_input; 1122 break; 1123 #ifdef INET6 1124 case htons(ETHERTYPE_IPV6): 1125 #if NBPFILTER > 0 1126 bpf_af = AF_INET6; 1127 #endif 1128 patch = gre_ipv6_patch; 1129 input = ipv6_input; 1130 break; 1131 #endif 1132 #ifdef MPLS 1133 case htons(ETHERTYPE_MPLS_MCAST): 1134 mcast = M_MCAST|M_BCAST; 1135 /* fallthrough */ 1136 case htons(ETHERTYPE_MPLS): 1137 #if NBPFILTER > 0 1138 bpf_af = AF_MPLS; 1139 #endif 1140 patch = gre_mpls_patch; 1141 input = mpls_input; 1142 break; 1143 #endif 1144 case htons(0): 1145 if (ifp->if_type != IFT_TUNNEL) { 1146 /* keepalives dont make sense for mgre */ 1147 goto decline; 1148 } 1149 1150 m_adj(m, hlen); 1151 gre_keepalive_recv(ifp, m); 1152 return (IPPROTO_DONE); 1153 1154 default: 1155 goto decline; 1156 } 1157 1158 /* it's ours now */ 1159 1160 m_adj(m, hlen); 1161 1162 tunnel = ifp->if_softc; /* gre and mgre tunnel info is at the front */ 1163 1164 m = (*patch)(tunnel, m, &itos, otos); 1165 if (m == NULL) 1166 return (IPPROTO_DONE); 1167 1168 if (tunnel->t_key_mask == GRE_KEY_ENTROPY) { 1169 SET(m->m_pkthdr.csum_flags, M_FLOWID); 1170 m->m_pkthdr.ph_flowid = 1171 bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY; 1172 } 1173 1174 rxprio = tunnel->t_rxhprio; 1175 switch (rxprio) { 1176 case IF_HDRPRIO_PACKET: 
		/* nop */
		break;
	case IF_HDRPRIO_OUTER:
		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos);
		break;
	case IF_HDRPRIO_PAYLOAD:
		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(itos);
		break;
	default:
		m->m_pkthdr.pf.prio = rxprio;
		break;
	}

	/* take ownership of the packet: clear L2 cast flags and rehome it */
	m->m_flags &= ~(M_MCAST|M_BCAST);
	m->m_flags |= mcast;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	counters_pkt(ifp->if_counters,
	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap_af(ifp->if_bpf, bpf_af, m, BPF_DIRECTION_IN);
#endif

	/* hand the decapsulated payload to the protocol input routine */
	(*input)(ifp, m);
	return (IPPROTO_DONE);
decline:
	/* not ours; give the mbuf back to the caller */
	*mp = m;
	return (-1);
}

/*
 * Run ECN egress processing on an inner IPv4 payload: combine the outer
 * TOS (otos) with the inner TOS per the tunnel's ECN policy, patch the
 * inner header if the TOS changed, and report the final inner TOS via
 * itosp.  Returns NULL (mbuf consumed) if the packet must be dropped.
 */
static struct mbuf *
gre_ipv4_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
    uint8_t *itosp, uint8_t otos)
{
	struct ip *ip;
	uint8_t itos;

	m = m_pullup(m, sizeof(*ip));
	if (m == NULL)
		return (NULL);

	ip = mtod(m, struct ip *);

	itos = ip->ip_tos;
	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
		/* ECN policy says drop (e.g. CE set but inner not ECN capable) */
		m_freem(m);
		return (NULL);
	}
	if (itos != ip->ip_tos)
		ip_tos_patch(ip, itos);	/* updates the IPv4 checksum too */

	*itosp = itos;

	return (m);
}

#ifdef INET6
/*
 * IPv6 flavour of gre_ipv4_patch: the inner traffic class lives in the
 * top byte of the flow label word.
 */
static struct mbuf *
gre_ipv6_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
    uint8_t *itosp, uint8_t otos)
{
	struct ip6_hdr *ip6;
	uint32_t flow;
	uint8_t itos;

	m = m_pullup(m, sizeof(*ip6));
	if (m == NULL)
		return (NULL);

	ip6 = mtod(m, struct ip6_hdr *);

	/* traffic class is bits 27..20 of the flow word */
	flow = bemtoh32(&ip6->ip6_flow);
	itos = flow >> 20;
	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
		m_freem(m);
		return (NULL);
	}

	CLR(flow, 0xff << 20);
	SET(flow, itos << 20);
	htobem32(&ip6->ip6_flow, flow);

	*itosp =
	    itos;

	return (m);
}
#endif

#ifdef MPLS
/*
 * MPLS flavour of gre_ipv4_patch: derive a pseudo-TOS from the EXP bits
 * of the top shim so ECN egress policy can be evaluated.  The shim itself
 * is not rewritten.
 */
static struct mbuf *
gre_mpls_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
    uint8_t *itosp, uint8_t otos)
{
	uint8_t itos;
	uint32_t shim;

	m = m_pullup(m, sizeof(shim));
	if (m == NULL)
		return (NULL);

	shim = *mtod(m, uint32_t *);
	/* EXP (3 bits) shifted into the top of a TOS byte */
	itos = (ntohl(shim & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET) << 5;

	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
		m_freem(m);
		return (NULL);
	}

	*itosp = itos;

	return (m);
}
#endif

/*
 * Set the pf priority on a received L2 (Ethernet payload) packet
 * according to the tunnel's rxhprio policy.  Unlike the L3 path there
 * is no payload TOS to consult, so IF_HDRPRIO_PAYLOAD does not appear.
 */
#define gre_l2_prio(_t, _m, _otos) do { \
	int rxprio = (_t)->t_rxhprio; \
	switch (rxprio) { \
	case IF_HDRPRIO_PACKET: \
		/* nop */ \
		break; \
	case IF_HDRPRIO_OUTER: \
		(_m)->m_pkthdr.pf.prio = IFQ_TOS2PRIO((_otos)); \
		break; \
	default: \
		(_m)->m_pkthdr.pf.prio = rxprio; \
		break; \
	} \
} while (0)

/*
 * Find the egre(4) interface whose tunnel matches the decoded GRE key
 * and feed it the inner Ethernet frame.  Returns -1 if no interface
 * claims the packet (mbuf untouched), 0 once the packet is consumed.
 */
static int
egre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen, uint8_t otos)
{
	struct egre_softc *sc;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(egre_tree, &egre_tree, (const struct egre_softc *)key);
	if (sc == NULL)
		return (-1);

	/* it's ours now */
	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (0);

	if (sc->sc_tunnel.t_key_mask == GRE_KEY_ENTROPY) {
		/* recover the flow id carried in the entropy bits of the key */
		SET(m->m_pkthdr.csum_flags, M_FLOWID);
		m->m_pkthdr.ph_flowid =
		    bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
	}

	m->m_flags &= ~(M_MCAST|M_BCAST);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	if_vinput(&sc->sc_ac.ac_if, m);

	return (0);
}

/*
 * Dump the NVGRE learned-address table into userland's ifbaconf buffer,
 * bridge(4)-style.  A zero-length request just reports the space needed.
 */
static int
nvgre_rtfind(struct nvgre_softc *sc, struct ifbaconf *baconf)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct nvgre_entry *nv;
	struct ifbareq bareq;
	caddr_t uaddr, end;
	int error;
	int age;

	if (baconf->ifbac_len == 0) {
		/* size query: single read is atomic */
		baconf->ifbac_len = sc->sc_ether_num * sizeof(bareq);
		return (0);
	}

	uaddr = baconf->ifbac_buf;
	end = uaddr + baconf->ifbac_len;

	rw_enter_read(&sc->sc_ether_lock);
	RBT_FOREACH(nv, nvgre_map, &sc->sc_ether_map) {
		if (uaddr >= end)
			break;

		/*
		 * NOTE(review): bareq is stack-allocated and only its named
		 * fields are filled in before copyout; presumably struct
		 * ifbareq has no padding here — confirm, or memset bareq
		 * first to avoid copying uninitialized stack bytes out.
		 */
		memcpy(bareq.ifba_name, ifp->if_xname,
		    sizeof(bareq.ifba_name));
		memcpy(bareq.ifba_ifsname, ifp->if_xname,
		    sizeof(bareq.ifba_ifsname));
		memcpy(&bareq.ifba_dst, &nv->nv_dst,
		    sizeof(bareq.ifba_dst));

		/* report the tunnel endpoint the address was learnt from */
		memset(&bareq.ifba_dstsa, 0, sizeof(bareq.ifba_dstsa));
		switch (sc->sc_tunnel.t_af) {
		case AF_INET: {
			struct sockaddr_in *sin;

			sin = (struct sockaddr_in *)&bareq.ifba_dstsa;
			sin->sin_len = sizeof(*sin);
			sin->sin_family = AF_INET;
			sin->sin_addr = nv->nv_gateway.in4;

			break;
		}
#ifdef INET6
		case AF_INET6: {
			struct sockaddr_in6 *sin6;

			sin6 = (struct sockaddr_in6 *)&bareq.ifba_dstsa;
			sin6->sin6_len = sizeof(*sin6);
			sin6->sin6_family = AF_INET6;
			sin6->sin6_addr = nv->nv_gateway.in6;

			break;
		}
#endif /* INET6 */
		default:
			unhandled_af(sc->sc_tunnel.t_af);
		}

		switch (nv->nv_type) {
		case NVGRE_ENTRY_DYNAMIC:
			/* age in seconds, clamped to a byte */
			age = (ticks - nv->nv_age) / hz;
			bareq.ifba_age = MIN(age, 0xff);
			bareq.ifba_flags = IFBAF_DYNAMIC;
			break;
		case NVGRE_ENTRY_STATIC:
			bareq.ifba_age = 0;
			bareq.ifba_flags = IFBAF_STATIC;
			break;
		}

		error = copyout(&bareq, uaddr, sizeof(bareq));
		if (error != 0) {
			rw_exit_read(&sc->sc_ether_lock);
			return (error);
		}

		uaddr += sizeof(bareq);
	}
	/* tell userland the full table size, even if its buffer was short */
	baconf->ifbac_len = sc->sc_ether_num * sizeof(bareq);
	rw_exit_read(&sc->sc_ether_lock);

	return (0);
}

/*
 * Discard every learned address on the NVGRE interface.  The tree head
 * is detached under the write lock, then the entries are released
 * without holding it.
 */
static void
nvgre_flush_map(struct
nvgre_softc *sc)
{
	struct nvgre_map map;
	struct nvgre_entry *nv, *nnv;

	/* steal the whole tree under the lock, then free at leisure */
	rw_enter_write(&sc->sc_ether_lock);
	map = sc->sc_ether_map;
	RBT_INIT(nvgre_map, &sc->sc_ether_map);
	sc->sc_ether_num = 0;
	rw_exit_write(&sc->sc_ether_lock);

	RBT_FOREACH_SAFE(nv, nvgre_map, &map, nnv) {
		RBT_REMOVE(nvgre_map, &map, nv);
		/* drop the map's reference; free on last release */
		if (refcnt_rele(&nv->nv_refs))
			pool_put(&nvgre_pool, nv);
	}
}

/*
 * Learn (or refresh) the mapping from the inner Ethernet source address
 * to the outer tunnel endpoint it arrived from, like a bridge learning
 * table.
 */
static void
nvgre_input_map(struct nvgre_softc *sc, const struct gre_tunnel *key,
    const struct ether_header *eh)
{
	struct nvgre_entry *nv, nkey;
	int new = 0;

	/* never learn broadcast/multicast source addresses */
	if (ETHER_IS_BROADCAST(eh->ether_shost) ||
	    ETHER_IS_MULTICAST(eh->ether_shost))
		return;

	memcpy(&nkey.nv_dst, eh->ether_shost, ETHER_ADDR_LEN);

	/* remember where it came from */
	rw_enter_read(&sc->sc_ether_lock);
	nv = RBT_FIND(nvgre_map, &sc->sc_ether_map, &nkey);
	if (nv == NULL)
		new = 1;
	else {
		nv->nv_age = ticks;

		/* static entries and unchanged gateways need no update */
		if (nv->nv_type != NVGRE_ENTRY_DYNAMIC ||
		    gre_ip_cmp(key->t_af, &key->t_dst, &nv->nv_gateway))
			nv = NULL;
		else
			refcnt_take(&nv->nv_refs);
	}
	rw_exit_read(&sc->sc_ether_lock);

	if (new) {
		struct nvgre_entry *onv;
		unsigned int num;

		nv = pool_get(&nvgre_pool, PR_NOWAIT);
		if (nv == NULL) {
			/* oh well */
			return;
		}

		memcpy(&nv->nv_dst, eh->ether_shost, ETHER_ADDR_LEN);
		nv->nv_type = NVGRE_ENTRY_DYNAMIC;
		nv->nv_gateway = key->t_dst;
		refcnt_init(&nv->nv_refs);
		nv->nv_age = ticks;

		rw_enter_write(&sc->sc_ether_lock);
		num = sc->sc_ether_num;
		if (++num > sc->sc_ether_max)
			onv = nv;	/* table full; throw the entry away */
		else {
			/* try to give the ref to the map */
			onv = RBT_INSERT(nvgre_map, &sc->sc_ether_map, nv);
			if (onv == NULL) {
				/* count the successful insert */
				sc->sc_ether_num = num;
			}
		}
		rw_exit_write(&sc->sc_ether_lock);

		/* duplicate entry, or over the limit: discard ours */
		if (onv != NULL)
			pool_put(&nvgre_pool, nv);
	} else if (nv != NULL) {
		/* existing dynamic entry moved to a new gateway */
		rw_enter_write(&sc->sc_ether_lock);
		nv->nv_gateway = key->t_dst;
		rw_exit_write(&sc->sc_ether_lock);

		if (refcnt_rele(&nv->nv_refs)) {
			/* ioctl may have deleted the entry */
			pool_put(&nvgre_pool, nv);
		}
	}
}

/*
 * Look up the NVGRE interface for a multicast packet by walking the
 * tree manually with the src/dst roles swapped relative to the key.
 */
static inline struct nvgre_softc *
nvgre_mcast_find(const struct gre_tunnel *key, unsigned int if0idx)
{
	struct nvgre_softc *sc;
	int rv;

	/*
	 * building an nvgre_softc to use with RBT_FIND is expensive, and
	 * would need to swap the src and dst addresses in the key. so do the
	 * find by hand.
	 */

	NET_ASSERT_LOCKED();
	sc = RBT_ROOT(nvgre_mcast_tree, &nvgre_mcast_tree);
	while (sc != NULL) {
		rv = nvgre_cmp_mcast(key, &key->t_src, if0idx,
		    &sc->sc_tunnel, &sc->sc_tunnel.t_dst, sc->sc_ifp0);
		if (rv == 0)
			return (sc);
		if (rv < 0)
			sc = RBT_LEFT(nvgre_mcast_tree, sc);
		else
			sc = RBT_RIGHT(nvgre_mcast_tree, sc);
	}

	return (NULL);
}

/* Look up the NVGRE interface for a unicast packet by tunnel key. */
static inline struct nvgre_softc *
nvgre_ucast_find(const struct gre_tunnel *key)
{
	NET_ASSERT_LOCKED();
	return (RBT_FIND(nvgre_ucast_tree, &nvgre_ucast_tree,
	    (struct nvgre_softc *)key));
}

/*
 * Accept an NVGRE packet: find the owning interface, learn the inner
 * source address, and hand the Ethernet frame to the interface.
 * Returns -1 if no interface matches (mbuf untouched), 0 otherwise.
 */
static int
nvgre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen,
    uint8_t otos)
{
	struct nvgre_softc *sc;

	if (ISSET(m->m_flags, M_MCAST|M_BCAST))
		sc = nvgre_mcast_find(key, m->m_pkthdr.ph_ifidx);
	else
		sc = nvgre_ucast_find(key);

	if (sc == NULL)
		return (-1);

	/* it's ours now */
	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (0);

	nvgre_input_map(sc, key, mtod(m, struct ether_header *));

	/* NVGRE always carries a flow id in the key's entropy bits */
	SET(m->m_pkthdr.csum_flags, M_FLOWID);
	m->m_pkthdr.ph_flowid = bemtoh32(&key->t_key) &
	    ~GRE_KEY_ENTROPY;

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	m->m_flags &= ~(M_MCAST|M_BCAST);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	if_vinput(&sc->sc_ac.ac_if, m);

	return (0);
}

/*
 * Strip hlen bytes of encapsulation and make sure the inner Ethernet
 * frame is contiguous and 32-bit aligned; duplicates the packet if the
 * payload ends up misaligned.  Returns NULL (mbuf consumed) on failure.
 */
static struct mbuf *
gre_ether_align(struct mbuf *m, int hlen)
{
	struct mbuf *n;
	int off;

	m_adj(m, hlen);

	if (m->m_pkthdr.len < sizeof(struct ether_header)) {
		m_freem(m);
		return (NULL);
	}

	m = m_pullup(m, sizeof(struct ether_header));
	if (m == NULL)
		return (NULL);

	n = m_getptr(m, sizeof(struct ether_header), &off);
	if (n == NULL) {
		m_freem(m);
		return (NULL);
	}

	/* the payload after the ether header must be 32-bit aligned */
	if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
		n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
		m_freem(m);
		if (n == NULL)
			return (NULL);
		m = n;
	}

	return (m);
}

/*
 * Validate and act on a received GRE keepalive: check the timestamp
 * window, verify the SipHash MAC, and advance the keepalive state
 * machine (DOWN -> HOLD -> UP).  The mbuf is always consumed.
 */
static void
gre_keepalive_recv(struct ifnet *ifp, struct mbuf *m)
{
	struct gre_softc *sc = ifp->if_softc;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	uint8_t digest[SIPHASH_DIGEST_LENGTH];
	int uptime, delta;
	int tick = ticks;

	if (sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		goto drop;

	if (m->m_pkthdr.len < sizeof(*gk))
		goto drop;
	m = m_pullup(m, sizeof(*gk));
	if (m == NULL)
		return;	/* m_pullup already freed the mbuf */

	gk = mtod(m, struct gre_keepalive *);
	/* reject packets outside the acceptable timestamp window */
	uptime = bemtoh32(&gk->gk_uptime) - sc->sc_ka_bias;
	delta = tick - uptime;
	if (delta < 0)
		goto drop;
	if (delta > hz * 10)	/* magic */
		goto drop;

	/* avoid too much siphash work */
	delta = tick - sc->sc_ka_recvtm;
	if (delta > 0 && delta < (hz / 10))
		goto drop;

	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random,
	    sizeof(gk->gk_random));
	SipHash24_Final(digest, &ctx);

	/* authenticate: only our own reflected keepalives count */
	if (memcmp(digest, gk->gk_digest, sizeof(digest)) != 0)
		goto drop;

	sc->sc_ka_recvtm = tick;

	switch (sc->sc_ka_state) {
	case GRE_KA_DOWN:
		/* first proof of life: hold before declaring the link up */
		sc->sc_ka_state = GRE_KA_HOLD;
		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
		    16 * sc->sc_ka_count);
		break;
	case GRE_KA_HOLD:
		if (--sc->sc_ka_holdcnt > 0)
			break;

		sc->sc_ka_state = GRE_KA_UP;
		gre_link_state(&sc->sc_if, sc->sc_ka_state);
		break;

	case GRE_KA_UP:
		/* decay the hold threshold back towards the configured count */
		sc->sc_ka_holdmax--;
		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
		break;
	}

	/* re-arm the dead-peer timer */
	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);

drop:
	/* both success and failure paths consume the mbuf here */
	m_freem(m);
}

/*
 * if_output handler for gre(4): validate the address family, tag the
 * packet against encapsulation loops, and queue it for gre_start().
 */
static int
gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	struct m_tag *mtag;
	int error = 0;

	if (!gre_allow) {
		error = EACCES;
		goto drop;
	}

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = ENETDOWN;
		goto drop;
	}

	switch (dst->sa_family) {
	case AF_INET:
#ifdef INET6
	case AF_INET6:
#endif
#ifdef MPLS
	case AF_MPLS:
#endif
		break;
	default:
		error = EAFNOSUPPORT;
		goto drop;
	}

	/* Try to limit infinite recursion through misconfiguration.
	 */
	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
	    mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
		/* already been through this interface: drop the loop */
		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
		    sizeof(ifp->if_index)) == 0) {
			m_freem(m);
			error = EIO;
			goto end;
		}
	}

	/* leave our mark so a future pass can detect the loop */
	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL) {
		m_freem(m);
		error = ENOBUFS;
		goto end;
	}
	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
	m_tag_prepend(m, mtag);

	/* remember the payload family for gre_start() */
	m->m_pkthdr.ph_family = dst->sa_family;

	error = if_enqueue(ifp, m);
end:
	if (error)
		ifp->if_oerrors++;
	return (error);

drop:
	m_freem(m);
	return (error);
}

/*
 * if_start handler for gre(4): encapsulate each queued L3 packet and
 * push it out via the tunnel's outer IP stack.
 */
void
gre_start(struct ifnet *ifp)
{
	struct gre_softc *sc = ifp->if_softc;
	struct mbuf *m;
	int af;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
		af = m->m_pkthdr.ph_family;

#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
#endif

		m = gre_l3_encap(&sc->sc_tunnel, m, af);
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}

/*
 * Route request hook for mgre(4): when a local route is added on the
 * interface, clear RTF_LLINFO on it if lo0 in this rdomain has an
 * address of the same family.
 */
void
mgre_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* find the interface address the route is for */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if
			    (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}

/*
 * if_output handler for mgre(4): resolve the outer tunnel destination
 * from the route's gateway, guard against loops, encapsulate, and send.
 * Unlike gre(4), the destination is per-packet, so encapsulation
 * happens here rather than in the start routine.
 */
static int
mgre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dest,
    struct rtentry *rt0)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct sockaddr *gate;
	struct rtentry *rt;
	struct m_tag *mtag;
	int error = 0;
	sa_family_t af;
	const void *addr;

	if (!gre_allow) {
		error = EACCES;
		goto drop;
	}

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = ENETDOWN;
		goto drop;
	}

	switch (dest->sa_family) {
	case AF_INET:
#ifdef INET6
	case AF_INET6:
#endif
#ifdef MPLS
	case AF_MPLS:
#endif
		break;
	default:
		error = EAFNOSUPPORT;
		goto drop;
	}

	/* point-to-multipoint tunnels cannot broadcast */
	if (ISSET(m->m_flags, M_MCAST|M_BCAST)) {
		error = ENETUNREACH;
		goto drop;
	}

	rt = rt_getll(rt0);

	/* check rt_expire? */
	if (ISSET(rt->rt_flags, RTF_REJECT)) {
		error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
		goto drop;
	}
	if (!ISSET(rt->rt_flags, RTF_HOST)) {
		error = EHOSTUNREACH;
		goto drop;
	}
	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		error = EINVAL;
		goto drop;
	}

	/* the gateway address is the outer tunnel destination */
	gate = rt->rt_gateway;
	af = gate->sa_family;
	if (af != sc->sc_tunnel.t_af) {
		error = EAGAIN;
		goto drop;
	}

	/* Try to limit infinite recursion through misconfiguration.
	 */
	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
	    mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
		/* already been through this interface: drop the loop */
		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
		    sizeof(ifp->if_index)) == 0) {
			error = EIO;
			goto drop;
		}
	}

	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL) {
		error = ENOBUFS;
		goto drop;
	}
	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
	m_tag_prepend(m, mtag);

	/* extract the raw address from the gateway sockaddr */
	switch (af) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)gate;
		addr = &sin->sin_addr;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)gate;
		addr = &sin6->sin6_addr;
		break;
	}
#endif
	default:
		unhandled_af(af);
		/* NOTREACHED */
	}

	m = gre_l3_encap_dst(&sc->sc_tunnel, addr, m, dest->sa_family);
	if (m == NULL) {
		ifp->if_oerrors++;
		return (ENOBUFS);
	}

	m->m_pkthdr.ph_family = dest->sa_family;

	error = if_enqueue(ifp, m);
	if (error)
		ifp->if_oerrors++;
	return (error);

drop:
	m_freem(m);
	return (error);
}

/*
 * if_start handler for mgre(4): packets were already encapsulated in
 * mgre_output(), so just tap bpf past the encapsulation headers and
 * send via the outer IP stack.
 */
static void
mgre_start(struct ifnet *ifp)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct mbuf *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf) {
			struct m_hdr mh;
			struct mbuf *n;
			int off;

			/* tap the payload, skipping if_hdrlen of encap */
			n = m_getptr(m, ifp->if_hdrlen, &off);
			KASSERT(n != NULL);

			mh.mh_flags = 0;
			mh.mh_next = n->m_next;
			mh.mh_len = n->m_len - off;
			mh.mh_data = n->m_data + off;

			bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family,
			    (struct mbuf *)&mh, BPF_DIRECTION_OUT);
		}
#endif

		if (gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}

/*
 * if_start handler for egre(4): wrap each queued Ethernet frame in a
 * GRE header (ETHERTYPE_TRANSETHER) and send via the outer IP stack.
 */
static void
egre_start(struct ifnet *ifp)
{
	struct egre_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* leave all the headroom in the new first mbuf */
		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap(&sc->sc_tunnel, m, htons(ETHERTYPE_TRANSETHER),
		    sc->sc_tunnel.t_ttl, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}

/*
 * Encapsulate an L3 payload for the given destination: derive the inner
 * TOS and TTL from the payload (or tunnel config), run ECN ingress
 * processing, and prepend the GRE header.
 */
static struct mbuf *
gre_l3_encap_dst(const struct gre_tunnel *tunnel, const void *dst,
    struct mbuf *m, sa_family_t af)
{
	uint16_t proto;
	uint8_t ttl, itos, otos;
	int tttl = tunnel->t_ttl;
	int ttloff;

	switch (af) {
	case AF_INET: {
		struct ip *ip;

		m = m_pullup(m, sizeof(*ip));
		if (m == NULL)
			return (NULL);

		ip = mtod(m, struct ip *);
		itos = ip->ip_tos;

		ttloff = offsetof(struct ip, ip_ttl);
		proto = htons(ETHERTYPE_IP);
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;

		m = m_pullup(m, sizeof(*ip6));
		if (m == NULL)
			return (NULL);

		ip6 = mtod(m, struct ip6_hdr *);
		/* traffic class from bits 27..20 of the flow word */
		itos = (ntohl(ip6->ip6_flow) & 0x0ff00000) >> 20;

		ttloff = offsetof(struct ip6_hdr, ip6_hlim);
		proto = htons(ETHERTYPE_IPV6);
		break;
	}
#endif
#ifdef MPLS
	case AF_MPLS: {
		uint32_t shim;

		m = m_pullup(m, sizeof(shim));
		if (m == NULL)
			return (NULL);

		/* pseudo-TOS from the EXP bits of the top shim */
		shim = bemtoh32(mtod(m, uint32_t *)) & MPLS_EXP_MASK;
		itos = (shim >> MPLS_EXP_OFFSET) << 5;

		/* the MPLS TTL is the 4th byte of the shim */
		ttloff = 3;

		if (m->m_flags & (M_BCAST | M_MCAST))
			proto = htons(ETHERTYPE_MPLS_MCAST);
		else
			proto = htons(ETHERTYPE_MPLS);
		break;
	}
#endif
	default:
		unhandled_af(af);
	}

	if (tttl == -1) {
		/* ttl -1 means "copy the payload's TTL into the outer header" */
		KASSERT(m->m_len > ttloff); /* m_pullup has happened */

		ttl = *(m->m_data + ttloff);
	} else
		ttl = tttl;

	itos = gre_l3_tos(tunnel, m, itos);
	ip_ecn_ingress(tunnel->t_ecn, &otos, &itos);

	return (gre_encap_dst(tunnel, dst, m, proto, ttl, otos));
}

/*
 * Prepend the GRE header (and optional key field) and continue with the
 * outer IP encapsulation.
 */
static struct mbuf *
gre_encap_dst(const struct gre_tunnel *tunnel, const union gre_addr *dst,
    struct mbuf *m, uint16_t proto, uint8_t ttl, uint8_t tos)
{
	struct gre_header *gh;
	struct gre_h_key *gkh;
	int hlen;

	hlen = sizeof(*gh);
	if (tunnel->t_key_mask != GRE_KEY_NONE)
		hlen += sizeof(*gkh);

	m = m_prepend(m, hlen, M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	gh = mtod(m, struct gre_header *);
	gh->gre_flags = GRE_VERS_0;
	gh->gre_proto = proto;
	if (tunnel->t_key_mask != GRE_KEY_NONE) {
		gh->gre_flags |= htons(GRE_KP);

		gkh = (struct gre_h_key *)(gh + 1);
		gkh->gre_key = tunnel->t_key;

		/* mix the packet's flow id into the key's entropy bits */
		if (tunnel->t_key_mask == GRE_KEY_ENTROPY &&
		    ISSET(m->m_pkthdr.csum_flags, M_FLOWID)) {
			gkh->gre_key |= htonl(~GRE_KEY_ENTROPY &
			    m->m_pkthdr.ph_flowid);
		}
	}

	return (gre_encap_dst_ip(tunnel, dst, m, ttl, tos));
}

/*
 * Prepend the outer IPv4 or IPv6 header according to the tunnel's
 * configured address family.
 */
static struct mbuf *
gre_encap_dst_ip(const struct gre_tunnel *tunnel, const union gre_addr *dst,
    struct mbuf *m, uint8_t ttl, uint8_t tos)
{
	switch (tunnel->t_af) {
	case AF_UNSPEC:
		/* packets may arrive before tunnel is
		   set up */
		m_freem(m);
		return (NULL);
	case AF_INET: {
		struct ip *ip;

		m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
		if (m == NULL)
			return (NULL);

		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_off = tunnel->t_df;	/* DF policy, already htons'd */
		ip->ip_tos = tos;
		ip->ip_len = htons(m->m_pkthdr.len);
		ip->ip_ttl = ttl;
		ip->ip_p = IPPROTO_GRE;
		ip->ip_src = tunnel->t_src4;
		ip->ip_dst = dst->in4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;
		int len = m->m_pkthdr.len;

		m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
		if (m == NULL)
			return (NULL);

		ip6 = mtod(m, struct ip6_hdr *);
		/* seed the flow label from the packet's flow id, if any */
		ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ?
		    htonl(m->m_pkthdr.ph_flowid) : 0;
		ip6->ip6_vfc |= IPV6_VERSION;
		ip6->ip6_flow |= htonl((uint32_t)tos << 20);
		ip6->ip6_plen = htons(len);
		ip6->ip6_nxt = IPPROTO_GRE;
		ip6->ip6_hlim = ttl;
		ip6->ip6_src = tunnel->t_src6;
		ip6->ip6_dst = dst->in6;

		if (tunnel->t_df)
			SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);

		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(tunnel->t_af);
	}

	return (m);
}

/*
 * Push a fully encapsulated packet into the outer IP stack in the
 * tunnel's routing table.
 */
static int
gre_ip_output(const struct gre_tunnel *tunnel, struct mbuf *m)
{
	m->m_flags &= ~(M_BCAST|M_MCAST);
	m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	switch (tunnel->t_af) {
	case AF_INET:
		ip_send(m);
		break;
#ifdef INET6
	case AF_INET6:
		ip6_send(m);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	return (0);
}

/*
 * Handle the ioctls common to all GRE-based interfaces: MTU, vnetid,
 * tunnel endpoints, routing table, and DF bit.
 */
static int
gre_tunnel_ioctl(struct ifnet *ifp, struct gre_tunnel *tunnel,
    u_long cmd, void *data)
{
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFMTU:
		/* refuse MTUs below the IPv4 minimum reassembly size */
		if (ifr->ifr_mtu < 576) {
			error = EINVAL;
			break;
		}
		ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	case SIOCSVNETID:
		error = gre_set_vnetid(tunnel, ifr);
		break;

	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;
	case SIOCDVNETID:
		error = gre_del_vnetid(tunnel);
		break;

	case SIOCSVNETFLOWID:
		error = gre_set_vnetflowid(tunnel, ifr);
		break;

	case SIOCGVNETFLOWID:
		error = gre_get_vnetflowid(tunnel, ifr);
		break;

	case SIOCSLIFPHYADDR:
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		error = gre_del_tunnel(tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ?
		    1 : 0;
		break;

	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

/*
 * Compute the outer TOS for an L2 payload from the tunnel's txhprio
 * policy (packet priority or a fixed value).
 */
static uint8_t
gre_l2_tos(const struct gre_tunnel *t, const struct mbuf *m)
{
	uint8_t prio;

	switch (t->t_txhprio) {
	case IF_HDRPRIO_PACKET:
		prio = m->m_pkthdr.pf.prio;
		break;
	default:
		prio = t->t_txhprio;
		break;
	}

	return (IFQ_PRIO2TOS(prio));
}

/*
 * Compute the outer TOS for an L3 payload.  IF_HDRPRIO_PAYLOAD copies
 * the inner TOS verbatim; otherwise the priority is mapped to a TOS
 * while preserving the payload's ECN bits.
 */
static uint8_t
gre_l3_tos(const struct gre_tunnel *t, const struct mbuf *m, uint8_t tos)
{
	uint8_t prio;

	switch (t->t_txhprio) {
	case IF_HDRPRIO_PAYLOAD:
		return (tos);
	case IF_HDRPRIO_PACKET:
		prio = m->m_pkthdr.pf.prio;
		break;
	default:
		prio = t->t_txhprio;
		break;
	}

	return (IFQ_PRIO2TOS(prio) | (tos & IPTOS_ECN_MASK));
}

/*
 * ioctl handler for gre(4): interface state, keepalives, TTL, ECN and
 * header priorities; everything else falls through to the common
 * tunnel ioctls.
 */
static int
gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct gre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		/* FALLTHROUGH */
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_down(sc);
		}
		break;
	case SIOCSIFRDOMAIN:
		/* let if_rdomain do its thing */
		error = ENOTTY;
		break;

	case SIOCSETKALIVE:
		/* timeo and cnt must both be zero or both be non-zero */
		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256 ||
		    (ikar->ikar_timeo == 0) != (ikar->ikar_cnt == 0))
			return (EINVAL);

		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			/* disable keepalives */
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count =
			    ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;

			/* fresh MAC key and timestamp bias for this session */
			arc4random_buf(&sc->sc_ka_key, sizeof(sc->sc_ka_key));
			sc->sc_ka_bias = arc4random();
			sc->sc_ka_holdmax = sc->sc_ka_count;

			sc->sc_ka_recvtm = ticks - hz;
			timeout_add(&sc->sc_ka_send, 1);
			timeout_add_sec(&sc->sc_ka_hold,
			    sc->sc_ka_timeo * sc->sc_ka_count);
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSLIFPHYTTL:
		/* -1 means "copy the payload TTL"; otherwise 1..255 */
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}

/*
 * ioctl handler for mgre(4).  Tunnel configuration that affects the
 * lookup trees (vnetid, endpoint, rdomain) is refused while the
 * interface is running.
 */
static int
mgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		/* -1 means "copy the payload TTL"; otherwise 1..255 */
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = mgre_set_tunnel(sc, (struct if_laddrreq *)data);
		break;
	case SIOCGLIFPHYADDR:
		error = mgre_get_tunnel(sc, (struct if_laddrreq *)data);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		/* these change the tunnel lookup key; not while running */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}

static int
mgre_set_tunnel(struct mgre_softc *sc, struct if_laddrreq *req) 2555 { 2556 struct gre_tunnel *tunnel = &sc->sc_tunnel; 2557 struct sockaddr *addr = (struct sockaddr *)&req->addr; 2558 struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr; 2559 struct sockaddr_in *addr4; 2560 #ifdef INET6 2561 struct sockaddr_in6 *addr6; 2562 int error; 2563 #endif 2564 2565 if (dstaddr->sa_family != AF_UNSPEC) 2566 return (EINVAL); 2567 2568 /* validate */ 2569 switch (addr->sa_family) { 2570 case AF_INET: 2571 if (addr->sa_len != sizeof(*addr4)) 2572 return (EINVAL); 2573 2574 addr4 = (struct sockaddr_in *)addr; 2575 if (in_nullhost(addr4->sin_addr) || 2576 IN_MULTICAST(addr4->sin_addr.s_addr)) 2577 return (EINVAL); 2578 2579 tunnel->t_src4 = addr4->sin_addr; 2580 tunnel->t_dst4.s_addr = INADDR_ANY; 2581 2582 break; 2583 #ifdef INET6 2584 case AF_INET6: 2585 if (addr->sa_len != sizeof(*addr6)) 2586 return (EINVAL); 2587 2588 addr6 = (struct sockaddr_in6 *)addr; 2589 if (IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr) || 2590 IN6_IS_ADDR_MULTICAST(&addr6->sin6_addr)) 2591 return (EINVAL); 2592 2593 error = in6_embedscope(&tunnel->t_src6, addr6, NULL); 2594 if (error != 0) 2595 return (error); 2596 2597 memset(&tunnel->t_dst6, 0, sizeof(tunnel->t_dst6)); 2598 2599 break; 2600 #endif 2601 default: 2602 return (EAFNOSUPPORT); 2603 } 2604 2605 /* commit */ 2606 tunnel->t_af = addr->sa_family; 2607 2608 return (0); 2609 } 2610 2611 static int 2612 mgre_get_tunnel(struct mgre_softc *sc, struct if_laddrreq *req) 2613 { 2614 struct gre_tunnel *tunnel = &sc->sc_tunnel; 2615 struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr; 2616 struct sockaddr_in *sin; 2617 #ifdef INET6 2618 struct sockaddr_in6 *sin6; 2619 #endif 2620 2621 switch (tunnel->t_af) { 2622 case AF_UNSPEC: 2623 return (EADDRNOTAVAIL); 2624 case AF_INET: 2625 sin = (struct sockaddr_in *)&req->addr; 2626 memset(sin, 0, sizeof(*sin)); 2627 sin->sin_family = AF_INET; 2628 sin->sin_len = sizeof(*sin); 2629 sin->sin_addr = 
tunnel->t_src4; 2630 break; 2631 2632 #ifdef INET6 2633 case AF_INET6: 2634 sin6 = (struct sockaddr_in6 *)&req->addr; 2635 memset(sin6, 0, sizeof(*sin6)); 2636 sin6->sin6_family = AF_INET6; 2637 sin6->sin6_len = sizeof(*sin6); 2638 in6_recoverscope(sin6, &tunnel->t_src6); 2639 break; 2640 #endif 2641 default: 2642 unhandled_af(tunnel->t_af); 2643 } 2644 2645 dstaddr->sa_len = 2; 2646 dstaddr->sa_family = AF_UNSPEC; 2647 2648 return (0); 2649 } 2650 2651 static int 2652 egre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 2653 { 2654 struct egre_softc *sc = ifp->if_softc; 2655 struct ifreq *ifr = (struct ifreq *)data; 2656 int error = 0; 2657 2658 switch(cmd) { 2659 case SIOCSIFADDR: 2660 break; 2661 case SIOCSIFFLAGS: 2662 if (ISSET(ifp->if_flags, IFF_UP)) { 2663 if (!ISSET(ifp->if_flags, IFF_RUNNING)) 2664 error = egre_up(sc); 2665 else 2666 error = 0; 2667 } else { 2668 if (ISSET(ifp->if_flags, IFF_RUNNING)) 2669 error = egre_down(sc); 2670 } 2671 break; 2672 2673 case SIOCSLIFPHYTTL: 2674 if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) { 2675 error = EINVAL; 2676 break; 2677 } 2678 2679 /* commit */ 2680 sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl; 2681 break; 2682 2683 case SIOCGLIFPHYTTL: 2684 ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl; 2685 break; 2686 2687 case SIOCSTXHPRIO: 2688 error = if_txhprio_l2_check(ifr->ifr_hdrprio); 2689 if (error != 0) 2690 break; 2691 2692 sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio; 2693 break; 2694 case SIOCGTXHPRIO: 2695 ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio; 2696 break; 2697 2698 case SIOCSRXHPRIO: 2699 error = if_rxhprio_l2_check(ifr->ifr_hdrprio); 2700 if (error != 0) 2701 break; 2702 2703 sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio; 2704 break; 2705 case SIOCGRXHPRIO: 2706 ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio; 2707 break; 2708 2709 case SIOCSVNETID: 2710 case SIOCDVNETID: 2711 case SIOCSVNETFLOWID: 2712 case SIOCSLIFPHYADDR: 2713 case SIOCDIFPHYADDR: 2714 case SIOCSLIFPHYRTABLE: 2715 if (ISSET(ifp->if_flags, 
IFF_RUNNING)) { 2716 error = EBUSY; 2717 break; 2718 } 2719 2720 /* FALLTHROUGH */ 2721 default: 2722 error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data); 2723 if (error == ENOTTY) 2724 error = ether_ioctl(ifp, &sc->sc_ac, cmd, data); 2725 break; 2726 } 2727 2728 if (error == ENETRESET) { 2729 /* no hardware to program */ 2730 error = 0; 2731 } 2732 2733 return (error); 2734 } 2735 2736 static int 2737 nvgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 2738 { 2739 struct nvgre_softc *sc = ifp->if_softc; 2740 struct gre_tunnel *tunnel = &sc->sc_tunnel; 2741 2742 struct ifreq *ifr = (struct ifreq *)data; 2743 struct if_parent *parent = (struct if_parent *)data; 2744 struct ifbrparam *bparam = (struct ifbrparam *)data; 2745 struct ifnet *ifp0; 2746 2747 int error = 0; 2748 2749 switch (cmd) { 2750 case SIOCSIFADDR: 2751 break; 2752 case SIOCSIFFLAGS: 2753 if (ISSET(ifp->if_flags, IFF_UP)) { 2754 if (!ISSET(ifp->if_flags, IFF_RUNNING)) 2755 error = nvgre_up(sc); 2756 else 2757 error = ENETRESET; 2758 } else { 2759 if (ISSET(ifp->if_flags, IFF_RUNNING)) 2760 error = nvgre_down(sc); 2761 } 2762 break; 2763 2764 case SIOCSLIFPHYADDR: 2765 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 2766 error = EBUSY; 2767 break; 2768 } 2769 error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 0); 2770 if (error == 0) 2771 nvgre_flush_map(sc); 2772 break; 2773 case SIOCGLIFPHYADDR: 2774 error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data); 2775 break; 2776 case SIOCDIFPHYADDR: 2777 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 2778 error = EBUSY; 2779 break; 2780 } 2781 error = gre_del_tunnel(tunnel); 2782 if (error == 0) 2783 nvgre_flush_map(sc); 2784 break; 2785 2786 case SIOCSIFPARENT: 2787 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 2788 error = EBUSY; 2789 break; 2790 } 2791 error = nvgre_set_parent(sc, parent->ifp_parent); 2792 if (error == 0) 2793 nvgre_flush_map(sc); 2794 break; 2795 case SIOCGIFPARENT: 2796 ifp0 = if_get(sc->sc_ifp0); 2797 if (ifp0 == NULL) 2798 error 
= EADDRNOTAVAIL; 2799 else { 2800 memcpy(parent->ifp_parent, ifp0->if_xname, 2801 sizeof(parent->ifp_parent)); 2802 } 2803 if_put(ifp0); 2804 break; 2805 case SIOCDIFPARENT: 2806 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 2807 error = EBUSY; 2808 break; 2809 } 2810 /* commit */ 2811 sc->sc_ifp0 = 0; 2812 nvgre_flush_map(sc); 2813 break; 2814 2815 case SIOCSVNETID: 2816 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 2817 error = EBUSY; 2818 break; 2819 } 2820 if (ifr->ifr_vnetid < GRE_KEY_ENTROPY_MIN || 2821 ifr->ifr_vnetid > GRE_KEY_ENTROPY_MAX) { 2822 error = EINVAL; 2823 break; 2824 } 2825 2826 /* commit */ 2827 tunnel->t_key = htonl(ifr->ifr_vnetid << GRE_KEY_ENTROPY_SHIFT); 2828 nvgre_flush_map(sc); 2829 break; 2830 case SIOCGVNETID: 2831 error = gre_get_vnetid(tunnel, ifr); 2832 break; 2833 2834 case SIOCSLIFPHYRTABLE: 2835 if (ifr->ifr_rdomainid < 0 || 2836 ifr->ifr_rdomainid > RT_TABLEID_MAX || 2837 !rtable_exists(ifr->ifr_rdomainid)) { 2838 error = EINVAL; 2839 break; 2840 } 2841 tunnel->t_rtableid = ifr->ifr_rdomainid; 2842 nvgre_flush_map(sc); 2843 break; 2844 case SIOCGLIFPHYRTABLE: 2845 ifr->ifr_rdomainid = tunnel->t_rtableid; 2846 break; 2847 2848 case SIOCSLIFPHYDF: 2849 /* commit */ 2850 tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0); 2851 break; 2852 case SIOCGLIFPHYDF: 2853 ifr->ifr_df = tunnel->t_df ? 
1 : 0; 2854 break; 2855 2856 case SIOCSLIFPHYTTL: 2857 if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) { 2858 error = EINVAL; 2859 break; 2860 } 2861 2862 /* commit */ 2863 tunnel->t_ttl = ifr->ifr_ttl; 2864 break; 2865 2866 case SIOCGLIFPHYTTL: 2867 ifr->ifr_ttl = tunnel->t_ttl; 2868 break; 2869 2870 case SIOCSTXHPRIO: 2871 error = if_txhprio_l2_check(ifr->ifr_hdrprio); 2872 if (error != 0) 2873 break; 2874 2875 sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio; 2876 break; 2877 case SIOCGTXHPRIO: 2878 ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio; 2879 break; 2880 2881 case SIOCSRXHPRIO: 2882 error = if_rxhprio_l2_check(ifr->ifr_hdrprio); 2883 if (error != 0) 2884 break; 2885 2886 sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio; 2887 break; 2888 case SIOCGRXHPRIO: 2889 ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio; 2890 break; 2891 2892 case SIOCBRDGSCACHE: 2893 if (bparam->ifbrp_csize < 1) { 2894 error = EINVAL; 2895 break; 2896 } 2897 2898 /* commit */ 2899 sc->sc_ether_max = bparam->ifbrp_csize; 2900 break; 2901 case SIOCBRDGGCACHE: 2902 bparam->ifbrp_csize = sc->sc_ether_max; 2903 break; 2904 2905 case SIOCBRDGSTO: 2906 if (bparam->ifbrp_ctime < 0 || 2907 bparam->ifbrp_ctime > INT_MAX / hz) { 2908 error = EINVAL; 2909 break; 2910 } 2911 sc->sc_ether_tmo = bparam->ifbrp_ctime * hz; 2912 break; 2913 case SIOCBRDGGTO: 2914 bparam->ifbrp_ctime = sc->sc_ether_tmo / hz; 2915 break; 2916 2917 case SIOCBRDGRTS: 2918 error = nvgre_rtfind(sc, (struct ifbaconf *)data); 2919 break; 2920 case SIOCBRDGFLUSH: 2921 nvgre_flush_map(sc); 2922 break; 2923 2924 case SIOCADDMULTI: 2925 case SIOCDELMULTI: 2926 break; 2927 2928 default: 2929 error = ether_ioctl(ifp, &sc->sc_ac, cmd, data); 2930 break; 2931 } 2932 2933 if (error == ENETRESET) { 2934 /* no hardware to program */ 2935 error = 0; 2936 } 2937 2938 return (error); 2939 } 2940 2941 static int 2942 eoip_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 2943 { 2944 struct eoip_softc *sc = ifp->if_softc; 2945 struct ifreq *ifr = (struct ifreq 
*)data; 2946 struct ifkalivereq *ikar = (struct ifkalivereq *)data; 2947 int error = 0; 2948 2949 switch(cmd) { 2950 case SIOCSIFADDR: 2951 break; 2952 case SIOCSIFFLAGS: 2953 if (ISSET(ifp->if_flags, IFF_UP)) { 2954 if (!ISSET(ifp->if_flags, IFF_RUNNING)) 2955 error = eoip_up(sc); 2956 else 2957 error = 0; 2958 } else { 2959 if (ISSET(ifp->if_flags, IFF_RUNNING)) 2960 error = eoip_down(sc); 2961 } 2962 break; 2963 2964 case SIOCSETKALIVE: 2965 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 2966 error = EBUSY; 2967 break; 2968 } 2969 2970 if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 || 2971 ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256) 2972 return (EINVAL); 2973 2974 if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) { 2975 sc->sc_ka_count = 0; 2976 sc->sc_ka_timeo = 0; 2977 sc->sc_ka_state = GRE_KA_NONE; 2978 } else { 2979 sc->sc_ka_count = ikar->ikar_cnt; 2980 sc->sc_ka_timeo = ikar->ikar_timeo; 2981 sc->sc_ka_state = GRE_KA_DOWN; 2982 } 2983 break; 2984 2985 case SIOCGETKALIVE: 2986 ikar->ikar_cnt = sc->sc_ka_count; 2987 ikar->ikar_timeo = sc->sc_ka_timeo; 2988 break; 2989 2990 case SIOCSVNETID: 2991 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 2992 error = EBUSY; 2993 break; 2994 } 2995 if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffff) 2996 return (EINVAL); 2997 2998 sc->sc_tunnel.t_key = htole16(ifr->ifr_vnetid); /* for cmp */ 2999 sc->sc_tunnel_id = htole16(ifr->ifr_vnetid); 3000 break; 3001 3002 case SIOCGVNETID: 3003 ifr->ifr_vnetid = letoh16(sc->sc_tunnel_id); 3004 break; 3005 3006 case SIOCSLIFPHYADDR: 3007 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 3008 error = EBUSY; 3009 break; 3010 } 3011 3012 error = gre_set_tunnel(&sc->sc_tunnel, 3013 (struct if_laddrreq *)data, 1); 3014 break; 3015 case SIOCGLIFPHYADDR: 3016 error = gre_get_tunnel(&sc->sc_tunnel, 3017 (struct if_laddrreq *)data); 3018 break; 3019 case SIOCDIFPHYADDR: 3020 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 3021 error = EBUSY; 3022 break; 3023 } 3024 3025 error = gre_del_tunnel(&sc->sc_tunnel); 
3026 break; 3027 3028 case SIOCSLIFPHYRTABLE: 3029 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 3030 error = EBUSY; 3031 break; 3032 } 3033 3034 if (ifr->ifr_rdomainid < 0 || 3035 ifr->ifr_rdomainid > RT_TABLEID_MAX || 3036 !rtable_exists(ifr->ifr_rdomainid)) { 3037 error = EINVAL; 3038 break; 3039 } 3040 sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid; 3041 break; 3042 case SIOCGLIFPHYRTABLE: 3043 ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid; 3044 break; 3045 3046 case SIOCSLIFPHYTTL: 3047 if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) { 3048 error = EINVAL; 3049 break; 3050 } 3051 3052 /* commit */ 3053 sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl; 3054 break; 3055 case SIOCGLIFPHYTTL: 3056 ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl; 3057 break; 3058 3059 case SIOCSLIFPHYDF: 3060 /* commit */ 3061 sc->sc_tunnel.t_df = ifr->ifr_df ? htons(IP_DF) : htons(0); 3062 break; 3063 case SIOCGLIFPHYDF: 3064 ifr->ifr_df = sc->sc_tunnel.t_df ? 1 : 0; 3065 break; 3066 3067 case SIOCSTXHPRIO: 3068 error = if_txhprio_l2_check(ifr->ifr_hdrprio); 3069 if (error != 0) 3070 break; 3071 3072 sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio; 3073 break; 3074 case SIOCGTXHPRIO: 3075 ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio; 3076 break; 3077 3078 case SIOCSRXHPRIO: 3079 error = if_rxhprio_l2_check(ifr->ifr_hdrprio); 3080 if (error != 0) 3081 break; 3082 3083 sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio; 3084 break; 3085 case SIOCGRXHPRIO: 3086 ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio; 3087 break; 3088 3089 case SIOCADDMULTI: 3090 case SIOCDELMULTI: 3091 break; 3092 3093 default: 3094 error = ether_ioctl(ifp, &sc->sc_ac, cmd, data); 3095 break; 3096 } 3097 3098 if (error == ENETRESET) { 3099 /* no hardware to program */ 3100 error = 0; 3101 } 3102 3103 return (error); 3104 } 3105 3106 static int 3107 gre_up(struct gre_softc *sc) 3108 { 3109 NET_ASSERT_LOCKED(); 3110 SET(sc->sc_if.if_flags, IFF_RUNNING); 3111 3112 if (sc->sc_ka_state != GRE_KA_NONE) 3113 gre_keepalive_send(sc); 3114 3115 return (0); 
3116 } 3117 3118 static int 3119 gre_down(struct gre_softc *sc) 3120 { 3121 NET_ASSERT_LOCKED(); 3122 CLR(sc->sc_if.if_flags, IFF_RUNNING); 3123 3124 if (sc->sc_ka_state != GRE_KA_NONE) { 3125 timeout_del_barrier(&sc->sc_ka_hold); 3126 timeout_del_barrier(&sc->sc_ka_send); 3127 3128 sc->sc_ka_state = GRE_KA_DOWN; 3129 gre_link_state(&sc->sc_if, sc->sc_ka_state); 3130 } 3131 3132 return (0); 3133 } 3134 3135 static void 3136 gre_link_state(struct ifnet *ifp, unsigned int state) 3137 { 3138 int link_state = LINK_STATE_UNKNOWN; 3139 3140 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 3141 switch (state) { 3142 case GRE_KA_NONE: 3143 /* maybe up? or down? it's unknown, really */ 3144 break; 3145 case GRE_KA_UP: 3146 link_state = LINK_STATE_UP; 3147 break; 3148 default: 3149 link_state = LINK_STATE_KALIVE_DOWN; 3150 break; 3151 } 3152 } 3153 3154 if (ifp->if_link_state != link_state) { 3155 ifp->if_link_state = link_state; 3156 if_link_state_change(ifp); 3157 } 3158 } 3159 3160 static void 3161 gre_keepalive_send(void *arg) 3162 { 3163 struct gre_tunnel t; 3164 struct gre_softc *sc = arg; 3165 struct mbuf *m; 3166 struct gre_keepalive *gk; 3167 SIPHASH_CTX ctx; 3168 int linkhdr, len; 3169 uint16_t proto; 3170 uint8_t ttl; 3171 uint8_t tos; 3172 3173 /* 3174 * re-schedule immediately, so we deal with incomplete configuation 3175 * or temporary errors. 
3176 */ 3177 if (sc->sc_ka_timeo) 3178 timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo); 3179 3180 if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) || 3181 sc->sc_ka_state == GRE_KA_NONE || 3182 sc->sc_tunnel.t_af == AF_UNSPEC || 3183 sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain) 3184 return; 3185 3186 /* this is really conservative */ 3187 #ifdef INET6 3188 linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) + 3189 sizeof(struct gre_header) + sizeof(struct gre_h_key); 3190 #else 3191 linkhdr = max_linkhdr + sizeof(struct ip) + 3192 sizeof(struct gre_header) + sizeof(struct gre_h_key); 3193 #endif 3194 len = linkhdr + sizeof(*gk); 3195 3196 MGETHDR(m, M_DONTWAIT, MT_DATA); 3197 if (m == NULL) 3198 return; 3199 3200 if (len > MHLEN) { 3201 MCLGETI(m, M_DONTWAIT, NULL, len); 3202 if (!ISSET(m->m_flags, M_EXT)) { 3203 m_freem(m); 3204 return; 3205 } 3206 } 3207 3208 m->m_pkthdr.len = m->m_len = len; 3209 m_adj(m, linkhdr); 3210 3211 /* 3212 * build the inside packet 3213 */ 3214 gk = mtod(m, struct gre_keepalive *); 3215 htobem32(&gk->gk_uptime, sc->sc_ka_bias + ticks); 3216 htobem32(&gk->gk_random, arc4random()); 3217 3218 SipHash24_Init(&ctx, &sc->sc_ka_key); 3219 SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime)); 3220 SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random)); 3221 SipHash24_Final(gk->gk_digest, &ctx); 3222 3223 ttl = sc->sc_tunnel.t_ttl == -1 ? 
ip_defttl : sc->sc_tunnel.t_ttl; 3224 3225 m->m_pkthdr.pf.prio = sc->sc_if.if_llprio; 3226 tos = gre_l3_tos(&sc->sc_tunnel, m, IFQ_PRIO2TOS(m->m_pkthdr.pf.prio)); 3227 3228 t.t_af = sc->sc_tunnel.t_af; 3229 t.t_df = sc->sc_tunnel.t_df; 3230 t.t_src = sc->sc_tunnel.t_dst; 3231 t.t_dst = sc->sc_tunnel.t_src; 3232 t.t_key = sc->sc_tunnel.t_key; 3233 t.t_key_mask = sc->sc_tunnel.t_key_mask; 3234 3235 m = gre_encap(&t, m, htons(0), ttl, tos); 3236 if (m == NULL) 3237 return; 3238 3239 switch (sc->sc_tunnel.t_af) { 3240 case AF_INET: { 3241 struct ip *ip; 3242 3243 ip = mtod(m, struct ip *); 3244 ip->ip_id = htons(ip_randomid()); 3245 ip->ip_sum = 0; 3246 ip->ip_sum = in_cksum(m, sizeof(*ip)); 3247 3248 proto = htons(ETHERTYPE_IP); 3249 break; 3250 } 3251 #ifdef INET6 3252 case AF_INET6: 3253 proto = htons(ETHERTYPE_IPV6); 3254 break; 3255 #endif 3256 default: 3257 m_freem(m); 3258 return; 3259 } 3260 3261 /* 3262 * put it in the tunnel 3263 */ 3264 m = gre_encap(&sc->sc_tunnel, m, proto, ttl, tos); 3265 if (m == NULL) 3266 return; 3267 3268 gre_ip_output(&sc->sc_tunnel, m); 3269 } 3270 3271 static void 3272 gre_keepalive_hold(void *arg) 3273 { 3274 struct gre_softc *sc = arg; 3275 struct ifnet *ifp = &sc->sc_if; 3276 3277 if (!ISSET(ifp->if_flags, IFF_RUNNING) || 3278 sc->sc_ka_state == GRE_KA_NONE) 3279 return; 3280 3281 NET_LOCK(); 3282 sc->sc_ka_state = GRE_KA_DOWN; 3283 gre_link_state(ifp, sc->sc_ka_state); 3284 NET_UNLOCK(); 3285 } 3286 3287 static int 3288 gre_set_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req, int ucast) 3289 { 3290 struct sockaddr *src = (struct sockaddr *)&req->addr; 3291 struct sockaddr *dst = (struct sockaddr *)&req->dstaddr; 3292 struct sockaddr_in *src4, *dst4; 3293 #ifdef INET6 3294 struct sockaddr_in6 *src6, *dst6; 3295 int error; 3296 #endif 3297 3298 /* sa_family and sa_len must be equal */ 3299 if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len) 3300 return (EINVAL); 3301 3302 /* validate */ 3303 switch 
(dst->sa_family) { 3304 case AF_INET: 3305 if (dst->sa_len != sizeof(*dst4)) 3306 return (EINVAL); 3307 3308 src4 = (struct sockaddr_in *)src; 3309 if (in_nullhost(src4->sin_addr) || 3310 IN_MULTICAST(src4->sin_addr.s_addr)) 3311 return (EINVAL); 3312 3313 dst4 = (struct sockaddr_in *)dst; 3314 if (in_nullhost(dst4->sin_addr) || 3315 (IN_MULTICAST(dst4->sin_addr.s_addr) != !ucast)) 3316 return (EINVAL); 3317 3318 tunnel->t_src4 = src4->sin_addr; 3319 tunnel->t_dst4 = dst4->sin_addr; 3320 3321 break; 3322 #ifdef INET6 3323 case AF_INET6: 3324 if (dst->sa_len != sizeof(*dst6)) 3325 return (EINVAL); 3326 3327 src6 = (struct sockaddr_in6 *)src; 3328 if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) || 3329 IN6_IS_ADDR_MULTICAST(&src6->sin6_addr)) 3330 return (EINVAL); 3331 3332 dst6 = (struct sockaddr_in6 *)dst; 3333 if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) || 3334 IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) != !ucast) 3335 return (EINVAL); 3336 3337 if (src6->sin6_scope_id != dst6->sin6_scope_id) 3338 return (EINVAL); 3339 3340 error = in6_embedscope(&tunnel->t_src6, src6, NULL); 3341 if (error != 0) 3342 return (error); 3343 3344 error = in6_embedscope(&tunnel->t_dst6, dst6, NULL); 3345 if (error != 0) 3346 return (error); 3347 3348 break; 3349 #endif 3350 default: 3351 return (EAFNOSUPPORT); 3352 } 3353 3354 /* commit */ 3355 tunnel->t_af = dst->sa_family; 3356 3357 return (0); 3358 } 3359 3360 static int 3361 gre_get_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req) 3362 { 3363 struct sockaddr *src = (struct sockaddr *)&req->addr; 3364 struct sockaddr *dst = (struct sockaddr *)&req->dstaddr; 3365 struct sockaddr_in *sin; 3366 #ifdef INET6 /* ifconfig already embeds the scopeid */ 3367 struct sockaddr_in6 *sin6; 3368 #endif 3369 3370 switch (tunnel->t_af) { 3371 case AF_UNSPEC: 3372 return (EADDRNOTAVAIL); 3373 case AF_INET: 3374 sin = (struct sockaddr_in *)src; 3375 memset(sin, 0, sizeof(*sin)); 3376 sin->sin_family = AF_INET; 3377 sin->sin_len = 
sizeof(*sin); 3378 sin->sin_addr = tunnel->t_src4; 3379 3380 sin = (struct sockaddr_in *)dst; 3381 memset(sin, 0, sizeof(*sin)); 3382 sin->sin_family = AF_INET; 3383 sin->sin_len = sizeof(*sin); 3384 sin->sin_addr = tunnel->t_dst4; 3385 3386 break; 3387 3388 #ifdef INET6 3389 case AF_INET6: 3390 sin6 = (struct sockaddr_in6 *)src; 3391 memset(sin6, 0, sizeof(*sin6)); 3392 sin6->sin6_family = AF_INET6; 3393 sin6->sin6_len = sizeof(*sin6); 3394 in6_recoverscope(sin6, &tunnel->t_src6); 3395 3396 sin6 = (struct sockaddr_in6 *)dst; 3397 memset(sin6, 0, sizeof(*sin6)); 3398 sin6->sin6_family = AF_INET6; 3399 sin6->sin6_len = sizeof(*sin6); 3400 in6_recoverscope(sin6, &tunnel->t_dst6); 3401 3402 break; 3403 #endif 3404 default: 3405 return (EAFNOSUPPORT); 3406 } 3407 3408 return (0); 3409 } 3410 3411 static int 3412 gre_del_tunnel(struct gre_tunnel *tunnel) 3413 { 3414 /* commit */ 3415 tunnel->t_af = AF_UNSPEC; 3416 3417 return (0); 3418 } 3419 3420 static int 3421 gre_set_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr) 3422 { 3423 uint32_t key; 3424 uint32_t min = GRE_KEY_MIN; 3425 uint32_t max = GRE_KEY_MAX; 3426 unsigned int shift = GRE_KEY_SHIFT; 3427 uint32_t mask = GRE_KEY_MASK; 3428 3429 if (tunnel->t_key_mask == GRE_KEY_ENTROPY) { 3430 min = GRE_KEY_ENTROPY_MIN; 3431 max = GRE_KEY_ENTROPY_MAX; 3432 shift = GRE_KEY_ENTROPY_SHIFT; 3433 mask = GRE_KEY_ENTROPY; 3434 } 3435 3436 if (ifr->ifr_vnetid < min || ifr->ifr_vnetid > max) 3437 return (EINVAL); 3438 3439 key = htonl(ifr->ifr_vnetid << shift); 3440 3441 /* commit */ 3442 tunnel->t_key_mask = mask; 3443 tunnel->t_key = key; 3444 3445 return (0); 3446 } 3447 3448 static int 3449 gre_get_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr) 3450 { 3451 int shift; 3452 3453 switch (tunnel->t_key_mask) { 3454 case GRE_KEY_NONE: 3455 return (EADDRNOTAVAIL); 3456 case GRE_KEY_ENTROPY: 3457 shift = GRE_KEY_ENTROPY_SHIFT; 3458 break; 3459 case GRE_KEY_MASK: 3460 shift = GRE_KEY_SHIFT; 3461 break; 3462 } 3463 3464 
ifr->ifr_vnetid = ntohl(tunnel->t_key) >> shift; 3465 3466 return (0); 3467 } 3468 3469 static int 3470 gre_del_vnetid(struct gre_tunnel *tunnel) 3471 { 3472 tunnel->t_key_mask = GRE_KEY_NONE; 3473 3474 return (0); 3475 } 3476 3477 static int 3478 gre_set_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr) 3479 { 3480 uint32_t mask, key; 3481 3482 if (tunnel->t_key_mask == GRE_KEY_NONE) 3483 return (EADDRNOTAVAIL); 3484 3485 mask = ifr->ifr_vnetid ? GRE_KEY_ENTROPY : GRE_KEY_MASK; 3486 if (tunnel->t_key_mask == mask) { 3487 /* nop */ 3488 return (0); 3489 } 3490 3491 key = ntohl(tunnel->t_key); 3492 if (mask == GRE_KEY_ENTROPY) { 3493 if (key > GRE_KEY_ENTROPY_MAX) 3494 return (ERANGE); 3495 3496 key = htonl(key << GRE_KEY_ENTROPY_SHIFT); 3497 } else 3498 key = htonl(key >> GRE_KEY_ENTROPY_SHIFT); 3499 3500 /* commit */ 3501 tunnel->t_key_mask = mask; 3502 tunnel->t_key = key; 3503 3504 return (0); 3505 } 3506 3507 static int 3508 gre_get_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr) 3509 { 3510 if (tunnel->t_key_mask == GRE_KEY_NONE) 3511 return (EADDRNOTAVAIL); 3512 3513 ifr->ifr_vnetid = tunnel->t_key_mask == GRE_KEY_ENTROPY; 3514 3515 return (0); 3516 } 3517 3518 static int 3519 mgre_up(struct mgre_softc *sc) 3520 { 3521 unsigned int hlen; 3522 3523 switch (sc->sc_tunnel.t_af) { 3524 case AF_UNSPEC: 3525 return (EDESTADDRREQ); 3526 case AF_INET: 3527 hlen = sizeof(struct ip); 3528 break; 3529 #ifdef INET6 3530 case AF_INET6: 3531 hlen = sizeof(struct ip6_hdr); 3532 break; 3533 #endif /* INET6 */ 3534 default: 3535 unhandled_af(sc->sc_tunnel.t_af); 3536 } 3537 3538 hlen += sizeof(struct gre_header); 3539 if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE) 3540 hlen += sizeof(struct gre_h_key); 3541 3542 NET_ASSERT_LOCKED(); 3543 3544 if (RBT_INSERT(mgre_tree, &mgre_tree, sc) != NULL) 3545 return (EADDRINUSE); 3546 3547 sc->sc_if.if_hdrlen = hlen; 3548 SET(sc->sc_if.if_flags, IFF_RUNNING); 3549 3550 return (0); 3551 } 3552 3553 static int 3554 
mgre_down(struct mgre_softc *sc) 3555 { 3556 NET_ASSERT_LOCKED(); 3557 3558 CLR(sc->sc_if.if_flags, IFF_RUNNING); 3559 sc->sc_if.if_hdrlen = GRE_HDRLEN; /* symmetry */ 3560 3561 RBT_REMOVE(mgre_tree, &mgre_tree, sc); 3562 3563 /* barrier? */ 3564 3565 return (0); 3566 } 3567 3568 static int 3569 egre_up(struct egre_softc *sc) 3570 { 3571 if (sc->sc_tunnel.t_af == AF_UNSPEC) 3572 return (EDESTADDRREQ); 3573 3574 NET_ASSERT_LOCKED(); 3575 3576 if (RBT_INSERT(egre_tree, &egre_tree, sc) != NULL) 3577 return (EADDRINUSE); 3578 3579 SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING); 3580 3581 return (0); 3582 } 3583 3584 static int 3585 egre_down(struct egre_softc *sc) 3586 { 3587 NET_ASSERT_LOCKED(); 3588 3589 CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING); 3590 3591 RBT_REMOVE(egre_tree, &egre_tree, sc); 3592 3593 /* barrier? */ 3594 3595 return (0); 3596 } 3597 3598 static int 3599 egre_media_change(struct ifnet *ifp) 3600 { 3601 return (ENOTTY); 3602 } 3603 3604 static void 3605 egre_media_status(struct ifnet *ifp, struct ifmediareq *imr) 3606 { 3607 imr->ifm_active = IFM_ETHER | IFM_AUTO; 3608 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 3609 } 3610 3611 static int 3612 nvgre_up(struct nvgre_softc *sc) 3613 { 3614 struct gre_tunnel *tunnel = &sc->sc_tunnel; 3615 struct ifnet *ifp0; 3616 void *inm; 3617 int error; 3618 3619 if (tunnel->t_af == AF_UNSPEC) 3620 return (EDESTADDRREQ); 3621 3622 ifp0 = if_get(sc->sc_ifp0); 3623 if (ifp0 == NULL) 3624 return (ENXIO); 3625 if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) { 3626 error = ENODEV; 3627 goto put; 3628 } 3629 3630 NET_ASSERT_LOCKED(); 3631 3632 if (RBT_INSERT(nvgre_mcast_tree, &nvgre_mcast_tree, sc) != NULL) { 3633 error = EADDRINUSE; 3634 goto put; 3635 } 3636 if (RBT_INSERT(nvgre_ucast_tree, &nvgre_ucast_tree, sc) != NULL) { 3637 error = EADDRINUSE; 3638 goto remove_mcast; 3639 } 3640 3641 switch (tunnel->t_af) { 3642 case AF_INET: 3643 inm = in_addmulti(&tunnel->t_dst4, ifp0); 3644 if (inm == NULL) { 3645 error = ECONNABORTED; 
3646 goto remove_ucast; 3647 } 3648 break; 3649 #ifdef INET6 3650 case AF_INET6: 3651 inm = in6_addmulti(&tunnel->t_dst6, ifp0, &error); 3652 if (inm == NULL) { 3653 /* error is already set */ 3654 goto remove_ucast; 3655 } 3656 break; 3657 #endif /* INET6 */ 3658 default: 3659 unhandled_af(tunnel->t_af); 3660 } 3661 3662 if_linkstatehook_add(ifp0, &sc->sc_ltask); 3663 if_detachhook_add(ifp0, &sc->sc_dtask); 3664 3665 if_put(ifp0); 3666 3667 sc->sc_inm = inm; 3668 SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING); 3669 3670 timeout_add_sec(&sc->sc_ether_age, NVGRE_AGE_TMO); 3671 3672 return (0); 3673 3674 remove_ucast: 3675 RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc); 3676 remove_mcast: 3677 RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc); 3678 put: 3679 if_put(ifp0); 3680 return (error); 3681 } 3682 3683 static int 3684 nvgre_down(struct nvgre_softc *sc) 3685 { 3686 struct gre_tunnel *tunnel = &sc->sc_tunnel; 3687 struct ifnet *ifp = &sc->sc_ac.ac_if; 3688 struct taskq *softnet = net_tq(ifp->if_index); 3689 struct ifnet *ifp0; 3690 3691 NET_ASSERT_LOCKED(); 3692 3693 CLR(ifp->if_flags, IFF_RUNNING); 3694 3695 NET_UNLOCK(); 3696 timeout_del_barrier(&sc->sc_ether_age); 3697 ifq_barrier(&ifp->if_snd); 3698 if (!task_del(softnet, &sc->sc_send_task)) 3699 taskq_barrier(softnet); 3700 NET_LOCK(); 3701 3702 mq_purge(&sc->sc_send_list); 3703 3704 ifp0 = if_get(sc->sc_ifp0); 3705 if (ifp0 != NULL) { 3706 if_detachhook_del(ifp0, &sc->sc_dtask); 3707 if_linkstatehook_del(ifp0, &sc->sc_ltask); 3708 } 3709 if_put(ifp0); 3710 3711 switch (tunnel->t_af) { 3712 case AF_INET: 3713 in_delmulti(sc->sc_inm); 3714 break; 3715 3716 #ifdef INET6 3717 case AF_INET6: 3718 in6_delmulti(sc->sc_inm); 3719 break; 3720 #endif 3721 default: 3722 unhandled_af(tunnel->t_af); 3723 } 3724 3725 RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc); 3726 RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc); 3727 3728 return (0); 3729 } 3730 3731 static void 3732 nvgre_link_change(void *arg) 3733 { 
3734 /* nop */ 3735 } 3736 3737 static void 3738 nvgre_detach(void *arg) 3739 { 3740 struct nvgre_softc *sc = arg; 3741 struct ifnet *ifp = &sc->sc_ac.ac_if; 3742 3743 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 3744 nvgre_down(sc); 3745 if_down(ifp); 3746 } 3747 3748 sc->sc_ifp0 = 0; 3749 } 3750 3751 static int 3752 nvgre_set_parent(struct nvgre_softc *sc, const char *parent) 3753 { 3754 struct ifnet *ifp0; 3755 3756 ifp0 = ifunit(parent); /* doesn't need an if_put */ 3757 if (ifp0 == NULL) 3758 return (EINVAL); 3759 3760 if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) 3761 return (EPROTONOSUPPORT); 3762 3763 /* commit */ 3764 sc->sc_ifp0 = ifp0->if_index; 3765 3766 return (0); 3767 } 3768 3769 static void 3770 nvgre_age(void *arg) 3771 { 3772 struct nvgre_softc *sc = arg; 3773 struct nvgre_entry *nv, *nnv; 3774 int tmo = sc->sc_ether_tmo * 2; 3775 int diff; 3776 3777 if (!ISSET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING)) 3778 return; 3779 3780 rw_enter_write(&sc->sc_ether_lock); /* XXX */ 3781 RBT_FOREACH_SAFE(nv, nvgre_map, &sc->sc_ether_map, nnv) { 3782 if (nv->nv_type != NVGRE_ENTRY_DYNAMIC) 3783 continue; 3784 3785 diff = ticks - nv->nv_age; 3786 if (diff < tmo) 3787 continue; 3788 3789 sc->sc_ether_num--; 3790 RBT_REMOVE(nvgre_map, &sc->sc_ether_map, nv); 3791 if (refcnt_rele(&nv->nv_refs)) 3792 pool_put(&nvgre_pool, nv); 3793 } 3794 rw_exit_write(&sc->sc_ether_lock); 3795 3796 timeout_add_sec(&sc->sc_ether_age, NVGRE_AGE_TMO); 3797 } 3798 3799 static inline int 3800 nvgre_entry_valid(struct nvgre_softc *sc, const struct nvgre_entry *nv) 3801 { 3802 int diff; 3803 3804 if (nv == NULL) 3805 return (0); 3806 3807 if (nv->nv_type == NVGRE_ENTRY_STATIC) 3808 return (1); 3809 3810 diff = ticks - nv->nv_age; 3811 if (diff < sc->sc_ether_tmo) 3812 return (1); 3813 3814 return (0); 3815 } 3816 3817 static void 3818 nvgre_start(struct ifnet *ifp) 3819 { 3820 struct nvgre_softc *sc = ifp->if_softc; 3821 const struct gre_tunnel *tunnel = &sc->sc_tunnel; 3822 union gre_addr 
gateway; 3823 struct nvgre_entry *nv, key; 3824 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 3825 struct ether_header *eh; 3826 struct mbuf *m, *m0; 3827 #if NBPFILTER > 0 3828 caddr_t if_bpf; 3829 #endif 3830 3831 if (!gre_allow) { 3832 ifq_purge(&ifp->if_snd); 3833 return; 3834 } 3835 3836 while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) { 3837 #if NBPFILTER > 0 3838 if_bpf = ifp->if_bpf; 3839 if (if_bpf) 3840 bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT); 3841 #endif 3842 3843 eh = mtod(m0, struct ether_header *); 3844 if (ETHER_IS_BROADCAST(eh->ether_dhost)) 3845 gateway = tunnel->t_dst; 3846 else { 3847 memcpy(&key.nv_dst, eh->ether_dhost, 3848 sizeof(key.nv_dst)); 3849 3850 rw_enter_read(&sc->sc_ether_lock); 3851 nv = RBT_FIND(nvgre_map, &sc->sc_ether_map, &key); 3852 if (nvgre_entry_valid(sc, nv)) 3853 gateway = nv->nv_gateway; 3854 else { 3855 /* "flood" to unknown hosts */ 3856 gateway = tunnel->t_dst; 3857 } 3858 rw_exit_read(&sc->sc_ether_lock); 3859 } 3860 3861 /* force prepend mbuf because of alignment problems */ 3862 m = m_get(M_DONTWAIT, m0->m_type); 3863 if (m == NULL) { 3864 m_freem(m0); 3865 continue; 3866 } 3867 3868 M_MOVE_PKTHDR(m, m0); 3869 m->m_next = m0; 3870 3871 m_align(m, 0); 3872 m->m_len = 0; 3873 3874 m = gre_encap_dst(tunnel, &gateway, m, 3875 htons(ETHERTYPE_TRANSETHER), 3876 tunnel->t_ttl, gre_l2_tos(tunnel, m)); 3877 if (m == NULL) 3878 continue; 3879 3880 m->m_flags &= ~(M_BCAST|M_MCAST); 3881 m->m_pkthdr.ph_rtableid = tunnel->t_rtableid; 3882 3883 #if NPF > 0 3884 pf_pkt_addr_changed(m); 3885 #endif 3886 3887 ml_enqueue(&ml, m); 3888 } 3889 3890 if (!ml_empty(&ml)) { 3891 if (mq_enlist(&sc->sc_send_list, &ml) == 0) 3892 task_add(net_tq(ifp->if_index), &sc->sc_send_task); 3893 /* else set OACTIVE? 
*/ 3894 } 3895 } 3896 3897 static uint64_t 3898 nvgre_send4(struct nvgre_softc *sc, struct mbuf_list *ml) 3899 { 3900 struct ip_moptions imo; 3901 struct mbuf *m; 3902 uint64_t oerrors = 0; 3903 3904 imo.imo_ifidx = sc->sc_ifp0; 3905 imo.imo_ttl = sc->sc_tunnel.t_ttl; 3906 imo.imo_loop = 0; 3907 3908 NET_LOCK(); 3909 while ((m = ml_dequeue(ml)) != NULL) { 3910 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0) 3911 oerrors++; 3912 } 3913 NET_UNLOCK(); 3914 3915 return (oerrors); 3916 } 3917 3918 #ifdef INET6 3919 static uint64_t 3920 nvgre_send6(struct nvgre_softc *sc, struct mbuf_list *ml) 3921 { 3922 struct ip6_moptions im6o; 3923 struct mbuf *m; 3924 uint64_t oerrors = 0; 3925 3926 im6o.im6o_ifidx = sc->sc_ifp0; 3927 im6o.im6o_hlim = sc->sc_tunnel.t_ttl; 3928 im6o.im6o_loop = 0; 3929 3930 NET_LOCK(); 3931 while ((m = ml_dequeue(ml)) != NULL) { 3932 if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0) 3933 oerrors++; 3934 } 3935 NET_UNLOCK(); 3936 3937 return (oerrors); 3938 } 3939 #endif /* INET6 */ 3940 3941 static void 3942 nvgre_send(void *arg) 3943 { 3944 struct nvgre_softc *sc = arg; 3945 struct ifnet *ifp = &sc->sc_ac.ac_if; 3946 sa_family_t af = sc->sc_tunnel.t_af; 3947 struct mbuf_list ml; 3948 uint64_t oerrors; 3949 3950 if (!ISSET(ifp->if_flags, IFF_RUNNING)) 3951 return; 3952 3953 mq_delist(&sc->sc_send_list, &ml); 3954 if (ml_empty(&ml)) 3955 return; 3956 3957 switch (af) { 3958 case AF_INET: 3959 oerrors = nvgre_send4(sc, &ml); 3960 break; 3961 #ifdef INET6 3962 case AF_INET6: 3963 oerrors = nvgre_send6(sc, &ml); 3964 break; 3965 #endif 3966 default: 3967 unhandled_af(af); 3968 /* NOTREACHED */ 3969 } 3970 3971 ifp->if_oerrors += oerrors; /* XXX should be ifq_oerrors */ 3972 } 3973 3974 static int 3975 eoip_up(struct eoip_softc *sc) 3976 { 3977 if (sc->sc_tunnel.t_af == AF_UNSPEC) 3978 return (EDESTADDRREQ); 3979 3980 NET_ASSERT_LOCKED(); 3981 3982 if (RBT_INSERT(eoip_tree, &eoip_tree, sc) != NULL) 3983 return (EADDRINUSE); 3984 3985 
	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	/* start the keepalive state machine if it is configured */
	if (sc->sc_ka_state != GRE_KA_NONE) {
		sc->sc_ka_holdmax = sc->sc_ka_count;
		eoip_keepalive_send(sc);
	}

	return (0);
}

/*
 * Take an eoip(4) interface down: stop the keepalive timeouts and
 * remove the tunnel from the input lookup tree.  Always returns 0.
 */
static int
eoip_down(struct eoip_softc *sc)
{
	NET_ASSERT_LOCKED();
	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		/* barriers: wait for any running handlers to finish too */
		timeout_del_barrier(&sc->sc_ka_hold);
		timeout_del_barrier(&sc->sc_ka_send);

		sc->sc_ka_state = GRE_KA_DOWN;
		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
	}

	RBT_REMOVE(eoip_tree, &eoip_tree, sc);

	return (0);
}

/*
 * ifq start routine: encapsulate queued ethernet frames in EoIP/GRE
 * and transmit them.
 */
static void
eoip_start(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* gre(4) can be administratively disabled via sysctl */
	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* empty leading mbuf; encapsulation headers go in here */
		m_align(m, 0);
		m->m_len = 0;

		m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}

/*
 * Prepend the EoIP headers: a version 1 GRE header with the key bit
 * set and proto GRE_EOIP, followed by the EoIP key (payload length and
 * tunnel id), then the outer IP header.  Consumes the mbuf; returns
 * NULL on allocation failure.
 */
static struct mbuf *
eoip_encap(struct eoip_softc *sc, struct mbuf *m, uint8_t tos)
{
	struct gre_header *gh;
	struct gre_h_key_eoip *eoiph;
	int len = m->m_pkthdr.len;	/* payload length before headers */

	m = m_prepend(m, sizeof(*gh) + sizeof(*eoiph), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	gh = mtod(m, struct gre_header *);
	gh->gre_flags = htons(GRE_VERS_1 | GRE_KP);
	gh->gre_proto = htons(GRE_EOIP);

	eoiph = (struct gre_h_key_eoip *)(gh + 1);
	/* eoip_len is big endian on the wire */
	htobem16(&eoiph->eoip_len, len);
	/* tunnel id is stored in wire order in the softc; copy verbatim */
	eoiph->eoip_tunnel_id = sc->sc_tunnel_id;

	return (gre_encap_ip(&sc->sc_tunnel, m, sc->sc_tunnel.t_ttl, tos));
}

/*
 * Keepalive send timeout: emit an EoIP packet with a zero-length
 * payload (which the peer treats as a keepalive) and reschedule.
 */
static void
eoip_keepalive_send(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *m;
	int linkhdr;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#endif
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	if (linkhdr > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, linkhdr);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	m->m_pkthdr.pf.prio = ifp->if_llprio;
	/* reserve header space, then trim back to an empty payload */
	m->m_pkthdr.len = m->m_len = linkhdr;
	m_adj(m, linkhdr);

	m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);

	timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);
}

/*
 * Keepalive hold timeout: nothing heard from the peer in time,
 * declare the link down.
 */
static void
eoip_keepalive_hold(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	NET_LOCK();
	sc->sc_ka_state = GRE_KA_DOWN;
	gre_link_state(ifp, sc->sc_ka_state);
	NET_UNLOCK();
}

/*
 * A keepalive arrived from the peer: advance the state machine
 * (DOWN -> HOLD -> UP) and re-arm the hold timer.
 */
static void
eoip_keepalive_recv(struct eoip_softc *sc)
{
	switch (sc->sc_ka_state) {
	case GRE_KA_NONE:
		return;
	case GRE_KA_DOWN:
		/* require holdmax consecutive keepalives before going UP;
		 * back off (doubling, capped) each time the link flaps */
		sc->sc_ka_state = GRE_KA_HOLD;
		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
		sc->sc_ka_holdmax =
		    MIN(sc->sc_ka_holdmax * 2,
		    16 * sc->sc_ka_count);
		break;
	case GRE_KA_HOLD:
		if (--sc->sc_ka_holdcnt > 0)
			break;

		sc->sc_ka_state = GRE_KA_UP;
		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
		break;

	case GRE_KA_UP:
		/* while UP, decay holdmax back towards the configured count */
		sc->sc_ka_holdmax--;
		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
		break;
	}

	/* re-arm the hold timer; eoip_keepalive_hold() fires on expiry */
	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);
}

/*
 * Decapsulate a received EoIP packet.  The outer GRE header sits at
 * offset iphlen into the mbuf.  Returns the mbuf unchanged if the
 * packet is not EoIP or not for us (caller keeps processing), or NULL
 * if the packet was consumed (delivered, keepalive, or dropped).
 */
static struct mbuf *
eoip_input(struct gre_tunnel *key, struct mbuf *m,
    const struct gre_header *gh, uint8_t otos, int iphlen)
{
	struct eoip_softc *sc;
	struct gre_h_key_eoip *eoiph;
	int hlen, len;
	caddr_t buf;

	/* EoIP is always GRE version 1 with exactly the key bit set */
	if (gh->gre_flags != htons(GRE_KP | GRE_VERS_1))
		goto decline;

	hlen = iphlen + sizeof(*gh) + sizeof(*eoiph);
	if (m->m_pkthdr.len < hlen)
		goto decline;

	m = m_pullup(m, hlen);
	if (m == NULL)
		return (NULL);

	/* m_pullup may have moved the data; re-derive the header pointers */
	buf = mtod(m, caddr_t);
	gh = (struct gre_header *)(buf + iphlen);
	eoiph = (struct gre_h_key_eoip *)(gh + 1);

	/* the tunnel id is the lookup key into the eoip tree */
	key->t_key = eoiph->eoip_tunnel_id;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(eoip_tree, &eoip_tree, (const struct eoip_softc *)key);
	if (sc == NULL)
		goto decline;

	/* it's ours now */
	len = bemtoh16(&eoiph->eoip_len);
	if (len == 0) {
		/* zero-length payload is a keepalive */
		eoip_keepalive_recv(sc);
		goto drop;
	}

	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (NULL);

	if (m->m_pkthdr.len < len)
		goto drop;		/* truncated payload */
	if (m->m_pkthdr.len != len)
		m_adj(m, len - m->m_pkthdr.len);	/* trim trailing junk */

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	m->m_flags &= ~(M_MCAST|M_BCAST);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	if_vinput(&sc->sc_ac.ac_if, m);

	return (NULL);

decline:
	return (m);
drop:
	m_freem(m);
	return (NULL);
}

/*
 * Handler for the net.inet.gre sysctl subtree: GRECTL_ALLOW (enable
 * gre processing at all) and GRECTL_WCCP (accept WCCP-style GRE).
 */
int
gre_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int error;

	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case GRECTL_ALLOW:
		NET_LOCK();
		error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_allow);
		NET_UNLOCK();
		return (error);
	case GRECTL_WCCP:
		NET_LOCK();
		error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_wccp);
		NET_UNLOCK();
		return (error);
	default:
		return (ENOPROTOOPT);
	}
	/* NOTREACHED */
}

/*
 * Compare two tunnel endpoint addresses of the given address family.
 * memcmp() semantics: <0, 0, >0.
 */
static inline int
gre_ip_cmp(int af, const union gre_addr *a, const union gre_addr *b)
{
	switch (af) {
#ifdef INET6
	case AF_INET6:
		return (memcmp(&a->in6, &b->in6, sizeof(a->in6)));
#endif /* INET6 */
	case AF_INET:
		return (memcmp(&a->in4, &b->in4, sizeof(a->in4)));
	default:
		unhandled_af(af);
	}

	return (0);
}

/*
 * RB-tree ordering on the "local" half of a tunnel: key presence,
 * common key prefix, routing table, address family, source address.
 */
static int
gre_cmp_src(const struct gre_tunnel *a, const struct gre_tunnel *b)
{
	uint32_t ka, kb;
	uint32_t mask;
	int rv;

	/* is K set at all? */
	ka = a->t_key_mask & GRE_KEY_ENTROPY;
	kb = b->t_key_mask & GRE_KEY_ENTROPY;

	/* sort by whether K is set */
	if (ka > kb)
		return (1);
	if (ka < kb)
		return (-1);

	/* is K set on both?
 */
	if (ka != GRE_KEY_NONE) {
		/* get common prefix */
		mask = a->t_key_mask & b->t_key_mask;

		ka = a->t_key & mask;
		kb = b->t_key & mask;

		/* sort by common prefix */
		if (ka > kb)
			return (1);
		if (ka < kb)
			return (-1);
	}

	/* sort by routing table */
	if (a->t_rtableid > b->t_rtableid)
		return (1);
	if (a->t_rtableid < b->t_rtableid)
		return (-1);

	/* sort by address */
	if (a->t_af > b->t_af)
		return (1);
	if (a->t_af < b->t_af)
		return (-1);

	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
	if (rv != 0)
		return (rv);

	return (0);
}

/*
 * Full tunnel ordering: the source half first (gre_cmp_src), then the
 * destination address as the tie-breaker.
 */
static int
gre_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b)
{
	int rv;

	rv = gre_cmp_src(a, b);
	if (rv != 0)
		return (rv);

	return (gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst));
}

/* mgre(4) interfaces are keyed by the source half of the tunnel only */
static inline int
mgre_cmp(const struct mgre_softc *a, const struct mgre_softc *b)
{
	return (gre_cmp_src(&a->sc_tunnel, &b->sc_tunnel));
}

RBT_GENERATE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);

/* egre(4) interfaces are keyed by the whole tunnel */
static inline int
egre_cmp(const struct egre_softc *a, const struct egre_softc *b)
{
	return (gre_cmp(&a->sc_tunnel, &b->sc_tunnel));
}

RBT_GENERATE(egre_tree, egre_softc, sc_entry, egre_cmp);

/* learned ethernet entries are ordered by destination MAC address */
static inline int
nvgre_entry_cmp(const struct nvgre_entry *a, const struct nvgre_entry *b)
{
	return (memcmp(&a->nv_dst, &b->nv_dst, sizeof(a->nv_dst)));
}

RBT_GENERATE(nvgre_map, nvgre_entry, nv_entry, nvgre_entry_cmp);

/*
 * Common part of the nvgre tree orderings: masked key, routing table,
 * then address family.  NOTE(review): the GRE_KEY_ENTROPY mask here
 * presumably isolates the VSID from the per-flow entropy bits of the
 * key — confirm against GRE_KEY_ENTROPY's definition in if_gre.h.
 */
static int
nvgre_cmp_tunnel(const struct gre_tunnel *a, const struct gre_tunnel *b)
{
	uint32_t ka, kb;

	ka = a->t_key & GRE_KEY_ENTROPY;
	kb = b->t_key & GRE_KEY_ENTROPY;

	/* sort by common prefix */
	if (ka > kb)
		return (1);
	if (ka < kb)
		return (-1);

	/* sort by routing table */
	if (a->t_rtableid > b->t_rtableid)
		return (1);
	if (a->t_rtableid < b->t_rtableid)
		return (-1);

	/* sort by address */
	if (a->t_af > b->t_af)
		return (1);
	if (a->t_af < b->t_af)
		return (-1);

	return (0);
}

/* unicast nvgre ordering: shared tunnel fields, then source address */
static inline int
nvgre_cmp_ucast(const struct nvgre_softc *na, const struct nvgre_softc *nb)
{
	const struct gre_tunnel *a = &na->sc_tunnel;
	const struct gre_tunnel *b = &nb->sc_tunnel;
	int rv;

	rv = nvgre_cmp_tunnel(a, b);
	if (rv != 0)
		return (rv);

	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
	if (rv != 0)
		return (rv);

	return (0);
}

/*
 * Multicast nvgre ordering: shared tunnel fields, the given group
 * address, then the parent interface index.
 */
static int
nvgre_cmp_mcast(const struct gre_tunnel *a, const union gre_addr *aa,
    unsigned int if0idxa, const struct gre_tunnel *b,
    const union gre_addr *ab, unsigned int if0idxb)
{
	int rv;

	rv = nvgre_cmp_tunnel(a, b);
	if (rv != 0)
		return (rv);

	rv = gre_ip_cmp(a->t_af, aa, ab);
	if (rv != 0)
		return (rv);

	if (if0idxa > if0idxb)
		return (1);
	if (if0idxa < if0idxb)
		return (-1);

	return (0);
}

/* adapt nvgre_cmp_mcast() to the softc-based RBT comparator signature */
static inline int
nvgre_cmp_mcast_sc(const struct nvgre_softc *na, const struct nvgre_softc *nb)
{
	const struct gre_tunnel *a = &na->sc_tunnel;
	const struct gre_tunnel *b = &nb->sc_tunnel;

	return (nvgre_cmp_mcast(a, &a->t_dst, na->sc_ifp0,
	    b, &b->t_dst, nb->sc_ifp0));
}

RBT_GENERATE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
RBT_GENERATE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);

/*
 * eoip(4) tree ordering: tunnel id (t_key), routing table, address
 * family, source address, destination address.
 */
static inline int
eoip_cmp(const struct eoip_softc *ea, const struct eoip_softc *eb)
{
	const struct gre_tunnel *a = &ea->sc_tunnel;
	const struct gre_tunnel *b = &eb->sc_tunnel;
	int rv;

	/* sort by tunnel id */
	if (a->t_key > b->t_key)
		return (1);
	if (a->t_key < b->t_key)
		return (-1);

	/* sort by routing table */
	if (a->t_rtableid > b->t_rtableid)
		return (1);
	if (a->t_rtableid < b->t_rtableid)
		return (-1);

	/* sort by address */
	if (a->t_af > b->t_af)
		return (1);
	if (a->t_af < b->t_af)
		return (-1);

	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
	if (rv != 0)
		return (rv);

	rv = gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst);
	if (rv != 0)
		return (rv);

	return (0);
}

RBT_GENERATE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);