1 /* $OpenBSD: if_gre.c,v 1.150 2019/04/23 11:48:55 dlg Exp $ */ 2 /* $NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */ 3 4 /* 5 * Copyright (c) 1998 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Heiko W.Rupp <hwr@pilhuhn.de> 10 * 11 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de> 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 /* 36 * Encapsulate L3 protocols into IP, per RFC 1701 and 1702. 37 * See gre(4) for more details. 38 * Also supported: IP in IP encapsulation (proto 55) per RFC 2004. 
39 */ 40 41 #include "bpfilter.h" 42 #include "pf.h" 43 44 #include <sys/param.h> 45 #include <sys/mbuf.h> 46 #include <sys/socket.h> 47 #include <sys/sockio.h> 48 #include <sys/kernel.h> 49 #include <sys/systm.h> 50 #include <sys/errno.h> 51 #include <sys/timeout.h> 52 #include <sys/queue.h> 53 #include <sys/tree.h> 54 #include <sys/pool.h> 55 #include <sys/rwlock.h> 56 57 #include <crypto/siphash.h> 58 59 #include <net/if.h> 60 #include <net/if_var.h> 61 #include <net/if_types.h> 62 #include <net/if_media.h> 63 #include <net/route.h> 64 65 #include <netinet/in.h> 66 #include <netinet/in_var.h> 67 #include <netinet/if_ether.h> 68 #include <netinet/ip.h> 69 #include <netinet/ip_var.h> 70 #include <netinet/ip_ecn.h> 71 72 #ifdef INET6 73 #include <netinet/ip6.h> 74 #include <netinet6/ip6_var.h> 75 #include <netinet6/in6_var.h> 76 #endif 77 78 #ifdef PIPEX 79 #include <net/pipex.h> 80 #endif 81 82 #ifdef MPLS 83 #include <netmpls/mpls.h> 84 #endif /* MPLS */ 85 86 #if NBPFILTER > 0 87 #include <net/bpf.h> 88 #endif 89 90 #if NPF > 0 91 #include <net/pfvar.h> 92 #endif 93 94 #include <net/if_gre.h> 95 96 #include <netinet/ip_gre.h> 97 #include <sys/sysctl.h> 98 99 /* for nvgre bridge shizz */ 100 #include <sys/socket.h> 101 #include <net/if_bridge.h> 102 103 /* 104 * packet formats 105 */ 106 struct gre_header { 107 uint16_t gre_flags; 108 #define GRE_CP 0x8000 /* Checksum Present */ 109 #define GRE_KP 0x2000 /* Key Present */ 110 #define GRE_SP 0x1000 /* Sequence Present */ 111 112 #define GRE_VERS_MASK 0x0007 113 #define GRE_VERS_0 0x0000 114 #define GRE_VERS_1 0x0001 115 116 uint16_t gre_proto; 117 } __packed __aligned(4); 118 119 struct gre_h_cksum { 120 uint16_t gre_cksum; 121 uint16_t gre_reserved1; 122 } __packed __aligned(4); 123 124 struct gre_h_key { 125 uint32_t gre_key; 126 } __packed __aligned(4); 127 128 #define GRE_EOIP 0x6400 129 130 struct gre_h_key_eoip { 131 uint16_t eoip_len; /* network order */ 132 uint16_t eoip_tunnel_id; /* little endian */ 133 
} __packed __aligned(4);

/* NVGRE virtual subnet ID (VSID) ranges reserved by the spec */
#define NVGRE_VSID_RES_MIN	0x000000 /* reserved for future use */
#define NVGRE_VSID_RES_MAX	0x000fff
#define NVGRE_VSID_NVE2NVE	0xffffff /* vendor specific NVE-to-NVE comms */

/* optional GRE sequence number field (present when GRE_SP is set) */
struct gre_h_seq {
	uint32_t		gre_seq;
} __packed __aligned(4);

/* WCCPv2 redirect header carried after the GRE header */
struct gre_h_wccp {
	uint8_t			wccp_flags;
	uint8_t			service_id;
	uint8_t			alt_bucket;
	uint8_t			pri_bucket;
} __packed __aligned(4);

#define GRE_WCCP 0x883e

#define GRE_HDRLEN (sizeof(struct ip) + sizeof(struct gre_header))

/*
 * GRE tunnel metadata
 */

/* keepalive state machine states (stored in sc_ka_state) */
#define GRE_KA_NONE		0
#define GRE_KA_DOWN		1
#define GRE_KA_HOLD		2
#define GRE_KA_UP		3

/* tunnel endpoint address, discriminated by gre_tunnel t_af */
union gre_addr {
	struct in_addr		in4;
	struct in6_addr		in6;
};

static inline int
		gre_ip_cmp(int, const union gre_addr *,
		    const union gre_addr *);

#define GRE_KEY_MIN		0x00000000U
#define GRE_KEY_MAX		0xffffffffU
#define GRE_KEY_SHIFT		0

#define GRE_KEY_ENTROPY_MIN	0x00000000U
#define GRE_KEY_ENTROPY_MAX	0x00ffffffU
#define GRE_KEY_ENTROPY_SHIFT	8

/*
 * Shared tunnel configuration/lookup key.  Every *_softc in this file
 * embeds one of these as its FIRST member so a softc pointer can be
 * used directly as a gre_tunnel pointer in the lookup trees/lists.
 */
struct gre_tunnel {
	uint32_t		t_key_mask;	/* which key bits matter */
#define GRE_KEY_NONE		htonl(0x00000000U)
#define GRE_KEY_ENTROPY		htonl(0xffffff00U)
#define GRE_KEY_MASK		htonl(0xffffffffU)
	uint32_t		t_key;		/* GRE key, network order */

	u_int			t_rtableid;	/* routing domain of tunnel */
	union gre_addr		t_src;		/* local endpoint */
#define t_src4	t_src.in4
#define t_src6	t_src.in6
	union gre_addr		t_dst;		/* remote endpoint */
#define t_dst4	t_dst.in4
#define t_dst6	t_dst.in6
	int			t_ttl;
	int			t_txhprio;
	int			t_rxhprio;
	int			t_ecn;
	uint16_t		t_df;		/* htons(IP_DF) or 0 */
	sa_family_t		t_af;		/* AF_INET/AF_INET6/AF_UNSPEC */
};

static int
		gre_cmp_src(const struct gre_tunnel *,
		    const struct gre_tunnel *);
static int
		gre_cmp(const struct gre_tunnel *, const struct gre_tunnel *);

static int	gre_set_tunnel(struct gre_tunnel *, struct if_laddrreq *, int);
static int	gre_get_tunnel(struct gre_tunnel *, struct if_laddrreq *);
static int	gre_del_tunnel(struct gre_tunnel *);

static int	gre_set_vnetid(struct gre_tunnel *, struct ifreq *);
static int	gre_get_vnetid(struct gre_tunnel *, struct ifreq *);
static int	gre_del_vnetid(struct gre_tunnel *);

static int	gre_set_vnetflowid(struct gre_tunnel *, struct ifreq *);
static int	gre_get_vnetflowid(struct gre_tunnel *, struct ifreq *);

static struct mbuf *
		gre_encap_dst(const struct gre_tunnel *, const union gre_addr *,
		    struct mbuf *, uint16_t, uint8_t, uint8_t);
#define gre_encap(_t, _m, _p, _ttl, _tos) \
		gre_encap_dst((_t), &(_t)->t_dst, (_m), (_p), (_ttl), (_tos))

static struct mbuf *
		gre_encap_dst_ip(const struct gre_tunnel *,
		    const union gre_addr *, struct mbuf *, uint8_t, uint8_t);
#define gre_encap_ip(_t, _m, _ttl, _tos) \
		gre_encap_dst_ip((_t), &(_t)->t_dst, (_m), (_ttl), (_tos))

static int
		gre_ip_output(const struct gre_tunnel *, struct mbuf *);

static int	gre_tunnel_ioctl(struct ifnet *, struct gre_tunnel *,
		    u_long, void *);

static uint8_t	gre_l2_tos(const struct gre_tunnel *, const struct mbuf *);
static uint8_t	gre_l3_tos(const struct gre_tunnel *,
		    const struct mbuf *, uint8_t);

/*
 * layer 3 GRE tunnels
 */

struct gre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	TAILQ_ENTRY(gre_softc)	sc_entry;

	struct ifnet		sc_if;

	/* keepalive machinery; see gre_keepalive_send/recv/hold */
	struct timeout		sc_ka_send;
	struct timeout		sc_ka_hold;

	unsigned int		sc_ka_state;
	unsigned int		sc_ka_timeo;
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;

	SIPHASH_KEY		sc_ka_key;
	uint32_t		sc_ka_bias;
	int			sc_ka_recvtm;
};

TAILQ_HEAD(gre_list, gre_softc);

/* on-wire keepalive payload; digest authenticates uptime/random */
struct gre_keepalive {
	uint32_t		gk_uptime;
	uint32_t		gk_random;
	uint8_t			gk_digest[SIPHASH_DIGEST_LENGTH];
} __packed __aligned(4);

static int	gre_clone_create(struct if_clone *, int);
static int	gre_clone_destroy(struct ifnet *);

struct if_clone gre_cloner =
    IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);

/* protected by NET_LOCK */
struct gre_list gre_list = TAILQ_HEAD_INITIALIZER(gre_list);

static int	gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
		    struct rtentry *);
static void	gre_start(struct ifnet *);
static int	gre_ioctl(struct ifnet *, u_long, caddr_t);

static int	gre_up(struct gre_softc *);
static int	gre_down(struct gre_softc *);
static void	gre_link_state(struct ifnet *, unsigned int);

static int	gre_input_key(struct mbuf **, int *, int, int, uint8_t,
		    struct gre_tunnel *);

static struct mbuf *
		gre_ipv4_patch(const struct gre_tunnel *, struct mbuf *,
		    uint8_t *, uint8_t);
#ifdef INET6
static struct mbuf *
		gre_ipv6_patch(const struct gre_tunnel *, struct mbuf *,
		    uint8_t *, uint8_t);
#endif
#ifdef MPLS
static struct mbuf *
		gre_mpls_patch(const struct gre_tunnel *, struct mbuf *,
		    uint8_t *, uint8_t);
#endif
static void	gre_keepalive_send(void *);
static void	gre_keepalive_recv(struct ifnet *ifp, struct mbuf *);
static void	gre_keepalive_hold(void *);

static struct mbuf *
		gre_l3_encap_dst(const struct gre_tunnel *, const void *,
		    struct mbuf *m, sa_family_t);

#define gre_l3_encap(_t, _m, _af) \
		gre_l3_encap_dst((_t), &(_t)->t_dst, (_m), (_af))

/* point-to-multipoint gre: one interface, many peers keyed by route */
struct mgre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(mgre_softc)	sc_entry;

	struct ifnet		sc_if;
};

RBT_HEAD(mgre_tree, mgre_softc);

static inline int
		mgre_cmp(const struct mgre_softc *, const struct mgre_softc *);

RBT_PROTOTYPE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);

static int	mgre_clone_create(struct if_clone *, int);
static int	mgre_clone_destroy(struct ifnet *);

struct if_clone mgre_cloner =
    IF_CLONE_INITIALIZER("mgre", mgre_clone_create, mgre_clone_destroy);

static void	mgre_rtrequest(struct ifnet *, int, struct rtentry *);
static int	mgre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
		    struct rtentry *);
static void	mgre_start(struct ifnet *);
static int	mgre_ioctl(struct ifnet *, u_long, caddr_t);

static int	mgre_set_tunnel(struct mgre_softc *, struct if_laddrreq *);
static int	mgre_get_tunnel(struct mgre_softc *, struct if_laddrreq *);
static int	mgre_up(struct mgre_softc *);
static int	mgre_down(struct mgre_softc *);

/* protected by NET_LOCK */
struct mgre_tree mgre_tree = RBT_INITIALIZER();

/*
 * Ethernet GRE tunnels
 */
#define ether_cmp(_a, _b)	memcmp((_a), (_b), ETHER_ADDR_LEN)
#define ether_isequal(_a, _b)	(ether_cmp((_a), (_b)) == 0)
#define ether_isbcast(_e)	ether_isequal((_e), etherbroadcastaddr)

static struct mbuf *
		gre_ether_align(struct mbuf *, int);

struct egre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(egre_softc)	sc_entry;

	struct arpcom		sc_ac;
	struct ifmedia		sc_media;
};

RBT_HEAD(egre_tree, egre_softc);

static inline int
		egre_cmp(const struct egre_softc *, const struct egre_softc *);

RBT_PROTOTYPE(egre_tree, egre_softc, sc_entry, egre_cmp);

static int	egre_clone_create(struct if_clone *, int);
static int	egre_clone_destroy(struct ifnet *);

static void	egre_start(struct ifnet *);
static int	egre_ioctl(struct ifnet *, u_long, caddr_t);
static int	egre_media_change(struct ifnet *);
static void	egre_media_status(struct ifnet *, struct ifmediareq *);

static int	egre_up(struct egre_softc *);
static int	egre_down(struct egre_softc *);

static int	egre_input(const struct gre_tunnel *, struct mbuf *, int,
		    uint8_t);
struct if_clone egre_cloner =
    IF_CLONE_INITIALIZER("egre", egre_clone_create, egre_clone_destroy);

/* protected by NET_LOCK */
struct egre_tree egre_tree = RBT_INITIALIZER();

/*
 * Network Virtualisation Using Generic Routing Encapsulation (NVGRE)
 */

#define NVGRE_AGE_TMO	100	/* seconds */

/* learned MAC -> remote endpoint mapping (like a bridge cache entry) */
struct nvgre_entry {
	RB_ENTRY(nvgre_entry)	 nv_entry;
	struct ether_addr	 nv_dst;
	uint8_t			 nv_type;
#define NVGRE_ENTRY_DYNAMIC		0
#define NVGRE_ENTRY_STATIC		1
	union gre_addr		 nv_gateway;
	struct refcnt		 nv_refs;
	int			 nv_age;
};

RBT_HEAD(nvgre_map, nvgre_entry);

static inline int
		nvgre_entry_cmp(const struct nvgre_entry *,
		    const struct nvgre_entry *);

RBT_PROTOTYPE(nvgre_map, nvgre_entry, nv_entry, nvgre_entry_cmp);

struct nvgre_softc {
	struct gre_tunnel	 sc_tunnel; /* must be first */
	unsigned int		 sc_ifp0;	/* parent ifindex */
	RBT_ENTRY(nvgre_softc)	 sc_uentry;
	RBT_ENTRY(nvgre_softc)	 sc_mentry;

	struct arpcom		 sc_ac;
	struct ifmedia		 sc_media;

	struct mbuf_queue	 sc_send_list;
	struct task		 sc_send_task;

	void			*sc_inm;
	void			*sc_lhcookie;
	void			*sc_dhcookie;

	/* learned-address map, guarded by sc_ether_lock */
	struct rwlock		 sc_ether_lock;
	struct nvgre_map	 sc_ether_map;
	unsigned int		 sc_ether_num;
	unsigned int		 sc_ether_max;
	int			 sc_ether_tmo;
	struct timeout		 sc_ether_age;
};

RBT_HEAD(nvgre_ucast_tree, nvgre_softc);
RBT_HEAD(nvgre_mcast_tree, nvgre_softc);

static inline int
		nvgre_cmp_ucast(const struct nvgre_softc *,
		    const struct nvgre_softc *);
static int
		nvgre_cmp_mcast(const struct gre_tunnel *,
		    const union gre_addr *, unsigned int,
		    const struct gre_tunnel *, const union gre_addr *,
		    unsigned int);
static inline int
		nvgre_cmp_mcast_sc(const struct nvgre_softc *,
		    const struct nvgre_softc *);

RBT_PROTOTYPE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
RBT_PROTOTYPE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);

static int	nvgre_clone_create(struct if_clone *, int);
static int	nvgre_clone_destroy(struct ifnet *);

static void	nvgre_start(struct ifnet *);
static int	nvgre_ioctl(struct ifnet *, u_long, caddr_t);

static int	nvgre_up(struct nvgre_softc *);
static int	nvgre_down(struct nvgre_softc *);
static int	nvgre_set_parent(struct nvgre_softc *, const char *);
static void	nvgre_link_change(void *);
static void	nvgre_detach(void *);

static int	nvgre_input(const struct gre_tunnel *, struct mbuf *, int,
		    uint8_t);
static void	nvgre_send(void *);

static int	nvgre_rtfind(struct nvgre_softc *, struct ifbaconf *);
static void	nvgre_flush_map(struct nvgre_softc *);
static void	nvgre_input_map(struct nvgre_softc *,
		    const struct gre_tunnel *, const struct ether_header *);
static void	nvgre_age(void *);

struct if_clone nvgre_cloner =
    IF_CLONE_INITIALIZER("nvgre", nvgre_clone_create, nvgre_clone_destroy);

struct pool nvgre_pool;

/* protected by NET_LOCK */
struct nvgre_ucast_tree nvgre_ucast_tree = RBT_INITIALIZER();
struct nvgre_mcast_tree nvgre_mcast_tree = RBT_INITIALIZER();

/*
 * MikroTik Ethernet over IP protocol (eoip)
 */

struct eoip_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	uint16_t		sc_tunnel_id;
	RBT_ENTRY(eoip_softc)	sc_entry;

	struct arpcom		sc_ac;
	struct ifmedia		sc_media;

	/* keepalive machinery, parallel to gre_softc's */
	struct timeout		sc_ka_send;
	struct timeout		sc_ka_hold;

	unsigned int		sc_ka_state;
	unsigned int		sc_ka_timeo;
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;
};

RBT_HEAD(eoip_tree, eoip_softc);

static inline int
		eoip_cmp(const struct eoip_softc *, const struct eoip_softc *);

RBT_PROTOTYPE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);

static int	eoip_clone_create(struct if_clone *, int);
static int	eoip_clone_destroy(struct ifnet *);
static void	eoip_start(struct ifnet *);
static int	eoip_ioctl(struct ifnet *, u_long, caddr_t);

static void	eoip_keepalive_send(void *);
static void	eoip_keepalive_recv(struct eoip_softc *);
static void	eoip_keepalive_hold(void *);

static int	eoip_up(struct eoip_softc *);
static int	eoip_down(struct eoip_softc *);

static struct mbuf *
		eoip_encap(struct eoip_softc *, struct mbuf *, uint8_t);

static struct mbuf *
		eoip_input(struct gre_tunnel *, struct mbuf *,
		    const struct gre_header *, uint8_t, int);
struct if_clone eoip_cloner =
    IF_CLONE_INITIALIZER("eoip", eoip_clone_create, eoip_clone_destroy);

/* protected by NET_LOCK */
struct eoip_tree eoip_tree = RBT_INITIALIZER();

/*
 * It is not easy to calculate the right value for a GRE MTU.
 * We leave this task to the admin and use the same default that
 * other vendors use.
 */
#define GREMTU 1476

/*
 * We can control the acceptance of GRE and MobileIP packets by
 * altering the sysctl net.inet.gre.allow values
 * respectively. Zero means drop them, all else is acceptance. We can also
 * control acceptance of WCCPv1-style GRE packets through the
 * net.inet.gre.wccp value, but be aware it depends upon normal GRE being
 * allowed as well.
 *
 */
int gre_allow = 0;
int gre_wccp = 0;

/*
 * Pseudo-device attach: register the interface cloners for all five
 * GRE flavours implemented in this file.  "n" (the count hint from
 * the pseudo-device framework) is unused.
 */
void
greattach(int n)
{
	if_clone_attach(&gre_cloner);
	if_clone_attach(&mgre_cloner);
	if_clone_attach(&egre_cloner);
	if_clone_attach(&nvgre_cloner);
	if_clone_attach(&eoip_cloner);
}

/*
 * Create a gre(4) point-to-point layer 3 interface and insert it
 * into gre_list (under NET_LOCK) so gre_find() can see it.
 * Returns 0; allocation uses M_WAITOK so it cannot fail.
 */
static int
gre_clone_create(struct if_clone *ifc, int unit)
{
	struct gre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d",
	    ifc->ifc_name, unit);

	ifp = &sc->sc_if;
	ifp->if_softc = sc;
	ifp->if_type = IFT_TUNNEL;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_output = gre_output;
	ifp->if_start = gre_start;
	ifp->if_ioctl = gre_ioctl;
	ifp->if_rtrequest = p2p_rtrequest;

	/* tunnel defaults: inherit system TTL, copy prio from payload */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	/* hold timeout runs in process context; send can fire from softclock */
	timeout_set(&sc->sc_ka_send, gre_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, gre_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_NONE;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	ifp->if_llprio = IFQ_TOS2PRIO(IPTOS_PREC_INTERNETCONTROL);

	NET_LOCK();
	TAILQ_INSERT_TAIL(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	return (0);
}

/*
 * Destroy a gre(4) interface: bring it down if running, unlink it
 * from gre_list, detach and free.  Always returns 0.
 */
static int
gre_clone_destroy(struct ifnet *ifp)
{
	struct gre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		gre_down(sc);

	TAILQ_REMOVE(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}

647 static int 648 mgre_clone_create(struct if_clone *ifc, int unit) 649 { 650 struct mgre_softc *sc; 651 struct ifnet *ifp; 652 653 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); 654 ifp = &sc->sc_if; 655 656 snprintf(ifp->if_xname, sizeof(ifp->if_xname), 657 "%s%d", ifc->ifc_name, unit); 658 659 ifp->if_softc = sc; 660 ifp->if_type = IFT_L3IPVLAN; 661 ifp->if_hdrlen = GRE_HDRLEN; 662 ifp->if_mtu = GREMTU; 663 ifp->if_flags = IFF_MULTICAST|IFF_SIMPLEX; 664 ifp->if_xflags = IFXF_CLONED; 665 ifp->if_rtrequest = mgre_rtrequest; 666 ifp->if_output = mgre_output; 667 ifp->if_start = mgre_start; 668 ifp->if_ioctl = mgre_ioctl; 669 670 sc->sc_tunnel.t_ttl = ip_defttl; 671 sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD; 672 sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET; 673 sc->sc_tunnel.t_df = htons(0); 674 sc->sc_tunnel.t_ecn = ECN_ALLOWED; 675 676 if_counters_alloc(ifp); 677 if_attach(ifp); 678 if_alloc_sadl(ifp); 679 680 #if NBPFILTER > 0 681 bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t)); 682 #endif 683 684 return (0); 685 } 686 687 static int 688 mgre_clone_destroy(struct ifnet *ifp) 689 { 690 struct mgre_softc *sc = ifp->if_softc; 691 692 NET_LOCK(); 693 if (ISSET(ifp->if_flags, IFF_RUNNING)) 694 mgre_down(sc); 695 NET_UNLOCK(); 696 697 if_detach(ifp); 698 699 free(sc, M_DEVBUF, sizeof(*sc)); 700 701 return (0); 702 } 703 704 static int 705 egre_clone_create(struct if_clone *ifc, int unit) 706 { 707 struct egre_softc *sc; 708 struct ifnet *ifp; 709 710 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); 711 ifp = &sc->sc_ac.ac_if; 712 713 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", 714 ifc->ifc_name, unit); 715 716 ifp->if_softc = sc; 717 ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; 718 ifp->if_ioctl = egre_ioctl; 719 ifp->if_start = egre_start; 720 ifp->if_xflags = IFXF_CLONED; 721 IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); 722 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 723 ether_fakeaddr(ifp); 724 725 sc->sc_tunnel.t_ttl = 
ip_defttl; 726 sc->sc_tunnel.t_txhprio = 0; 727 sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET; 728 sc->sc_tunnel.t_df = htons(0); 729 730 ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status); 731 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); 732 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); 733 734 if_counters_alloc(ifp); 735 if_attach(ifp); 736 ether_ifattach(ifp); 737 738 return (0); 739 } 740 741 static int 742 egre_clone_destroy(struct ifnet *ifp) 743 { 744 struct egre_softc *sc = ifp->if_softc; 745 746 NET_LOCK(); 747 if (ISSET(ifp->if_flags, IFF_RUNNING)) 748 egre_down(sc); 749 NET_UNLOCK(); 750 751 ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY); 752 ether_ifdetach(ifp); 753 if_detach(ifp); 754 755 free(sc, M_DEVBUF, sizeof(*sc)); 756 757 return (0); 758 } 759 760 static int 761 nvgre_clone_create(struct if_clone *ifc, int unit) 762 { 763 struct nvgre_softc *sc; 764 struct ifnet *ifp; 765 struct gre_tunnel *tunnel; 766 767 if (nvgre_pool.pr_size == 0) { 768 pool_init(&nvgre_pool, sizeof(struct nvgre_entry), 0, 769 IPL_SOFTNET, 0, "nvgren", NULL); 770 } 771 772 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); 773 ifp = &sc->sc_ac.ac_if; 774 775 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", 776 ifc->ifc_name, unit); 777 778 ifp->if_softc = sc; 779 ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; 780 ifp->if_ioctl = nvgre_ioctl; 781 ifp->if_start = nvgre_start; 782 ifp->if_xflags = IFXF_CLONED; 783 IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); 784 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 785 ether_fakeaddr(ifp); 786 787 tunnel = &sc->sc_tunnel; 788 tunnel->t_ttl = IP_DEFAULT_MULTICAST_TTL; 789 tunnel->t_txhprio = 0; 790 sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET; 791 tunnel->t_df = htons(IP_DF); 792 tunnel->t_key_mask = GRE_KEY_ENTROPY; 793 tunnel->t_key = htonl((NVGRE_VSID_RES_MAX + 1) << 794 GRE_KEY_ENTROPY_SHIFT); 795 796 mq_init(&sc->sc_send_list, IFQ_MAXLEN * 2, IPL_SOFTNET); 797 
task_set(&sc->sc_send_task, nvgre_send, sc); 798 799 rw_init(&sc->sc_ether_lock, "nvgrelk"); 800 RBT_INIT(nvgre_map, &sc->sc_ether_map); 801 sc->sc_ether_num = 0; 802 sc->sc_ether_max = 100; 803 sc->sc_ether_tmo = 240 * hz; 804 timeout_set_proc(&sc->sc_ether_age, nvgre_age, sc); /* ugh */ 805 806 ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status); 807 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); 808 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); 809 810 if_counters_alloc(ifp); 811 if_attach(ifp); 812 ether_ifattach(ifp); 813 814 return (0); 815 } 816 817 static int 818 nvgre_clone_destroy(struct ifnet *ifp) 819 { 820 struct nvgre_softc *sc = ifp->if_softc; 821 822 NET_LOCK(); 823 if (ISSET(ifp->if_flags, IFF_RUNNING)) 824 nvgre_down(sc); 825 NET_UNLOCK(); 826 827 ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY); 828 ether_ifdetach(ifp); 829 if_detach(ifp); 830 831 free(sc, M_DEVBUF, sizeof(*sc)); 832 833 return (0); 834 } 835 836 static int 837 eoip_clone_create(struct if_clone *ifc, int unit) 838 { 839 struct eoip_softc *sc; 840 struct ifnet *ifp; 841 842 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); 843 ifp = &sc->sc_ac.ac_if; 844 845 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", 846 ifc->ifc_name, unit); 847 848 ifp->if_softc = sc; 849 ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; 850 ifp->if_ioctl = eoip_ioctl; 851 ifp->if_start = eoip_start; 852 ifp->if_xflags = IFXF_CLONED; 853 IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); 854 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 855 ether_fakeaddr(ifp); 856 857 sc->sc_tunnel.t_ttl = ip_defttl; 858 sc->sc_tunnel.t_txhprio = 0; 859 sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET; 860 sc->sc_tunnel.t_df = htons(0); 861 862 sc->sc_ka_timeo = 10; 863 sc->sc_ka_count = 10; 864 865 timeout_set(&sc->sc_ka_send, eoip_keepalive_send, sc); 866 timeout_set_proc(&sc->sc_ka_hold, eoip_keepalive_hold, sc); 867 sc->sc_ka_state = GRE_KA_DOWN; 868 869 
ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status); 870 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); 871 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); 872 873 if_counters_alloc(ifp); 874 if_attach(ifp); 875 ether_ifattach(ifp); 876 877 return (0); 878 } 879 880 static int 881 eoip_clone_destroy(struct ifnet *ifp) 882 { 883 struct eoip_softc *sc = ifp->if_softc; 884 885 NET_LOCK(); 886 if (ISSET(ifp->if_flags, IFF_RUNNING)) 887 eoip_down(sc); 888 NET_UNLOCK(); 889 890 ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY); 891 ether_ifdetach(ifp); 892 if_detach(ifp); 893 894 free(sc, M_DEVBUF, sizeof(*sc)); 895 896 return (0); 897 } 898 899 int 900 gre_input(struct mbuf **mp, int *offp, int type, int af) 901 { 902 struct mbuf *m = *mp; 903 struct gre_tunnel key; 904 struct ip *ip; 905 906 ip = mtod(m, struct ip *); 907 908 /* XXX check if ip_src is sane for nvgre? */ 909 910 key.t_af = AF_INET; 911 key.t_src4 = ip->ip_dst; 912 key.t_dst4 = ip->ip_src; 913 914 if (gre_input_key(mp, offp, type, af, ip->ip_tos, &key) == -1) 915 return (rip_input(mp, offp, type, af)); 916 917 return (IPPROTO_DONE); 918 } 919 920 #ifdef INET6 921 int 922 gre_input6(struct mbuf **mp, int *offp, int type, int af) 923 { 924 struct mbuf *m = *mp; 925 struct gre_tunnel key; 926 struct ip6_hdr *ip6; 927 uint32_t flow; 928 929 ip6 = mtod(m, struct ip6_hdr *); 930 931 /* XXX check if ip6_src is sane for nvgre? 
*/ 932 933 key.t_af = AF_INET6; 934 key.t_src6 = ip6->ip6_dst; 935 key.t_dst6 = ip6->ip6_src; 936 937 flow = bemtoh32(&ip6->ip6_flow); 938 939 if (gre_input_key(mp, offp, type, af, flow >> 20, &key) == -1) 940 return (rip6_input(mp, offp, type, af)); 941 942 return (IPPROTO_DONE); 943 } 944 #endif /* INET6 */ 945 946 static inline struct ifnet * 947 gre_find(const struct gre_tunnel *key) 948 { 949 struct gre_softc *sc; 950 951 TAILQ_FOREACH(sc, &gre_list, sc_entry) { 952 if (gre_cmp(key, &sc->sc_tunnel) != 0) 953 continue; 954 955 if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 956 continue; 957 958 return (&sc->sc_if); 959 } 960 961 return (NULL); 962 } 963 964 static inline struct ifnet * 965 mgre_find(const struct gre_tunnel *key) 966 { 967 struct mgre_softc *sc; 968 969 NET_ASSERT_LOCKED(); 970 sc = RBT_FIND(mgre_tree, &mgre_tree, (const struct mgre_softc *)key); 971 if (sc != NULL) 972 return (&sc->sc_if); 973 974 return (NULL); 975 } 976 977 static struct mbuf * 978 gre_input_1(struct gre_tunnel *key, struct mbuf *m, 979 const struct gre_header *gh, uint8_t otos, int iphlen) 980 { 981 switch (gh->gre_proto) { 982 case htons(ETHERTYPE_PPP): 983 #ifdef PIPEX 984 if (pipex_enable) { 985 struct pipex_session *session; 986 987 session = pipex_pptp_lookup_session(m); 988 if (session != NULL && 989 pipex_pptp_input(m, session) == NULL) 990 return (NULL); 991 } 992 #endif 993 break; 994 case htons(GRE_EOIP): 995 return (eoip_input(key, m, gh, otos, iphlen)); 996 break; 997 } 998 999 return (m); 1000 } 1001 1002 static int 1003 gre_input_key(struct mbuf **mp, int *offp, int type, int af, uint8_t otos, 1004 struct gre_tunnel *key) 1005 { 1006 struct mbuf *m = *mp; 1007 int iphlen = *offp, hlen, rxprio; 1008 struct ifnet *ifp; 1009 const struct gre_tunnel *tunnel; 1010 caddr_t buf; 1011 struct gre_header *gh; 1012 struct gre_h_key *gkh; 1013 void (*input)(struct ifnet *, struct mbuf *); 1014 struct mbuf *(*patch)(const struct gre_tunnel *, struct mbuf *, 1015 uint8_t *, 
uint8_t); 1016 int bpf_af = AF_UNSPEC; /* bpf */ 1017 int mcast = 0; 1018 uint8_t itos; 1019 1020 if (!gre_allow) 1021 goto decline; 1022 1023 key->t_rtableid = m->m_pkthdr.ph_rtableid; 1024 1025 hlen = iphlen + sizeof(*gh); 1026 if (m->m_pkthdr.len < hlen) 1027 goto decline; 1028 1029 m = m_pullup(m, hlen); 1030 if (m == NULL) 1031 return (IPPROTO_DONE); 1032 1033 buf = mtod(m, caddr_t); 1034 gh = (struct gre_header *)(buf + iphlen); 1035 1036 /* check the version */ 1037 switch (gh->gre_flags & htons(GRE_VERS_MASK)) { 1038 case htons(GRE_VERS_0): 1039 break; 1040 1041 case htons(GRE_VERS_1): 1042 m = gre_input_1(key, m, gh, otos, iphlen); 1043 if (m == NULL) 1044 return (IPPROTO_DONE); 1045 /* FALLTHROUGH */ 1046 default: 1047 goto decline; 1048 } 1049 1050 /* the only optional bit in the header is K flag */ 1051 if ((gh->gre_flags & htons(~(GRE_KP|GRE_VERS_MASK))) != htons(0)) 1052 goto decline; 1053 1054 if (gh->gre_flags & htons(GRE_KP)) { 1055 hlen += sizeof(*gkh); 1056 if (m->m_pkthdr.len < hlen) 1057 goto decline; 1058 1059 m = m_pullup(m, hlen); 1060 if (m == NULL) 1061 return (IPPROTO_DONE); 1062 1063 buf = mtod(m, caddr_t); 1064 gh = (struct gre_header *)(buf + iphlen); 1065 gkh = (struct gre_h_key *)(gh + 1); 1066 1067 key->t_key_mask = GRE_KEY_MASK; 1068 key->t_key = gkh->gre_key; 1069 } else 1070 key->t_key_mask = GRE_KEY_NONE; 1071 1072 if (gh->gre_proto == htons(ETHERTYPE_TRANSETHER)) { 1073 if (egre_input(key, m, hlen, otos) == -1 && 1074 nvgre_input(key, m, hlen, otos) == -1) 1075 goto decline; 1076 1077 return (IPPROTO_DONE); 1078 } 1079 1080 ifp = gre_find(key); 1081 if (ifp == NULL) { 1082 ifp = mgre_find(key); 1083 if (ifp == NULL) 1084 goto decline; 1085 } 1086 1087 switch (gh->gre_proto) { 1088 case htons(GRE_WCCP): { 1089 struct mbuf *n; 1090 int off; 1091 1092 /* WCCP/GRE: 1093 * So far as I can see (and test) it seems that Cisco's WCCP 1094 * GRE tunnel is precisely a IP-in-GRE tunnel that differs 1095 * only in its protocol number. 
At least, it works for me. 1096 * 1097 * The Internet Drafts can be found if you look for 1098 * the following: 1099 * draft-forster-wrec-wccp-v1-00.txt 1100 * draft-wilson-wrec-wccp-v2-01.txt 1101 */ 1102 1103 if (!gre_wccp && !ISSET(ifp->if_flags, IFF_LINK0)) 1104 goto decline; 1105 1106 /* 1107 * If the first nibble of the payload does not look like 1108 * IPv4, assume it is WCCP v2. 1109 */ 1110 n = m_getptr(m, hlen, &off); 1111 if (n == NULL) 1112 goto decline; 1113 if (n->m_data[off] >> 4 != IPVERSION) 1114 hlen += sizeof(gre_wccp); 1115 1116 /* FALLTHROUGH */ 1117 } 1118 case htons(ETHERTYPE_IP): 1119 #if NBPFILTER > 0 1120 bpf_af = AF_INET; 1121 #endif 1122 patch = gre_ipv4_patch; 1123 input = ipv4_input; 1124 break; 1125 #ifdef INET6 1126 case htons(ETHERTYPE_IPV6): 1127 #if NBPFILTER > 0 1128 bpf_af = AF_INET6; 1129 #endif 1130 patch = gre_ipv6_patch; 1131 input = ipv6_input; 1132 break; 1133 #endif 1134 #ifdef MPLS 1135 case htons(ETHERTYPE_MPLS_MCAST): 1136 mcast = M_MCAST|M_BCAST; 1137 /* fallthrough */ 1138 case htons(ETHERTYPE_MPLS): 1139 #if NBPFILTER > 0 1140 bpf_af = AF_MPLS; 1141 #endif 1142 patch = gre_mpls_patch; 1143 input = mpls_input; 1144 break; 1145 #endif 1146 case htons(0): 1147 if (ifp->if_type != IFT_TUNNEL) { 1148 /* keepalives dont make sense for mgre */ 1149 goto decline; 1150 } 1151 1152 m_adj(m, hlen); 1153 gre_keepalive_recv(ifp, m); 1154 return (IPPROTO_DONE); 1155 1156 default: 1157 goto decline; 1158 } 1159 1160 /* it's ours now */ 1161 1162 m_adj(m, hlen); 1163 1164 tunnel = ifp->if_softc; /* gre and mgre tunnel info is at the front */ 1165 1166 m = (*patch)(tunnel, m, &itos, otos); 1167 if (m == NULL) 1168 return (IPPROTO_DONE); 1169 1170 if (tunnel->t_key_mask == GRE_KEY_ENTROPY) { 1171 m->m_pkthdr.ph_flowid = M_FLOWID_VALID | 1172 (bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY); 1173 } 1174 1175 rxprio = tunnel->t_rxhprio; 1176 switch (rxprio) { 1177 case IF_HDRPRIO_PACKET: 1178 /* nop */ 1179 break; 1180 case IF_HDRPRIO_OUTER: 
1181 m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos); 1182 break; 1183 case IF_HDRPRIO_PAYLOAD: 1184 m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(itos); 1185 break; 1186 default: 1187 m->m_pkthdr.pf.prio = rxprio; 1188 break; 1189 } 1190 1191 m->m_flags &= ~(M_MCAST|M_BCAST); 1192 m->m_flags |= mcast; 1193 m->m_pkthdr.ph_ifidx = ifp->if_index; 1194 m->m_pkthdr.ph_rtableid = ifp->if_rdomain; 1195 1196 #if NPF > 0 1197 pf_pkt_addr_changed(m); 1198 #endif 1199 1200 counters_pkt(ifp->if_counters, 1201 ifc_ipackets, ifc_ibytes, m->m_pkthdr.len); 1202 1203 #if NBPFILTER > 0 1204 if (ifp->if_bpf) 1205 bpf_mtap_af(ifp->if_bpf, bpf_af, m, BPF_DIRECTION_IN); 1206 #endif 1207 1208 (*input)(ifp, m); 1209 return (IPPROTO_DONE); 1210 decline: 1211 *mp = m; 1212 return (-1); 1213 } 1214 1215 static struct mbuf * 1216 gre_ipv4_patch(const struct gre_tunnel *tunnel, struct mbuf *m, 1217 uint8_t *itosp, uint8_t otos) 1218 { 1219 struct ip *ip; 1220 uint8_t itos; 1221 1222 m = m_pullup(m, sizeof(*ip)); 1223 if (m == NULL) 1224 return (NULL); 1225 1226 ip = mtod(m, struct ip *); 1227 1228 itos = ip->ip_tos; 1229 if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) { 1230 m_freem(m); 1231 return (NULL); 1232 } 1233 if (itos != ip->ip_tos) 1234 ip_tos_patch(ip, itos); 1235 1236 *itosp = itos; 1237 1238 return (m); 1239 } 1240 1241 #ifdef INET6 1242 static struct mbuf * 1243 gre_ipv6_patch(const struct gre_tunnel *tunnel, struct mbuf *m, 1244 uint8_t *itosp, uint8_t otos) 1245 { 1246 struct ip6_hdr *ip6; 1247 uint32_t flow; 1248 uint8_t itos; 1249 1250 m = m_pullup(m, sizeof(*ip6)); 1251 if (m == NULL) 1252 return (NULL); 1253 1254 ip6 = mtod(m, struct ip6_hdr *); 1255 1256 flow = bemtoh32(&ip6->ip6_flow); 1257 itos = flow >> 20; 1258 if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) { 1259 m_freem(m); 1260 return (NULL); 1261 } 1262 1263 CLR(flow, 0xff << 20); 1264 SET(flow, itos << 20); 1265 htobem32(&ip6->ip6_flow, flow); 1266 1267 *itosp = itos; 1268 1269 return (m); 1270 } 1271 #endif 1272 1273 
#ifdef MPLS 1274 static struct mbuf * 1275 gre_mpls_patch(const struct gre_tunnel *tunnel, struct mbuf *m, 1276 uint8_t *itosp, uint8_t otos) 1277 { 1278 uint8_t itos; 1279 uint32_t shim; 1280 1281 m = m_pullup(m, sizeof(shim)); 1282 if (m == NULL) 1283 return (NULL); 1284 1285 shim = *mtod(m, uint32_t *); 1286 itos = (ntohl(shim & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET) << 5; 1287 1288 if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) { 1289 m_freem(m); 1290 return (NULL); 1291 } 1292 1293 *itosp = itos; 1294 1295 return (m); 1296 } 1297 #endif 1298 1299 #define gre_l2_prio(_t, _m, _otos) do { \ 1300 int rxprio = (_t)->t_rxhprio; \ 1301 switch (rxprio) { \ 1302 case IF_HDRPRIO_PACKET: \ 1303 /* nop */ \ 1304 break; \ 1305 case IF_HDRPRIO_OUTER: \ 1306 (_m)->m_pkthdr.pf.prio = IFQ_TOS2PRIO((_otos)); \ 1307 break; \ 1308 default: \ 1309 (_m)->m_pkthdr.pf.prio = rxprio; \ 1310 break; \ 1311 } \ 1312 } while (0) 1313 1314 static int 1315 egre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen, uint8_t otos) 1316 { 1317 struct egre_softc *sc; 1318 1319 NET_ASSERT_LOCKED(); 1320 sc = RBT_FIND(egre_tree, &egre_tree, (const struct egre_softc *)key); 1321 if (sc == NULL) 1322 return (-1); 1323 1324 /* it's ours now */ 1325 m = gre_ether_align(m, hlen); 1326 if (m == NULL) 1327 return (0); 1328 1329 if (sc->sc_tunnel.t_key_mask == GRE_KEY_ENTROPY) { 1330 m->m_pkthdr.ph_flowid = M_FLOWID_VALID | 1331 (bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY); 1332 } 1333 1334 m->m_flags &= ~(M_MCAST|M_BCAST); 1335 1336 #if NPF > 0 1337 pf_pkt_addr_changed(m); 1338 #endif 1339 1340 gre_l2_prio(&sc->sc_tunnel, m, otos); 1341 1342 if_vinput(&sc->sc_ac.ac_if, m); 1343 1344 return (0); 1345 } 1346 1347 static int 1348 nvgre_rtfind(struct nvgre_softc *sc, struct ifbaconf *baconf) 1349 { 1350 struct ifnet *ifp = &sc->sc_ac.ac_if; 1351 struct nvgre_entry *nv; 1352 struct ifbareq bareq; 1353 caddr_t uaddr, end; 1354 int error; 1355 int age; 1356 1357 if (baconf->ifbac_len == 0) { 1358 /* 
single read is atomic */ 1359 baconf->ifbac_len = sc->sc_ether_num * sizeof(bareq); 1360 return (0); 1361 } 1362 1363 uaddr = baconf->ifbac_buf; 1364 end = uaddr + baconf->ifbac_len; 1365 1366 rw_enter_read(&sc->sc_ether_lock); 1367 RBT_FOREACH(nv, nvgre_map, &sc->sc_ether_map) { 1368 if (uaddr >= end) 1369 break; 1370 1371 memcpy(bareq.ifba_name, ifp->if_xname, 1372 sizeof(bareq.ifba_name)); 1373 memcpy(bareq.ifba_ifsname, ifp->if_xname, 1374 sizeof(bareq.ifba_ifsname)); 1375 memcpy(&bareq.ifba_dst, &nv->nv_dst, 1376 sizeof(bareq.ifba_dst)); 1377 1378 memset(&bareq.ifba_dstsa, 0, sizeof(bareq.ifba_dstsa)); 1379 switch (sc->sc_tunnel.t_af) { 1380 case AF_INET: { 1381 struct sockaddr_in *sin; 1382 1383 sin = (struct sockaddr_in *)&bareq.ifba_dstsa; 1384 sin->sin_len = sizeof(*sin); 1385 sin->sin_family = AF_INET; 1386 sin->sin_addr = nv->nv_gateway.in4; 1387 1388 break; 1389 } 1390 #ifdef INET6 1391 case AF_INET6: { 1392 struct sockaddr_in6 *sin6; 1393 1394 sin6 = (struct sockaddr_in6 *)&bareq.ifba_dstsa; 1395 sin6->sin6_len = sizeof(*sin6); 1396 sin6->sin6_family = AF_INET6; 1397 sin6->sin6_addr = nv->nv_gateway.in6; 1398 1399 break; 1400 } 1401 #endif /* INET6 */ 1402 default: 1403 unhandled_af(sc->sc_tunnel.t_af); 1404 } 1405 1406 switch (nv->nv_type) { 1407 case NVGRE_ENTRY_DYNAMIC: 1408 age = (ticks - nv->nv_age) / hz; 1409 bareq.ifba_age = MIN(age, 0xff); 1410 bareq.ifba_flags = IFBAF_DYNAMIC; 1411 break; 1412 case NVGRE_ENTRY_STATIC: 1413 bareq.ifba_age = 0; 1414 bareq.ifba_flags = IFBAF_STATIC; 1415 break; 1416 } 1417 1418 error = copyout(&bareq, uaddr, sizeof(bareq)); 1419 if (error != 0) { 1420 rw_exit_read(&sc->sc_ether_lock); 1421 return (error); 1422 } 1423 1424 uaddr += sizeof(bareq); 1425 } 1426 baconf->ifbac_len = sc->sc_ether_num * sizeof(bareq); 1427 rw_exit_read(&sc->sc_ether_lock); 1428 1429 return (0); 1430 } 1431 1432 static void 1433 nvgre_flush_map(struct nvgre_softc *sc) 1434 { 1435 struct nvgre_map map; 1436 struct nvgre_entry *nv, *nnv; 
1437 1438 rw_enter_write(&sc->sc_ether_lock); 1439 map = sc->sc_ether_map; 1440 RBT_INIT(nvgre_map, &sc->sc_ether_map); 1441 sc->sc_ether_num = 0; 1442 rw_exit_write(&sc->sc_ether_lock); 1443 1444 RBT_FOREACH_SAFE(nv, nvgre_map, &map, nnv) { 1445 RBT_REMOVE(nvgre_map, &map, nv); 1446 if (refcnt_rele(&nv->nv_refs)) 1447 pool_put(&nvgre_pool, nv); 1448 } 1449 } 1450 1451 static void 1452 nvgre_input_map(struct nvgre_softc *sc, const struct gre_tunnel *key, 1453 const struct ether_header *eh) 1454 { 1455 struct nvgre_entry *nv, nkey; 1456 int new = 0; 1457 1458 if (ether_isbcast(eh->ether_shost) || 1459 ETHER_IS_MULTICAST(eh->ether_shost)) 1460 return; 1461 1462 memcpy(&nkey.nv_dst, eh->ether_shost, ETHER_ADDR_LEN); 1463 1464 /* remember where it came from */ 1465 rw_enter_read(&sc->sc_ether_lock); 1466 nv = RBT_FIND(nvgre_map, &sc->sc_ether_map, &nkey); 1467 if (nv == NULL) 1468 new = 1; 1469 else { 1470 nv->nv_age = ticks; 1471 1472 if (nv->nv_type != NVGRE_ENTRY_DYNAMIC || 1473 gre_ip_cmp(key->t_af, &key->t_dst, &nv->nv_gateway)) 1474 nv = NULL; 1475 else 1476 refcnt_take(&nv->nv_refs); 1477 } 1478 rw_exit_read(&sc->sc_ether_lock); 1479 1480 if (new) { 1481 struct nvgre_entry *onv; 1482 unsigned int num; 1483 1484 nv = pool_get(&nvgre_pool, PR_NOWAIT); 1485 if (nv == NULL) { 1486 /* oh well */ 1487 return; 1488 } 1489 1490 memcpy(&nv->nv_dst, eh->ether_shost, ETHER_ADDR_LEN); 1491 nv->nv_type = NVGRE_ENTRY_DYNAMIC; 1492 nv->nv_gateway = key->t_dst; 1493 refcnt_init(&nv->nv_refs); 1494 nv->nv_age = ticks; 1495 1496 rw_enter_write(&sc->sc_ether_lock); 1497 num = sc->sc_ether_num; 1498 if (++num > sc->sc_ether_max) 1499 onv = nv; 1500 else { 1501 /* try to give the ref to the map */ 1502 onv = RBT_INSERT(nvgre_map, &sc->sc_ether_map, nv); 1503 if (onv == NULL) { 1504 /* count the successful insert */ 1505 sc->sc_ether_num = num; 1506 } 1507 } 1508 rw_exit_write(&sc->sc_ether_lock); 1509 1510 if (onv != NULL) 1511 pool_put(&nvgre_pool, nv); 1512 } else if (nv != NULL) 
{ 1513 rw_enter_write(&sc->sc_ether_lock); 1514 nv->nv_gateway = key->t_dst; 1515 rw_exit_write(&sc->sc_ether_lock); 1516 1517 if (refcnt_rele(&nv->nv_refs)) { 1518 /* ioctl may have deleted the entry */ 1519 pool_put(&nvgre_pool, nv); 1520 } 1521 } 1522 } 1523 1524 static inline struct nvgre_softc * 1525 nvgre_mcast_find(const struct gre_tunnel *key, unsigned int if0idx) 1526 { 1527 struct nvgre_softc *sc; 1528 int rv; 1529 1530 /* 1531 * building an nvgre_softc to use with RBT_FIND is expensive, and 1532 * would need to swap the src and dst addresses in the key. so do the 1533 * find by hand. 1534 */ 1535 1536 NET_ASSERT_LOCKED(); 1537 sc = RBT_ROOT(nvgre_mcast_tree, &nvgre_mcast_tree); 1538 while (sc != NULL) { 1539 rv = nvgre_cmp_mcast(key, &key->t_src, if0idx, 1540 &sc->sc_tunnel, &sc->sc_tunnel.t_dst, sc->sc_ifp0); 1541 if (rv == 0) 1542 return (sc); 1543 if (rv < 0) 1544 sc = RBT_LEFT(nvgre_mcast_tree, sc); 1545 else 1546 sc = RBT_RIGHT(nvgre_mcast_tree, sc); 1547 } 1548 1549 return (NULL); 1550 } 1551 1552 static inline struct nvgre_softc * 1553 nvgre_ucast_find(const struct gre_tunnel *key) 1554 { 1555 NET_ASSERT_LOCKED(); 1556 return (RBT_FIND(nvgre_ucast_tree, &nvgre_ucast_tree, 1557 (struct nvgre_softc *)key)); 1558 } 1559 1560 static int 1561 nvgre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen, 1562 uint8_t otos) 1563 { 1564 struct nvgre_softc *sc; 1565 1566 if (ISSET(m->m_flags, M_MCAST|M_BCAST)) 1567 sc = nvgre_mcast_find(key, m->m_pkthdr.ph_ifidx); 1568 else 1569 sc = nvgre_ucast_find(key); 1570 1571 if (sc == NULL) 1572 return (-1); 1573 1574 /* it's ours now */ 1575 m = gre_ether_align(m, hlen); 1576 if (m == NULL) 1577 return (0); 1578 1579 nvgre_input_map(sc, key, mtod(m, struct ether_header *)); 1580 1581 m->m_pkthdr.ph_flowid = M_FLOWID_VALID | 1582 (bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY); 1583 1584 gre_l2_prio(&sc->sc_tunnel, m, otos); 1585 1586 m->m_flags &= ~(M_MCAST|M_BCAST); 1587 1588 #if NPF > 0 1589 
pf_pkt_addr_changed(m); 1590 #endif 1591 1592 if_vinput(&sc->sc_ac.ac_if, m); 1593 1594 return (0); 1595 } 1596 1597 static struct mbuf * 1598 gre_ether_align(struct mbuf *m, int hlen) 1599 { 1600 struct mbuf *n; 1601 int off; 1602 1603 m_adj(m, hlen); 1604 1605 if (m->m_pkthdr.len < sizeof(struct ether_header)) { 1606 m_freem(m); 1607 return (NULL); 1608 } 1609 1610 m = m_pullup(m, sizeof(struct ether_header)); 1611 if (m == NULL) 1612 return (NULL); 1613 1614 n = m_getptr(m, sizeof(struct ether_header), &off); 1615 if (n == NULL) { 1616 m_freem(m); 1617 return (NULL); 1618 } 1619 1620 if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) { 1621 n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT); 1622 m_freem(m); 1623 if (n == NULL) 1624 return (NULL); 1625 m = n; 1626 } 1627 1628 return (m); 1629 } 1630 1631 static void 1632 gre_keepalive_recv(struct ifnet *ifp, struct mbuf *m) 1633 { 1634 struct gre_softc *sc = ifp->if_softc; 1635 struct gre_keepalive *gk; 1636 SIPHASH_CTX ctx; 1637 uint8_t digest[SIPHASH_DIGEST_LENGTH]; 1638 int uptime, delta; 1639 int tick = ticks; 1640 1641 if (sc->sc_ka_state == GRE_KA_NONE || 1642 sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain) 1643 goto drop; 1644 1645 if (m->m_pkthdr.len < sizeof(*gk)) 1646 goto drop; 1647 m = m_pullup(m, sizeof(*gk)); 1648 if (m == NULL) 1649 return; 1650 1651 gk = mtod(m, struct gre_keepalive *); 1652 uptime = bemtoh32(&gk->gk_uptime) - sc->sc_ka_bias; 1653 delta = tick - uptime; 1654 if (delta < 0) 1655 goto drop; 1656 if (delta > hz * 10) /* magic */ 1657 goto drop; 1658 1659 /* avoid too much siphash work */ 1660 delta = tick - sc->sc_ka_recvtm; 1661 if (delta > 0 && delta < (hz / 10)) 1662 goto drop; 1663 1664 SipHash24_Init(&ctx, &sc->sc_ka_key); 1665 SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime)); 1666 SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random)); 1667 SipHash24_Final(digest, &ctx); 1668 1669 if (memcmp(digest, gk->gk_digest, sizeof(digest)) != 0) 1670 goto drop; 1671 1672 
sc->sc_ka_recvtm = tick; 1673 1674 switch (sc->sc_ka_state) { 1675 case GRE_KA_DOWN: 1676 sc->sc_ka_state = GRE_KA_HOLD; 1677 sc->sc_ka_holdcnt = sc->sc_ka_holdmax; 1678 sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2, 1679 16 * sc->sc_ka_count); 1680 break; 1681 case GRE_KA_HOLD: 1682 if (--sc->sc_ka_holdcnt > 0) 1683 break; 1684 1685 sc->sc_ka_state = GRE_KA_UP; 1686 gre_link_state(&sc->sc_if, sc->sc_ka_state); 1687 break; 1688 1689 case GRE_KA_UP: 1690 sc->sc_ka_holdmax--; 1691 sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count); 1692 break; 1693 } 1694 1695 timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count); 1696 1697 drop: 1698 m_freem(m); 1699 } 1700 1701 static int 1702 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1703 struct rtentry *rt) 1704 { 1705 struct m_tag *mtag; 1706 int error = 0; 1707 1708 if (!gre_allow) { 1709 error = EACCES; 1710 goto drop; 1711 } 1712 1713 if (!ISSET(ifp->if_flags, IFF_RUNNING)) { 1714 error = ENETDOWN; 1715 goto drop; 1716 } 1717 1718 switch (dst->sa_family) { 1719 case AF_INET: 1720 #ifdef INET6 1721 case AF_INET6: 1722 #endif 1723 #ifdef MPLS 1724 case AF_MPLS: 1725 #endif 1726 break; 1727 default: 1728 error = EAFNOSUPPORT; 1729 goto drop; 1730 } 1731 1732 /* Try to limit infinite recursion through misconfiguration. 
*/ 1733 for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag; 1734 mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) { 1735 if (memcmp((caddr_t)(mtag + 1), &ifp->if_index, 1736 sizeof(ifp->if_index)) == 0) { 1737 m_freem(m); 1738 error = EIO; 1739 goto end; 1740 } 1741 } 1742 1743 mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT); 1744 if (mtag == NULL) { 1745 m_freem(m); 1746 error = ENOBUFS; 1747 goto end; 1748 } 1749 memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index)); 1750 m_tag_prepend(m, mtag); 1751 1752 m->m_pkthdr.ph_family = dst->sa_family; 1753 1754 error = if_enqueue(ifp, m); 1755 end: 1756 if (error) 1757 ifp->if_oerrors++; 1758 return (error); 1759 1760 drop: 1761 m_freem(m); 1762 return (error); 1763 } 1764 1765 void 1766 gre_start(struct ifnet *ifp) 1767 { 1768 struct gre_softc *sc = ifp->if_softc; 1769 struct mbuf *m; 1770 int af; 1771 #if NBPFILTER > 0 1772 caddr_t if_bpf; 1773 #endif 1774 1775 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { 1776 af = m->m_pkthdr.ph_family; 1777 1778 #if NBPFILTER > 0 1779 if_bpf = ifp->if_bpf; 1780 if (if_bpf) 1781 bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT); 1782 #endif 1783 1784 m = gre_l3_encap(&sc->sc_tunnel, m, af); 1785 if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) { 1786 ifp->if_oerrors++; 1787 continue; 1788 } 1789 } 1790 } 1791 1792 void 1793 mgre_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt) 1794 { 1795 struct ifnet *lo0ifp; 1796 struct ifaddr *ifa, *lo0ifa; 1797 1798 switch (req) { 1799 case RTM_ADD: 1800 if (!ISSET(rt->rt_flags, RTF_LOCAL)) 1801 break; 1802 1803 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 1804 if (memcmp(rt_key(rt), ifa->ifa_addr, 1805 rt_key(rt)->sa_len) == 0) 1806 break; 1807 } 1808 1809 if (ifa == NULL) 1810 break; 1811 1812 KASSERT(ifa == rt->rt_ifa); 1813 1814 lo0ifp = if_get(rtable_loindex(ifp->if_rdomain)); 1815 KASSERT(lo0ifp != NULL); 1816 TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) { 1817 if 
(lo0ifa->ifa_addr->sa_family == 1818 ifa->ifa_addr->sa_family) 1819 break; 1820 } 1821 if_put(lo0ifp); 1822 1823 if (lo0ifa == NULL) 1824 break; 1825 1826 rt->rt_flags &= ~RTF_LLINFO; 1827 break; 1828 case RTM_DELETE: 1829 case RTM_RESOLVE: 1830 default: 1831 break; 1832 } 1833 } 1834 1835 static int 1836 mgre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dest, 1837 struct rtentry *rt0) 1838 { 1839 struct mgre_softc *sc = ifp->if_softc; 1840 struct sockaddr *gate; 1841 struct rtentry *rt; 1842 struct m_tag *mtag; 1843 int error = 0; 1844 sa_family_t af; 1845 const void *addr; 1846 1847 if (!gre_allow) { 1848 error = EACCES; 1849 goto drop; 1850 } 1851 1852 if (!ISSET(ifp->if_flags, IFF_RUNNING)) { 1853 error = ENETDOWN; 1854 goto drop; 1855 } 1856 1857 switch (dest->sa_family) { 1858 case AF_INET: 1859 #ifdef INET6 1860 case AF_INET6: 1861 #endif 1862 #ifdef MPLS 1863 case AF_MPLS: 1864 #endif 1865 break; 1866 default: 1867 error = EAFNOSUPPORT; 1868 goto drop; 1869 } 1870 1871 if (ISSET(m->m_flags, M_MCAST|M_BCAST)) { 1872 error = ENETUNREACH; 1873 goto drop; 1874 } 1875 1876 rt = rt_getll(rt0); 1877 1878 /* chech rt_expire? */ 1879 if (ISSET(rt->rt_flags, RTF_REJECT)) { 1880 error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH; 1881 goto drop; 1882 } 1883 if (!ISSET(rt->rt_flags, RTF_HOST)) { 1884 error = EHOSTUNREACH; 1885 goto drop; 1886 } 1887 if (ISSET(rt->rt_flags, RTF_GATEWAY)) { 1888 error = EINVAL; 1889 goto drop; 1890 } 1891 1892 gate = rt->rt_gateway; 1893 af = gate->sa_family; 1894 if (af != sc->sc_tunnel.t_af) { 1895 error = EAGAIN; 1896 goto drop; 1897 } 1898 1899 /* Try to limit infinite recursion through misconfiguration. 
*/ 1900 for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag; 1901 mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) { 1902 if (memcmp((caddr_t)(mtag + 1), &ifp->if_index, 1903 sizeof(ifp->if_index)) == 0) { 1904 error = EIO; 1905 goto drop; 1906 } 1907 } 1908 1909 mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT); 1910 if (mtag == NULL) { 1911 error = ENOBUFS; 1912 goto drop; 1913 } 1914 memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index)); 1915 m_tag_prepend(m, mtag); 1916 1917 switch (af) { 1918 case AF_INET: { 1919 struct sockaddr_in *sin = (struct sockaddr_in *)gate; 1920 addr = &sin->sin_addr; 1921 break; 1922 } 1923 #ifdef INET6 1924 case AF_INET6: { 1925 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)gate; 1926 addr = &sin6->sin6_addr; 1927 break; 1928 } 1929 #endif 1930 default: 1931 unhandled_af(af); 1932 /* NOTREACHED */ 1933 } 1934 1935 m = gre_l3_encap_dst(&sc->sc_tunnel, addr, m, dest->sa_family); 1936 if (m == NULL) 1937 return (ENOBUFS); 1938 1939 m->m_pkthdr.ph_family = dest->sa_family; 1940 1941 error = if_enqueue(ifp, m); 1942 if (error) 1943 ifp->if_oerrors++; 1944 return (error); 1945 1946 drop: 1947 m_freem(m); 1948 return (error); 1949 } 1950 1951 static void 1952 mgre_start(struct ifnet *ifp) 1953 { 1954 struct mgre_softc *sc = ifp->if_softc; 1955 struct mbuf *m; 1956 #if NBPFILTER > 0 1957 caddr_t if_bpf; 1958 #endif 1959 1960 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { 1961 #if NBPFILTER > 0 1962 if_bpf = ifp->if_bpf; 1963 if (if_bpf) { 1964 struct m_hdr mh; 1965 struct mbuf *n; 1966 int off; 1967 1968 n = m_getptr(m, ifp->if_hdrlen, &off); 1969 KASSERT(n != NULL); 1970 1971 mh.mh_flags = 0; 1972 mh.mh_next = n->m_next; 1973 mh.mh_len = n->m_len - off; 1974 mh.mh_data = n->m_data + off; 1975 1976 bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, 1977 (struct mbuf *)&mh, BPF_DIRECTION_OUT); 1978 } 1979 #endif 1980 1981 if (gre_ip_output(&sc->sc_tunnel, m) != 0) { 1982 ifp->if_oerrors++; 1983 continue; 1984 } 1985 
} 1986 } 1987 1988 static void 1989 egre_start(struct ifnet *ifp) 1990 { 1991 struct egre_softc *sc = ifp->if_softc; 1992 struct mbuf *m0, *m; 1993 #if NBPFILTER > 0 1994 caddr_t if_bpf; 1995 #endif 1996 1997 if (!gre_allow) { 1998 ifq_purge(&ifp->if_snd); 1999 return; 2000 } 2001 2002 while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) { 2003 #if NBPFILTER > 0 2004 if_bpf = ifp->if_bpf; 2005 if (if_bpf) 2006 bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT); 2007 #endif 2008 2009 /* force prepend mbuf because of alignment problems */ 2010 m = m_get(M_DONTWAIT, m0->m_type); 2011 if (m == NULL) { 2012 m_freem(m0); 2013 continue; 2014 } 2015 2016 M_MOVE_PKTHDR(m, m0); 2017 m->m_next = m0; 2018 2019 m_align(m, 0); 2020 m->m_len = 0; 2021 2022 m = gre_encap(&sc->sc_tunnel, m, htons(ETHERTYPE_TRANSETHER), 2023 sc->sc_tunnel.t_ttl, gre_l2_tos(&sc->sc_tunnel, m)); 2024 if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) { 2025 ifp->if_oerrors++; 2026 continue; 2027 } 2028 } 2029 } 2030 2031 static struct mbuf * 2032 gre_l3_encap_dst(const struct gre_tunnel *tunnel, const void *dst, 2033 struct mbuf *m, sa_family_t af) 2034 { 2035 uint16_t proto; 2036 uint8_t ttl, itos, otos; 2037 int tttl = tunnel->t_ttl; 2038 int ttloff; 2039 2040 switch (af) { 2041 case AF_INET: { 2042 struct ip *ip; 2043 2044 m = m_pullup(m, sizeof(*ip)); 2045 if (m == NULL) 2046 return (NULL); 2047 2048 ip = mtod(m, struct ip *); 2049 itos = ip->ip_tos; 2050 2051 ttloff = offsetof(struct ip, ip_ttl); 2052 proto = htons(ETHERTYPE_IP); 2053 break; 2054 } 2055 #ifdef INET6 2056 case AF_INET6: { 2057 struct ip6_hdr *ip6; 2058 2059 m = m_pullup(m, sizeof(*ip6)); 2060 if (m == NULL) 2061 return (NULL); 2062 2063 ip6 = mtod(m, struct ip6_hdr *); 2064 itos = (ntohl(ip6->ip6_flow) & 0x0ff00000) >> 20; 2065 2066 ttloff = offsetof(struct ip6_hdr, ip6_hlim); 2067 proto = htons(ETHERTYPE_IPV6); 2068 break; 2069 } 2070 #endif 2071 #ifdef MPLS 2072 case AF_MPLS: { 2073 uint32_t shim; 2074 2075 m = m_pullup(m, 
sizeof(shim)); 2076 if (m == NULL) 2077 return (NULL); 2078 2079 shim = bemtoh32(mtod(m, uint32_t *)) & MPLS_EXP_MASK; 2080 itos = (shim >> MPLS_EXP_OFFSET) << 5; 2081 2082 ttloff = 3; 2083 2084 if (m->m_flags & (M_BCAST | M_MCAST)) 2085 proto = htons(ETHERTYPE_MPLS_MCAST); 2086 else 2087 proto = htons(ETHERTYPE_MPLS); 2088 break; 2089 } 2090 #endif 2091 default: 2092 unhandled_af(af); 2093 } 2094 2095 if (tttl == -1) { 2096 KASSERT(m->m_len > ttloff); /* m_pullup has happened */ 2097 2098 ttl = *(m->m_data + ttloff); 2099 } else 2100 ttl = tttl; 2101 2102 itos = gre_l3_tos(tunnel, m, itos); 2103 ip_ecn_ingress(tunnel->t_ecn, &otos, &itos); 2104 2105 return (gre_encap_dst(tunnel, dst, m, proto, ttl, otos)); 2106 } 2107 2108 static struct mbuf * 2109 gre_encap_dst(const struct gre_tunnel *tunnel, const union gre_addr *dst, 2110 struct mbuf *m, uint16_t proto, uint8_t ttl, uint8_t tos) 2111 { 2112 struct gre_header *gh; 2113 struct gre_h_key *gkh; 2114 int hlen; 2115 2116 hlen = sizeof(*gh); 2117 if (tunnel->t_key_mask != GRE_KEY_NONE) 2118 hlen += sizeof(*gkh); 2119 2120 m = m_prepend(m, hlen, M_DONTWAIT); 2121 if (m == NULL) 2122 return (NULL); 2123 2124 gh = mtod(m, struct gre_header *); 2125 gh->gre_flags = GRE_VERS_0; 2126 gh->gre_proto = proto; 2127 if (tunnel->t_key_mask != GRE_KEY_NONE) { 2128 gh->gre_flags |= htons(GRE_KP); 2129 2130 gkh = (struct gre_h_key *)(gh + 1); 2131 gkh->gre_key = tunnel->t_key; 2132 2133 if (tunnel->t_key_mask == GRE_KEY_ENTROPY && 2134 ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID)) { 2135 gkh->gre_key |= htonl(~GRE_KEY_ENTROPY & 2136 (m->m_pkthdr.ph_flowid & M_FLOWID_MASK)); 2137 } 2138 } 2139 2140 return (gre_encap_dst_ip(tunnel, dst, m, ttl, tos)); 2141 } 2142 2143 static struct mbuf * 2144 gre_encap_dst_ip(const struct gre_tunnel *tunnel, const union gre_addr *dst, 2145 struct mbuf *m, uint8_t ttl, uint8_t tos) 2146 { 2147 switch (tunnel->t_af) { 2148 case AF_INET: { 2149 struct ip *ip; 2150 2151 m = m_prepend(m, sizeof(*ip), 
M_DONTWAIT); 2152 if (m == NULL) 2153 return (NULL); 2154 2155 ip = mtod(m, struct ip *); 2156 ip->ip_v = IPVERSION; 2157 ip->ip_hl = sizeof(*ip) >> 2; 2158 ip->ip_off = tunnel->t_df; 2159 ip->ip_tos = tos; 2160 ip->ip_len = htons(m->m_pkthdr.len); 2161 ip->ip_ttl = ttl; 2162 ip->ip_p = IPPROTO_GRE; 2163 ip->ip_src = tunnel->t_src4; 2164 ip->ip_dst = dst->in4; 2165 break; 2166 } 2167 #ifdef INET6 2168 case AF_INET6: { 2169 struct ip6_hdr *ip6; 2170 int len = m->m_pkthdr.len; 2171 2172 m = m_prepend(m, sizeof(*ip6), M_DONTWAIT); 2173 if (m == NULL) 2174 return (NULL); 2175 2176 ip6 = mtod(m, struct ip6_hdr *); 2177 ip6->ip6_flow = ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID) ? 2178 htonl(m->m_pkthdr.ph_flowid & M_FLOWID_MASK) : 0; 2179 ip6->ip6_vfc |= IPV6_VERSION; 2180 ip6->ip6_flow |= htonl((uint32_t)tos << 20); 2181 ip6->ip6_plen = htons(len); 2182 ip6->ip6_nxt = IPPROTO_GRE; 2183 ip6->ip6_hlim = ttl; 2184 ip6->ip6_src = tunnel->t_src6; 2185 ip6->ip6_dst = dst->in6; 2186 2187 if (tunnel->t_df) 2188 SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT); 2189 2190 break; 2191 } 2192 #endif /* INET6 */ 2193 default: 2194 panic("%s: unsupported af %d in %p", __func__, tunnel->t_af, 2195 tunnel); 2196 } 2197 2198 return (m); 2199 } 2200 2201 static int 2202 gre_ip_output(const struct gre_tunnel *tunnel, struct mbuf *m) 2203 { 2204 m->m_flags &= ~(M_BCAST|M_MCAST); 2205 m->m_pkthdr.ph_rtableid = tunnel->t_rtableid; 2206 2207 #if NPF > 0 2208 pf_pkt_addr_changed(m); 2209 #endif 2210 2211 switch (tunnel->t_af) { 2212 case AF_INET: 2213 ip_send(m); 2214 break; 2215 #ifdef INET6 2216 case AF_INET6: 2217 ip6_send(m); 2218 break; 2219 #endif 2220 default: 2221 panic("%s: unsupported af %d in %p", __func__, tunnel->t_af, 2222 tunnel); 2223 } 2224 2225 return (0); 2226 } 2227 2228 static int 2229 gre_tunnel_ioctl(struct ifnet *ifp, struct gre_tunnel *tunnel, 2230 u_long cmd, void *data) 2231 { 2232 struct ifreq *ifr = (struct ifreq *)data; 2233 int error = 0; 2234 2235 switch(cmd) { 2236 
case SIOCSIFMTU: 2237 if (ifr->ifr_mtu < 576) { 2238 error = EINVAL; 2239 break; 2240 } 2241 ifp->if_mtu = ifr->ifr_mtu; 2242 break; 2243 case SIOCADDMULTI: 2244 case SIOCDELMULTI: 2245 break; 2246 2247 case SIOCSVNETID: 2248 error = gre_set_vnetid(tunnel, ifr); 2249 break; 2250 2251 case SIOCGVNETID: 2252 error = gre_get_vnetid(tunnel, ifr); 2253 break; 2254 case SIOCDVNETID: 2255 error = gre_del_vnetid(tunnel); 2256 break; 2257 2258 case SIOCSVNETFLOWID: 2259 error = gre_set_vnetflowid(tunnel, ifr); 2260 break; 2261 2262 case SIOCGVNETFLOWID: 2263 error = gre_get_vnetflowid(tunnel, ifr); 2264 break; 2265 2266 case SIOCSLIFPHYADDR: 2267 error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 1); 2268 break; 2269 case SIOCGLIFPHYADDR: 2270 error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data); 2271 break; 2272 case SIOCDIFPHYADDR: 2273 error = gre_del_tunnel(tunnel); 2274 break; 2275 2276 case SIOCSLIFPHYRTABLE: 2277 if (ifr->ifr_rdomainid < 0 || 2278 ifr->ifr_rdomainid > RT_TABLEID_MAX || 2279 !rtable_exists(ifr->ifr_rdomainid)) { 2280 error = EINVAL; 2281 break; 2282 } 2283 tunnel->t_rtableid = ifr->ifr_rdomainid; 2284 break; 2285 case SIOCGLIFPHYRTABLE: 2286 ifr->ifr_rdomainid = tunnel->t_rtableid; 2287 break; 2288 2289 case SIOCSLIFPHYDF: 2290 /* commit */ 2291 tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0); 2292 break; 2293 case SIOCGLIFPHYDF: 2294 ifr->ifr_df = tunnel->t_df ? 
1 : 0; 2295 break; 2296 2297 default: 2298 error = ENOTTY; 2299 break; 2300 } 2301 2302 return (error); 2303 } 2304 2305 static uint8_t 2306 gre_l2_tos(const struct gre_tunnel *t, const struct mbuf *m) 2307 { 2308 uint8_t prio; 2309 2310 switch (t->t_txhprio) { 2311 case IF_HDRPRIO_PACKET: 2312 prio = m->m_pkthdr.pf.prio; 2313 break; 2314 default: 2315 prio = t->t_txhprio; 2316 break; 2317 } 2318 2319 return (IFQ_PRIO2TOS(prio)); 2320 } 2321 2322 static uint8_t 2323 gre_l3_tos(const struct gre_tunnel *t, const struct mbuf *m, uint8_t tos) 2324 { 2325 uint8_t prio; 2326 2327 switch (t->t_txhprio) { 2328 case IF_HDRPRIO_PAYLOAD: 2329 return (tos); 2330 case IF_HDRPRIO_PACKET: 2331 prio = m->m_pkthdr.pf.prio; 2332 break; 2333 default: 2334 prio = t->t_txhprio; 2335 break; 2336 } 2337 2338 return (IFQ_PRIO2TOS(prio) | (tos & IPTOS_ECN_MASK)); 2339 } 2340 2341 static int 2342 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 2343 { 2344 struct gre_softc *sc = ifp->if_softc; 2345 struct ifreq *ifr = (struct ifreq *)data; 2346 struct ifkalivereq *ikar = (struct ifkalivereq *)data; 2347 int error = 0; 2348 2349 switch(cmd) { 2350 case SIOCSIFADDR: 2351 ifp->if_flags |= IFF_UP; 2352 /* FALLTHROUGH */ 2353 case SIOCSIFFLAGS: 2354 if (ISSET(ifp->if_flags, IFF_UP)) { 2355 if (!ISSET(ifp->if_flags, IFF_RUNNING)) 2356 error = gre_up(sc); 2357 else 2358 error = 0; 2359 } else { 2360 if (ISSET(ifp->if_flags, IFF_RUNNING)) 2361 error = gre_down(sc); 2362 } 2363 break; 2364 case SIOCSIFRDOMAIN: 2365 /* let if_rdomain do its thing */ 2366 error = ENOTTY; 2367 break; 2368 2369 case SIOCSETKALIVE: 2370 if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 || 2371 ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256 || 2372 (ikar->ikar_timeo == 0) != (ikar->ikar_cnt == 0)) 2373 return (EINVAL); 2374 2375 if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) { 2376 sc->sc_ka_count = 0; 2377 sc->sc_ka_timeo = 0; 2378 sc->sc_ka_state = GRE_KA_NONE; 2379 } else { 2380 sc->sc_ka_count = 
ikar->ikar_cnt; 2381 sc->sc_ka_timeo = ikar->ikar_timeo; 2382 sc->sc_ka_state = GRE_KA_DOWN; 2383 2384 arc4random_buf(&sc->sc_ka_key, sizeof(sc->sc_ka_key)); 2385 sc->sc_ka_bias = arc4random(); 2386 sc->sc_ka_holdmax = sc->sc_ka_count; 2387 2388 sc->sc_ka_recvtm = ticks - hz; 2389 timeout_add(&sc->sc_ka_send, 1); 2390 timeout_add_sec(&sc->sc_ka_hold, 2391 sc->sc_ka_timeo * sc->sc_ka_count); 2392 } 2393 break; 2394 2395 case SIOCGETKALIVE: 2396 ikar->ikar_cnt = sc->sc_ka_count; 2397 ikar->ikar_timeo = sc->sc_ka_timeo; 2398 break; 2399 2400 case SIOCSLIFPHYTTL: 2401 if (ifr->ifr_ttl != -1 && 2402 (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) { 2403 error = EINVAL; 2404 break; 2405 } 2406 2407 /* commit */ 2408 sc->sc_tunnel.t_ttl = ifr->ifr_ttl; 2409 break; 2410 2411 case SIOCGLIFPHYTTL: 2412 ifr->ifr_ttl = sc->sc_tunnel.t_ttl; 2413 break; 2414 2415 case SIOCSLIFPHYECN: 2416 sc->sc_tunnel.t_ecn = 2417 ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN; 2418 break; 2419 case SIOCGLIFPHYECN: 2420 ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED); 2421 break; 2422 2423 case SIOCSTXHPRIO: 2424 error = if_txhprio_l3_check(ifr->ifr_hdrprio); 2425 if (error != 0) 2426 break; 2427 2428 sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio; 2429 break; 2430 case SIOCGTXHPRIO: 2431 ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio; 2432 break; 2433 2434 case SIOCSRXHPRIO: 2435 error = if_rxhprio_l3_check(ifr->ifr_hdrprio); 2436 if (error != 0) 2437 break; 2438 2439 sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio; 2440 break; 2441 case SIOCGRXHPRIO: 2442 ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio; 2443 break; 2444 2445 default: 2446 error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data); 2447 break; 2448 } 2449 2450 return (error); 2451 } 2452 2453 static int 2454 mgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 2455 { 2456 struct mgre_softc *sc = ifp->if_softc; 2457 struct ifreq *ifr = (struct ifreq *)data; 2458 int error = 0; 2459 2460 switch(cmd) { 2461 case SIOCSIFADDR: 2462 break; 2463 
case SIOCSIFFLAGS: 2464 if (ISSET(ifp->if_flags, IFF_UP)) { 2465 if (!ISSET(ifp->if_flags, IFF_RUNNING)) 2466 error = mgre_up(sc); 2467 else 2468 error = 0; 2469 } else { 2470 if (ISSET(ifp->if_flags, IFF_RUNNING)) 2471 error = mgre_down(sc); 2472 } 2473 break; 2474 2475 case SIOCSLIFPHYTTL: 2476 if (ifr->ifr_ttl != -1 && 2477 (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) { 2478 error = EINVAL; 2479 break; 2480 } 2481 2482 /* commit */ 2483 sc->sc_tunnel.t_ttl = ifr->ifr_ttl; 2484 break; 2485 2486 case SIOCGLIFPHYTTL: 2487 ifr->ifr_ttl = sc->sc_tunnel.t_ttl; 2488 break; 2489 2490 case SIOCSLIFPHYECN: 2491 sc->sc_tunnel.t_ecn = 2492 ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN; 2493 break; 2494 case SIOCGLIFPHYECN: 2495 ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED); 2496 break; 2497 2498 case SIOCSLIFPHYADDR: 2499 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 2500 error = EBUSY; 2501 break; 2502 } 2503 error = mgre_set_tunnel(sc, (struct if_laddrreq *)data); 2504 break; 2505 case SIOCGLIFPHYADDR: 2506 error = mgre_get_tunnel(sc, (struct if_laddrreq *)data); 2507 break; 2508 2509 case SIOCSTXHPRIO: 2510 error = if_txhprio_l3_check(ifr->ifr_hdrprio); 2511 if (error != 0) 2512 break; 2513 2514 sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio; 2515 break; 2516 case SIOCGTXHPRIO: 2517 ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio; 2518 break; 2519 2520 case SIOCSRXHPRIO: 2521 error = if_rxhprio_l3_check(ifr->ifr_hdrprio); 2522 if (error != 0) 2523 break; 2524 2525 sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio; 2526 break; 2527 case SIOCGRXHPRIO: 2528 ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio; 2529 break; 2530 2531 case SIOCSVNETID: 2532 case SIOCDVNETID: 2533 case SIOCDIFPHYADDR: 2534 case SIOCSLIFPHYRTABLE: 2535 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 2536 error = EBUSY; 2537 break; 2538 } 2539 2540 /* FALLTHROUGH */ 2541 default: 2542 error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data); 2543 break; 2544 } 2545 2546 return (error); 2547 } 2548 2549 static int 2550 
mgre_set_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
{
	/*
	 * Configure the local (source) endpoint of a point-to-multipoint
	 * mgre interface.  Only the source address may be set; the peer
	 * is selected per-packet via the route, so dstaddr must be
	 * AF_UNSPEC.  Returns 0 or an errno.
	 */
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct sockaddr *addr = (struct sockaddr *)&req->addr;
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *addr4;
#ifdef INET6
	struct sockaddr_in6 *addr6;
	int error;
#endif

	/* mgre has no fixed destination; reject any dstaddr */
	if (dstaddr->sa_family != AF_UNSPEC)
		return (EINVAL);

	/* validate */
	switch (addr->sa_family) {
	case AF_INET:
		if (addr->sa_len != sizeof(*addr4))
			return (EINVAL);

		addr4 = (struct sockaddr_in *)addr;
		/* source must be a real unicast host address */
		if (in_nullhost(addr4->sin_addr) ||
		    IN_MULTICAST(addr4->sin_addr.s_addr))
			return (EINVAL);

		tunnel->t_src4 = addr4->sin_addr;
		tunnel->t_dst4.s_addr = INADDR_ANY;

		break;
#ifdef INET6
	case AF_INET6:
		if (addr->sa_len != sizeof(*addr6))
			return (EINVAL);

		addr6 = (struct sockaddr_in6 *)addr;
		if (IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&addr6->sin6_addr))
			return (EINVAL);

		/* fold the scope id into the stored address */
		error = in6_embedscope(&tunnel->t_src6, addr6, NULL);
		if (error != 0)
			return (error);

		memset(&tunnel->t_dst6, 0, sizeof(tunnel->t_dst6));

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* commit */
	tunnel->t_af = addr->sa_family;

	return (0);
}

/*
 * Report the mgre tunnel source address back to userland
 * (SIOCGLIFPHYADDR).  dstaddr is always returned as AF_UNSPEC.
 */
static int
mgre_get_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif

	switch (tunnel->t_af) {
	case AF_UNSPEC:
		return (EADDRNOTAVAIL);
	case AF_INET:
		sin = (struct sockaddr_in *)&req->addr;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_src4;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&req->addr;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		/* undo the embedded scope for userland */
		in6_recoverscope(sin6, &tunnel->t_src6);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	/* minimal AF_UNSPEC sockaddr: just sa_len + sa_family */
	dstaddr->sa_len = 2;
	dstaddr->sa_family = AF_UNSPEC;

	return (0);
}

/*
 * ioctl handler for egre (Ethernet-over-GRE) interfaces.  Tunnel
 * endpoint/key changes are refused while the interface is running;
 * anything not handled here falls through to the shared
 * gre_tunnel_ioctl() and then ether_ioctl().
 */
static int
egre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct egre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSLIFPHYADDR:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		/* these reconfigure the tunnel; refuse while running */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		if (error == ENOTTY)
			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}

/*
 * ioctl handler for nvgre interfaces.  On top of the usual tunnel
 * configuration this also manages the parent (carrier) interface and
 * the bridge-style learning table (cache size, timeout, dump, flush).
 * Any change affecting forwarding state flushes the learned map.
 */
static int
nvgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct nvgre_softc *sc = ifp->if_softc;
	struct gre_tunnel *tunnel = &sc->sc_tunnel;

	struct ifreq *ifr = (struct ifreq *)data;
	struct if_parent *parent = (struct if_parent *)data;
	struct ifbrparam *bparam = (struct ifbrparam *)data;
	struct ifnet *ifp0;

	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_up(sc);
			else
				error = ENETRESET;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_down(sc);
		}
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* nvgre peers over multicast, hence ucast == 0 */
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 0);
		if (error == 0)
			nvgre_flush_map(sc);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = gre_del_tunnel(tunnel);
		if (error == 0)
			nvgre_flush_map(sc);
		break;

	case SIOCSIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = nvgre_set_parent(sc, parent->ifp_parent);
		if (error == 0)
			nvgre_flush_map(sc);
		break;
	case SIOCGIFPARENT:
		ifp0 = if_get(sc->sc_ifp0);
		if (ifp0 == NULL)
			error
			    = EADDRNOTAVAIL;
		else {
			memcpy(parent->ifp_parent, ifp0->if_xname,
			    sizeof(parent->ifp_parent));
		}
		if_put(ifp0);
		break;
	case SIOCDIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* commit */
		sc->sc_ifp0 = 0;
		nvgre_flush_map(sc);
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* nvgre always carries entropy, so the vnetid is narrower */
		if (ifr->ifr_vnetid < GRE_KEY_ENTROPY_MIN ||
		    ifr->ifr_vnetid > GRE_KEY_ENTROPY_MAX) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_key = htonl(ifr->ifr_vnetid << GRE_KEY_ENTROPY_SHIFT);
		nvgre_flush_map(sc);
		break;
	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		nvgre_flush_map(sc);
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ? 1 : 0;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = tunnel->t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCBRDGSCACHE:
		/* maximum number of learned ethernet addresses */
		if (bparam->ifbrp_csize < 1) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_ether_max = bparam->ifbrp_csize;
		break;
	case SIOCBRDGGCACHE:
		bparam->ifbrp_csize = sc->sc_ether_max;
		break;

	case SIOCBRDGSTO:
		/* learning-table timeout, seconds -> ticks */
		if (bparam->ifbrp_ctime < 0 ||
		    bparam->ifbrp_ctime > INT_MAX / hz) {
			error = EINVAL;
			break;
		}
		sc->sc_ether_tmo = bparam->ifbrp_ctime * hz;
		break;
	case SIOCBRDGGTO:
		bparam->ifbrp_ctime = sc->sc_ether_tmo / hz;
		break;

	case SIOCBRDGRTS:
		error = nvgre_rtfind(sc, (struct ifbaconf *)data);
		break;
	case SIOCBRDGFLUSH:
		nvgre_flush_map(sc);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}

/*
 * ioctl handler for eoip (MikroTik Ethernet-over-IP) interfaces.
 * Supports keepalives and a 16-bit little-endian tunnel id instead of
 * the standard GRE key.
 */
static int
eoip_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_down(sc);
		}
		break;

	case SIOCSETKALIVE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/*
		 * NOTE(review): validation failures here return directly
		 * rather than setting error and breaking like the other
		 * cases; behaviorally equivalent since EINVAL is never
		 * remapped below.
		 */
		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256)
			return (EINVAL);

		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			/* zero timeout or count disables keepalives */
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffff)
			return (EINVAL);

		sc->sc_tunnel.t_key = htole16(ifr->ifr_vnetid); /* for cmp */
		sc->sc_tunnel_id = htole16(ifr->ifr_vnetid);
		break;

	case SIOCGVNETID:
		ifr->ifr_vnetid = letoh16(sc->sc_tunnel_id);
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* eoip is point-to-point, hence ucast == 1 */
		error = gre_set_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		error = gre_del_tunnel(&sc->sc_tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;
	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		sc->sc_tunnel.t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = sc->sc_tunnel.t_df ? 1 : 0;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}

/*
 * Bring a gre(4) interface up: mark it running and kick off the
 * keepalive state machine if configured.
 */
static int
gre_up(struct gre_softc *sc)
{
	NET_ASSERT_LOCKED();
	SET(sc->sc_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE)
		gre_keepalive_send(sc);

	return (0);
}

/*
 * Bring a gre(4) interface down: clear RUNNING, stop both keepalive
 * timeouts, and report the link as keepalive-down.
 */
static int
gre_down(struct gre_softc *sc)
{
	NET_ASSERT_LOCKED();
	CLR(sc->sc_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		/* barriers make sure no keepalive handler is still running */
		timeout_del_barrier(&sc->sc_ka_hold);
		timeout_del_barrier(&sc->sc_ka_send);

		sc->sc_ka_state = GRE_KA_DOWN;
		gre_link_state(&sc->sc_if, sc->sc_ka_state);
	}

	return (0);
}

/*
 * Map keepalive state to interface link state and publish a change.
 * Without keepalives the link state is simply unknown.
 */
static void
gre_link_state(struct ifnet *ifp, unsigned int state)
{
	int link_state = LINK_STATE_UNKNOWN;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		switch (state) {
		case GRE_KA_NONE:
			/* maybe up? or down? it's unknown, really */
			break;
		case GRE_KA_UP:
			link_state = LINK_STATE_UP;
			break;
		default:
			link_state = LINK_STATE_KALIVE_DOWN;
			break;
		}
	}

	if (ifp->if_link_state != link_state) {
		ifp->if_link_state = link_state;
		if_link_state_change(ifp);
	}
}

/*
 * Timeout handler: emit one keepalive probe.  The probe is a signed
 * (SipHash-authenticated) payload encapsulated twice: once in a
 * reversed tunnel (dst->src) so the peer bounces it straight back to
 * us, then in the real tunnel towards the peer.
 */
static void
gre_keepalive_send(void *arg)
{
	struct gre_tunnel t;
	struct gre_softc *sc = arg;
	struct mbuf *m;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	int linkhdr, len;
	uint16_t proto;
	uint8_t ttl;
	uint8_t tos;

	/*
	 * re-schedule immediately, so we deal with incomplete configuration
	 * or temporary errors.
	 */
	if (sc->sc_ka_timeo)
		timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);

	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_af == AF_UNSPEC ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#endif
	len = linkhdr + sizeof(*gk);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	if (len > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	/* reserve the header space, leaving just the keepalive payload */
	m->m_pkthdr.len = m->m_len = len;
	m_adj(m, linkhdr);

	/*
	 * build the inside packet
	 */
	gk = mtod(m, struct gre_keepalive *);
	htobem32(&gk->gk_uptime, sc->sc_ka_bias + ticks);
	htobem32(&gk->gk_random, arc4random());

	/* authenticate uptime+random so we only accept our own probes */
	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(gk->gk_digest, &ctx);

	ttl = sc->sc_tunnel.t_ttl == -1 ? ip_defttl : sc->sc_tunnel.t_ttl;

	m->m_pkthdr.pf.prio = sc->sc_if.if_llprio;
	tos = gre_l3_tos(&sc->sc_tunnel, m, IFQ_PRIO2TOS(m->m_pkthdr.pf.prio));

	/* swapped tunnel: the peer will forward the probe back to us */
	t.t_af = sc->sc_tunnel.t_af;
	t.t_df = sc->sc_tunnel.t_df;
	t.t_src = sc->sc_tunnel.t_dst;
	t.t_dst = sc->sc_tunnel.t_src;
	t.t_key = sc->sc_tunnel.t_key;
	t.t_key_mask = sc->sc_tunnel.t_key_mask;

	m = gre_encap(&t, m, htons(0), ttl, tos);
	if (m == NULL)
		return;

	switch (sc->sc_tunnel.t_af) {
	case AF_INET: {
		struct ip *ip;

		/* finish the inner IPv4 header by hand */
		ip = mtod(m, struct ip *);
		ip->ip_id = htons(ip_randomid());
		ip->ip_sum = 0;
		ip->ip_sum = in_cksum(m, sizeof(*ip));

		proto = htons(ETHERTYPE_IP);
		break;
	}
#ifdef INET6
	case AF_INET6:
		proto = htons(ETHERTYPE_IPV6);
		break;
#endif
	default:
		m_freem(m);
		return;
	}

	/*
	 * put it in the tunnel
	 */
	m = gre_encap(&sc->sc_tunnel, m, proto, ttl, tos);
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);
}

/*
 * Hold timer expired without seeing a peer keepalive: declare the
 * link keepalive-down.
 */
static void
gre_keepalive_hold(void *arg)
{
	struct gre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    sc->sc_ka_state == GRE_KA_NONE)
		return;

	NET_LOCK();
	sc->sc_ka_state = GRE_KA_DOWN;
	gre_link_state(ifp, sc->sc_ka_state);
	NET_UNLOCK();
}

/*
 * Shared SIOCSLIFPHYADDR backend: validate and store a src/dst
 * endpoint pair.  ucast selects whether the destination must be
 * unicast (gre/egre/eoip) or multicast (nvgre).
 */
static int
gre_set_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req, int ucast)
{
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	int error;
#endif

	/* sa_family and sa_len must be equal */
	if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len)
		return (EINVAL);

	/* validate */
	switch
(dst->sa_family) { 3300 case AF_INET: 3301 if (dst->sa_len != sizeof(*dst4)) 3302 return (EINVAL); 3303 3304 src4 = (struct sockaddr_in *)src; 3305 if (in_nullhost(src4->sin_addr) || 3306 IN_MULTICAST(src4->sin_addr.s_addr)) 3307 return (EINVAL); 3308 3309 dst4 = (struct sockaddr_in *)dst; 3310 if (in_nullhost(dst4->sin_addr) || 3311 (IN_MULTICAST(dst4->sin_addr.s_addr) != !ucast)) 3312 return (EINVAL); 3313 3314 tunnel->t_src4 = src4->sin_addr; 3315 tunnel->t_dst4 = dst4->sin_addr; 3316 3317 break; 3318 #ifdef INET6 3319 case AF_INET6: 3320 if (dst->sa_len != sizeof(*dst6)) 3321 return (EINVAL); 3322 3323 src6 = (struct sockaddr_in6 *)src; 3324 if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) || 3325 IN6_IS_ADDR_MULTICAST(&src6->sin6_addr)) 3326 return (EINVAL); 3327 3328 dst6 = (struct sockaddr_in6 *)dst; 3329 if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) || 3330 IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) != !ucast) 3331 return (EINVAL); 3332 3333 if (src6->sin6_scope_id != dst6->sin6_scope_id) 3334 return (EINVAL); 3335 3336 error = in6_embedscope(&tunnel->t_src6, src6, NULL); 3337 if (error != 0) 3338 return (error); 3339 3340 error = in6_embedscope(&tunnel->t_dst6, dst6, NULL); 3341 if (error != 0) 3342 return (error); 3343 3344 break; 3345 #endif 3346 default: 3347 return (EAFNOSUPPORT); 3348 } 3349 3350 /* commit */ 3351 tunnel->t_af = dst->sa_family; 3352 3353 return (0); 3354 } 3355 3356 static int 3357 gre_get_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req) 3358 { 3359 struct sockaddr *src = (struct sockaddr *)&req->addr; 3360 struct sockaddr *dst = (struct sockaddr *)&req->dstaddr; 3361 struct sockaddr_in *sin; 3362 #ifdef INET6 /* ifconfig already embeds the scopeid */ 3363 struct sockaddr_in6 *sin6; 3364 #endif 3365 3366 switch (tunnel->t_af) { 3367 case AF_UNSPEC: 3368 return (EADDRNOTAVAIL); 3369 case AF_INET: 3370 sin = (struct sockaddr_in *)src; 3371 memset(sin, 0, sizeof(*sin)); 3372 sin->sin_family = AF_INET; 3373 sin->sin_len = 
sizeof(*sin); 3374 sin->sin_addr = tunnel->t_src4; 3375 3376 sin = (struct sockaddr_in *)dst; 3377 memset(sin, 0, sizeof(*sin)); 3378 sin->sin_family = AF_INET; 3379 sin->sin_len = sizeof(*sin); 3380 sin->sin_addr = tunnel->t_dst4; 3381 3382 break; 3383 3384 #ifdef INET6 3385 case AF_INET6: 3386 sin6 = (struct sockaddr_in6 *)src; 3387 memset(sin6, 0, sizeof(*sin6)); 3388 sin6->sin6_family = AF_INET6; 3389 sin6->sin6_len = sizeof(*sin6); 3390 in6_recoverscope(sin6, &tunnel->t_src6); 3391 3392 sin6 = (struct sockaddr_in6 *)dst; 3393 memset(sin6, 0, sizeof(*sin6)); 3394 sin6->sin6_family = AF_INET6; 3395 sin6->sin6_len = sizeof(*sin6); 3396 in6_recoverscope(sin6, &tunnel->t_dst6); 3397 3398 break; 3399 #endif 3400 default: 3401 return (EAFNOSUPPORT); 3402 } 3403 3404 return (0); 3405 } 3406 3407 static int 3408 gre_del_tunnel(struct gre_tunnel *tunnel) 3409 { 3410 /* commit */ 3411 tunnel->t_af = AF_UNSPEC; 3412 3413 return (0); 3414 } 3415 3416 static int 3417 gre_set_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr) 3418 { 3419 uint32_t key; 3420 uint32_t min = GRE_KEY_MIN; 3421 uint32_t max = GRE_KEY_MAX; 3422 unsigned int shift = GRE_KEY_SHIFT; 3423 uint32_t mask = GRE_KEY_MASK; 3424 3425 if (tunnel->t_key_mask == GRE_KEY_ENTROPY) { 3426 min = GRE_KEY_ENTROPY_MIN; 3427 max = GRE_KEY_ENTROPY_MAX; 3428 shift = GRE_KEY_ENTROPY_SHIFT; 3429 mask = GRE_KEY_ENTROPY; 3430 } 3431 3432 if (ifr->ifr_vnetid < min || ifr->ifr_vnetid > max) 3433 return (EINVAL); 3434 3435 key = htonl(ifr->ifr_vnetid << shift); 3436 3437 /* commit */ 3438 tunnel->t_key_mask = mask; 3439 tunnel->t_key = key; 3440 3441 return (0); 3442 } 3443 3444 static int 3445 gre_get_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr) 3446 { 3447 int shift; 3448 3449 switch (tunnel->t_key_mask) { 3450 case GRE_KEY_NONE: 3451 return (EADDRNOTAVAIL); 3452 case GRE_KEY_ENTROPY: 3453 shift = GRE_KEY_ENTROPY_SHIFT; 3454 break; 3455 case GRE_KEY_MASK: 3456 shift = GRE_KEY_SHIFT; 3457 break; 3458 } 3459 3460 
ifr->ifr_vnetid = ntohl(tunnel->t_key) >> shift; 3461 3462 return (0); 3463 } 3464 3465 static int 3466 gre_del_vnetid(struct gre_tunnel *tunnel) 3467 { 3468 tunnel->t_key_mask = GRE_KEY_NONE; 3469 3470 return (0); 3471 } 3472 3473 static int 3474 gre_set_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr) 3475 { 3476 uint32_t mask, key; 3477 3478 if (tunnel->t_key_mask == GRE_KEY_NONE) 3479 return (EADDRNOTAVAIL); 3480 3481 mask = ifr->ifr_vnetid ? GRE_KEY_ENTROPY : GRE_KEY_MASK; 3482 if (tunnel->t_key_mask == mask) { 3483 /* nop */ 3484 return (0); 3485 } 3486 3487 key = ntohl(tunnel->t_key); 3488 if (mask == GRE_KEY_ENTROPY) { 3489 if (key > GRE_KEY_ENTROPY_MAX) 3490 return (ERANGE); 3491 3492 key = htonl(key << GRE_KEY_ENTROPY_SHIFT); 3493 } else 3494 key = htonl(key >> GRE_KEY_ENTROPY_SHIFT); 3495 3496 /* commit */ 3497 tunnel->t_key_mask = mask; 3498 tunnel->t_key = key; 3499 3500 return (0); 3501 } 3502 3503 static int 3504 gre_get_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr) 3505 { 3506 if (tunnel->t_key_mask == GRE_KEY_NONE) 3507 return (EADDRNOTAVAIL); 3508 3509 ifr->ifr_vnetid = tunnel->t_key_mask == GRE_KEY_ENTROPY; 3510 3511 return (0); 3512 } 3513 3514 static int 3515 mgre_up(struct mgre_softc *sc) 3516 { 3517 unsigned int hlen; 3518 3519 switch (sc->sc_tunnel.t_af) { 3520 case AF_UNSPEC: 3521 return (EDESTADDRREQ); 3522 case AF_INET: 3523 hlen = sizeof(struct ip); 3524 break; 3525 #ifdef INET6 3526 case AF_INET6: 3527 hlen = sizeof(struct ip6_hdr); 3528 break; 3529 #endif /* INET6 */ 3530 default: 3531 unhandled_af(sc->sc_tunnel.t_af); 3532 } 3533 3534 hlen += sizeof(struct gre_header); 3535 if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE) 3536 hlen += sizeof(struct gre_h_key); 3537 3538 NET_ASSERT_LOCKED(); 3539 3540 if (RBT_INSERT(mgre_tree, &mgre_tree, sc) != NULL) 3541 return (EADDRINUSE); 3542 3543 sc->sc_if.if_hdrlen = hlen; 3544 SET(sc->sc_if.if_flags, IFF_RUNNING); 3545 3546 return (0); 3547 } 3548 3549 static int 3550 
mgre_down(struct mgre_softc *sc)
{
	/* Take an mgre interface down and remove it from the lookup tree. */
	NET_ASSERT_LOCKED();

	CLR(sc->sc_if.if_flags, IFF_RUNNING);
	sc->sc_if.if_hdrlen = GRE_HDRLEN; /* symmetry */

	RBT_REMOVE(mgre_tree, &mgre_tree, sc);

	/* barrier? */

	return (0);
}

/*
 * Bring an egre interface up: it must have a tunnel configured and a
 * unique endpoint/key tuple in the global egre tree.
 */
static int
egre_up(struct egre_softc *sc)
{
	if (sc->sc_tunnel.t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(egre_tree, &egre_tree, sc) != NULL)
		return (EADDRINUSE);

	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	return (0);
}

/*
 * Take an egre interface down and remove it from the lookup tree.
 */
static int
egre_down(struct egre_softc *sc)
{
	NET_ASSERT_LOCKED();

	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	RBT_REMOVE(egre_tree, &egre_tree, sc);

	/* barrier? */

	return (0);
}

/* Media changes are meaningless on a virtual interface. */
static int
egre_media_change(struct ifnet *ifp)
{
	return (ENOTTY);
}

/* Always report an active auto-selected Ethernet medium. */
static void
egre_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	imr->ifm_active = IFM_ETHER | IFM_AUTO;
	imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
}

/*
 * Bring an nvgre interface up: requires a configured tunnel and a
 * multicast-capable parent.  Registers the softc in both the unicast
 * and multicast lookup trees, joins the tunnel multicast group on the
 * parent, and hooks parent link-state/detach events.  Errors unwind
 * in reverse order via the labels at the bottom.
 */
static int
nvgre_up(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp0;
	void *inm;
	int error;

	if (tunnel->t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 == NULL)
		return (ENXIO);
	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		error = ENODEV;
		goto put;
	}

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(nvgre_mcast_tree, &nvgre_mcast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto put;
	}
	if (RBT_INSERT(nvgre_ucast_tree, &nvgre_ucast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto remove_mcast;
	}

	switch (tunnel->t_af) {
	case AF_INET:
		inm = in_addmulti(&tunnel->t_dst4, ifp0);
		if (inm == NULL) {
			error = ECONNABORTED;
			goto remove_ucast;
		}
		break;
#ifdef INET6
	case AF_INET6:
		inm = in6_addmulti(&tunnel->t_dst6, ifp0, &error);
		if (inm == NULL) {
			/* error is already set */
			goto remove_ucast;
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(tunnel->t_af);
	}

	sc->sc_lhcookie = hook_establish(ifp0->if_linkstatehooks, 0,
	    nvgre_link_change, sc);
	if (sc->sc_lhcookie == NULL) {
		error = ENOMEM;
		goto delmulti;
	}

	sc->sc_dhcookie = hook_establish(ifp0->if_detachhooks, 0,
	    nvgre_detach, sc);
	if (sc->sc_dhcookie == NULL) {
		error = ENOMEM;
		goto dislh;
	}

	if_put(ifp0);

	sc->sc_inm = inm;
	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	/* start aging the learned ethernet address map */
	timeout_add_sec(&sc->sc_ether_age, NVGRE_AGE_TMO);

	return (0);

dislh:
	hook_disestablish(ifp0->if_linkstatehooks, sc->sc_lhcookie);
delmulti:
	switch (tunnel->t_af) {
	case AF_INET:
		in_delmulti(inm);
		break;
#ifdef INET6
	case AF_INET6:
		in6_delmulti(inm);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}
remove_ucast:
	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
remove_mcast:
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);
put:
	if_put(ifp0);
	return (error);
}

/*
 * Take an nvgre interface down: drain timers, transmit work, and the
 * send task, then undo everything nvgre_up() established.
 */
static int
nvgre_down(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct taskq *softnet = net_tq(ifp->if_index);
	struct ifnet *ifp0;

	NET_ASSERT_LOCKED();

	CLR(ifp->if_flags, IFF_RUNNING);

	/* drop the net lock while waiting on barriers to avoid deadlock */
	NET_UNLOCK();
	timeout_del_barrier(&sc->sc_ether_age);
	ifq_barrier(&ifp->if_snd);
	if (!task_del(softnet, &sc->sc_send_task))
		taskq_barrier(softnet);
	NET_LOCK();

	mq_purge(&sc->sc_send_list);

	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 != NULL) {
		hook_disestablish(ifp0->if_detachhooks, sc->sc_dhcookie);
		hook_disestablish(ifp0->if_linkstatehooks, sc->sc_lhcookie);
	}
	if_put(ifp0);

	switch (tunnel->t_af) {
	case AF_INET:
		in_delmulti(sc->sc_inm);
		break;

#ifdef INET6
	case AF_INET6:
		in6_delmulti(sc->sc_inm);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);

	return (0);
}

/* Parent link-state hook: nothing to do. */
static void
nvgre_link_change(void *arg)
{
	/* nop */
}

/*
 * Parent detach hook: force the interface down and forget the parent.
 */
static void
nvgre_detach(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		nvgre_down(sc);
		if_down(ifp);
	}

	sc->sc_ifp0 = 0;
}

/*
 * Record the parent interface (by index) used for multicast delivery.
 */
static int
nvgre_set_parent(struct nvgre_softc *sc, const char *parent)
{
	struct ifnet *ifp0;

	ifp0 = ifunit(parent); /* doesn't need an if_put */
	if (ifp0 == NULL)
		return (EINVAL);

	if (!ISSET(ifp0->if_flags, IFF_MULTICAST))
		return (EPROTONOSUPPORT);

	/* commit */
	sc->sc_ifp0 = ifp0->if_index;

	return (0);
}

/*
 * Timeout handler: expire dynamically learned ethernet addresses that
 * have been idle for twice the configured timeout.
 */
static void
nvgre_age(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct nvgre_entry *nv, *nnv;
	int tmo = sc->sc_ether_tmo * 2;
	int diff;

	if (!ISSET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING))
		return;

	rw_enter_write(&sc->sc_ether_lock); /* XXX */
	RBT_FOREACH_SAFE(nv, nvgre_map, &sc->sc_ether_map, nnv) {
		if (nv->nv_type != NVGRE_ENTRY_DYNAMIC)
			continue;

		diff = ticks - nv->nv_age;
		if (diff < tmo)
			continue;

		sc->sc_ether_num--;
		RBT_REMOVE(nvgre_map, &sc->sc_ether_map, nv);
		if (refcnt_rele(&nv->nv_refs))
			pool_put(&nvgre_pool, nv);
	}
	rw_exit_write(&sc->sc_ether_lock);

	/* rearm for the next aging pass */
	timeout_add_sec(&sc->sc_ether_age, NVGRE_AGE_TMO);
}

/*
 * Is a learned map entry still usable?  Static entries never expire;
 * dynamic entries are valid until sc_ether_tmo ticks old.
 */
static inline int
nvgre_entry_valid(struct nvgre_softc *sc, const struct nvgre_entry *nv)
{
	int diff;

	if (nv == NULL)
		return (0);

	if (nv->nv_type == NVGRE_ENTRY_STATIC)
		return (1);

	diff = ticks - nv->nv_age;
	if (diff < sc->sc_ether_tmo)
		return (1);

	return (0);
}

/*
 * if_start handler for nvgre.  For each frame, resolve the inner
 * destination MAC to a tunnel gateway via the learned map (falling
 * back to the multicast group for broadcast/unknown destinations),
 * encapsulate, and hand the batch to the send task.
 */
static void
nvgre_start(struct ifnet *ifp)
{
	struct nvgre_softc *sc = ifp->if_softc;
	const struct gre_tunnel *tunnel = &sc->sc_tunnel;
	union gre_addr gateway;
	struct nvgre_entry *nv, key;
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct ether_header *eh;
	struct mbuf *m, *m0;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		eh = mtod(m0, struct ether_header *);
		if (ether_isbcast(eh->ether_dhost))
			gateway = tunnel->t_dst;
		else {
			memcpy(&key.nv_dst, eh->ether_dhost,
			    sizeof(key.nv_dst));

			rw_enter_read(&sc->sc_ether_lock);
			nv = RBT_FIND(nvgre_map, &sc->sc_ether_map, &key);
			if (nvgre_entry_valid(sc, nv))
				gateway = nv->nv_gateway;
			else {
				/* "flood" to unknown hosts */
				gateway = tunnel->t_dst;
			}
			rw_exit_read(&sc->sc_ether_lock);
		}

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap_dst(tunnel, &gateway, m,
		    htons(ETHERTYPE_TRANSETHER),
		    tunnel->t_ttl, gre_l2_tos(tunnel, m));
		if (m == NULL)
			continue;

		m->m_flags &= ~(M_BCAST|M_MCAST);
		m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;

#if NPF > 0
		pf_pkt_addr_changed(m);
#endif

		ml_enqueue(&ml, m);
	}

	if (!ml_empty(&ml)) {
		/* only schedule the task when the list was empty before */
		if (mq_enlist(&sc->sc_send_list, &ml) == 0)
			task_add(net_tq(ifp->if_index), &sc->sc_send_task);
		/* else set OACTIVE? */
	}
}

/*
 * Transmit a batch of encapsulated frames over IPv4 multicast on the
 * parent interface.  Returns the number of failed transmissions.
 */
static uint64_t
nvgre_send4(struct nvgre_softc *sc, struct mbuf_list *ml)
{
	struct ip_moptions imo;
	struct mbuf *m;
	uint64_t oerrors = 0;

	imo.imo_ifidx = sc->sc_ifp0;
	imo.imo_ttl = sc->sc_tunnel.t_ttl;
	imo.imo_loop = 0;

	NET_RLOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0)
			oerrors++;
	}
	NET_RUNLOCK();

	return (oerrors);
}

#ifdef INET6
/*
 * IPv6 counterpart of nvgre_send4().
 */
static uint64_t
nvgre_send6(struct nvgre_softc *sc, struct mbuf_list *ml)
{
	struct ip6_moptions im6o;
	struct mbuf *m;
	uint64_t oerrors = 0;

	im6o.im6o_ifidx = sc->sc_ifp0;
	im6o.im6o_hlim = sc->sc_tunnel.t_ttl;
	im6o.im6o_loop = 0;

	NET_RLOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0)
			oerrors++;
	}
	NET_RUNLOCK();

	return (oerrors);
}
#endif /* INET6 */

/*
 * Send task: drain the pending list through the address-family
 * specific transmit path.
 */
static void
nvgre_send(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	sa_family_t af = sc->sc_tunnel.t_af;
	struct mbuf_list ml;
	uint64_t oerrors;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	mq_delist(&sc->sc_send_list, &ml);
	if (ml_empty(&ml))
		return;

	switch (af) {
	case AF_INET:
		oerrors = nvgre_send4(sc, &ml);
		break;
#ifdef INET6
	case AF_INET6:
		oerrors = nvgre_send6(sc, &ml);
		break;
#endif
	default:
		unhandled_af(af);
		/* NOTREACHED */
	}

	ifp->if_oerrors += oerrors; /* XXX should be ifq_oerrors */
}

/*
 * Bring an eoip interface up; start keepalives if configured.
 */
static int
eoip_up(struct eoip_softc *sc)
{
	if (sc->sc_tunnel.t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(eoip_tree, &eoip_tree, sc) != NULL)
		return (EADDRINUSE);

	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		sc->sc_ka_holdmax = sc->sc_ka_count;
		eoip_keepalive_send(sc);
	}

	return (0);
}

/*
 * Take an eoip interface down: stop keepalives and remove the softc
 * from the lookup tree.
 */
static int
eoip_down(struct eoip_softc *sc)
{
	NET_ASSERT_LOCKED();
	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		timeout_del_barrier(&sc->sc_ka_hold);
		timeout_del_barrier(&sc->sc_ka_send);

		sc->sc_ka_state = GRE_KA_DOWN;
		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
	}

	RBT_REMOVE(eoip_tree, &eoip_tree, sc);

	return (0);
}

/*
 * if_start handler for eoip: encapsulate and transmit each queued
 * Ethernet frame.
 */
static void
eoip_start(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		m_align(m, 0);
		m->m_len = 0;

		m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}

/*
 * Prepend the EoIP headers (GRE version 1 with the EOIP protocol and
 * the payload length plus little-endian tunnel id in the key slot),
 * then the outer IP header.
 */
static struct mbuf *
eoip_encap(struct eoip_softc *sc, struct mbuf *m, uint8_t tos)
{
	struct gre_header *gh;
	struct gre_h_key_eoip *eoiph;
	int len = m->m_pkthdr.len;

	m = m_prepend(m, sizeof(*gh) + sizeof(*eoiph), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	gh = mtod(m, struct gre_header *);
	gh->gre_flags = htons(GRE_VERS_1 | GRE_KP);
	gh->gre_proto = htons(GRE_EOIP);

	eoiph = (struct gre_h_key_eoip *)(gh + 1);
	htobem16(&eoiph->eoip_len, len);
	eoiph->eoip_tunnel_id = sc->sc_tunnel_id;

	return (gre_encap_ip(&sc->sc_tunnel, m, sc->sc_tunnel.t_ttl, tos));
}

/*
 * Timeout handler: transmit an empty EoIP frame as a keepalive and
 * reschedule.
 */
static void
eoip_keepalive_send(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *m;
	int linkhdr;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#endif
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	if (linkhdr > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, linkhdr);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	m->m_pkthdr.pf.prio = ifp->if_llprio;
	/* trim to a zero-length payload: keepalives carry no data */
	m->m_pkthdr.len = m->m_len = linkhdr;
	m_adj(m, linkhdr);

	m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);

	timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);
}

/*
 * Hold timer expired without traffic from the peer.
 */
static void
eoip_keepalive_hold(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	NET_LOCK();
sc->sc_ka_state = GRE_KA_DOWN; 4155 gre_link_state(ifp, sc->sc_ka_state); 4156 NET_UNLOCK(); 4157 } 4158 4159 static void 4160 eoip_keepalive_recv(struct eoip_softc *sc) 4161 { 4162 switch (sc->sc_ka_state) { 4163 case GRE_KA_NONE: 4164 return; 4165 case GRE_KA_DOWN: 4166 sc->sc_ka_state = GRE_KA_HOLD; 4167 sc->sc_ka_holdcnt = sc->sc_ka_holdmax; 4168 sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2, 4169 16 * sc->sc_ka_count); 4170 break; 4171 case GRE_KA_HOLD: 4172 if (--sc->sc_ka_holdcnt > 0) 4173 break; 4174 4175 sc->sc_ka_state = GRE_KA_UP; 4176 gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state); 4177 break; 4178 4179 case GRE_KA_UP: 4180 sc->sc_ka_holdmax--; 4181 sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count); 4182 break; 4183 } 4184 4185 timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count); 4186 } 4187 4188 static struct mbuf * 4189 eoip_input(struct gre_tunnel *key, struct mbuf *m, 4190 const struct gre_header *gh, uint8_t otos, int iphlen) 4191 { 4192 struct eoip_softc *sc; 4193 struct gre_h_key_eoip *eoiph; 4194 int hlen, len; 4195 caddr_t buf; 4196 4197 if (gh->gre_flags != htons(GRE_KP | GRE_VERS_1)) 4198 goto decline; 4199 4200 hlen = iphlen + sizeof(*gh) + sizeof(*eoiph); 4201 if (m->m_pkthdr.len < hlen) 4202 goto decline; 4203 4204 m = m_pullup(m, hlen); 4205 if (m == NULL) 4206 return (NULL); 4207 4208 buf = mtod(m, caddr_t); 4209 gh = (struct gre_header *)(buf + iphlen); 4210 eoiph = (struct gre_h_key_eoip *)(gh + 1); 4211 4212 key->t_key = eoiph->eoip_tunnel_id; 4213 4214 NET_ASSERT_LOCKED(); 4215 sc = RBT_FIND(eoip_tree, &eoip_tree, (const struct eoip_softc *)key); 4216 if (sc == NULL) 4217 goto decline; 4218 4219 /* it's ours now */ 4220 len = bemtoh16(&eoiph->eoip_len); 4221 if (len == 0) { 4222 eoip_keepalive_recv(sc); 4223 goto drop; 4224 } 4225 4226 m = gre_ether_align(m, hlen); 4227 if (m == NULL) 4228 return (NULL); 4229 4230 if (m->m_pkthdr.len < len) 4231 goto drop; 4232 if (m->m_pkthdr.len != len) 4233 m_adj(m, 
len - m->m_pkthdr.len); 4234 4235 gre_l2_prio(&sc->sc_tunnel, m, otos); 4236 4237 m->m_flags &= ~(M_MCAST|M_BCAST); 4238 4239 #if NPF > 0 4240 pf_pkt_addr_changed(m); 4241 #endif 4242 4243 if_vinput(&sc->sc_ac.ac_if, m); 4244 4245 return (NULL); 4246 4247 decline: 4248 return (m); 4249 drop: 4250 m_freem(m); 4251 return (NULL); 4252 } 4253 4254 int 4255 gre_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 4256 size_t newlen) 4257 { 4258 int error; 4259 4260 /* All sysctl names at this level are terminal. */ 4261 if (namelen != 1) 4262 return (ENOTDIR); 4263 4264 switch (name[0]) { 4265 case GRECTL_ALLOW: 4266 NET_LOCK(); 4267 error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_allow); 4268 NET_UNLOCK(); 4269 return (error); 4270 case GRECTL_WCCP: 4271 NET_LOCK(); 4272 error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_wccp); 4273 NET_UNLOCK(); 4274 return (error); 4275 default: 4276 return (ENOPROTOOPT); 4277 } 4278 /* NOTREACHED */ 4279 } 4280 4281 static inline int 4282 gre_ip_cmp(int af, const union gre_addr *a, const union gre_addr *b) 4283 { 4284 switch (af) { 4285 #ifdef INET6 4286 case AF_INET6: 4287 return (memcmp(&a->in6, &b->in6, sizeof(a->in6))); 4288 #endif /* INET6 */ 4289 case AF_INET: 4290 return (memcmp(&a->in4, &b->in4, sizeof(a->in4))); 4291 default: 4292 panic("%s: unsupported af %d\n", __func__, af); 4293 } 4294 4295 return (0); 4296 } 4297 4298 static int 4299 gre_cmp_src(const struct gre_tunnel *a, const struct gre_tunnel *b) 4300 { 4301 uint32_t ka, kb; 4302 uint32_t mask; 4303 int rv; 4304 4305 /* is K set at all? */ 4306 ka = a->t_key_mask & GRE_KEY_ENTROPY; 4307 kb = b->t_key_mask & GRE_KEY_ENTROPY; 4308 4309 /* sort by whether K is set */ 4310 if (ka > kb) 4311 return (1); 4312 if (ka < kb) 4313 return (-1); 4314 4315 /* is K set on both? 
*/ 4316 if (ka != GRE_KEY_NONE) { 4317 /* get common prefix */ 4318 mask = a->t_key_mask & b->t_key_mask; 4319 4320 ka = a->t_key & mask; 4321 kb = b->t_key & mask; 4322 4323 /* sort by common prefix */ 4324 if (ka > kb) 4325 return (1); 4326 if (ka < kb) 4327 return (-1); 4328 } 4329 4330 /* sort by routing table */ 4331 if (a->t_rtableid > b->t_rtableid) 4332 return (1); 4333 if (a->t_rtableid < b->t_rtableid) 4334 return (-1); 4335 4336 /* sort by address */ 4337 if (a->t_af > b->t_af) 4338 return (1); 4339 if (a->t_af < b->t_af) 4340 return (-1); 4341 4342 rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src); 4343 if (rv != 0) 4344 return (rv); 4345 4346 return (0); 4347 } 4348 4349 static int 4350 gre_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b) 4351 { 4352 int rv; 4353 4354 rv = gre_cmp_src(a, b); 4355 if (rv != 0) 4356 return (rv); 4357 4358 return (gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst)); 4359 } 4360 4361 static inline int 4362 mgre_cmp(const struct mgre_softc *a, const struct mgre_softc *b) 4363 { 4364 return (gre_cmp_src(&a->sc_tunnel, &b->sc_tunnel)); 4365 } 4366 4367 RBT_GENERATE(mgre_tree, mgre_softc, sc_entry, mgre_cmp); 4368 4369 static inline int 4370 egre_cmp(const struct egre_softc *a, const struct egre_softc *b) 4371 { 4372 return (gre_cmp(&a->sc_tunnel, &b->sc_tunnel)); 4373 } 4374 4375 RBT_GENERATE(egre_tree, egre_softc, sc_entry, egre_cmp); 4376 4377 static inline int 4378 nvgre_entry_cmp(const struct nvgre_entry *a, const struct nvgre_entry *b) 4379 { 4380 return (memcmp(&a->nv_dst, &b->nv_dst, sizeof(a->nv_dst))); 4381 } 4382 4383 RBT_GENERATE(nvgre_map, nvgre_entry, nv_entry, nvgre_entry_cmp); 4384 4385 static int 4386 nvgre_cmp_tunnel(const struct gre_tunnel *a, const struct gre_tunnel *b) 4387 { 4388 uint32_t ka, kb; 4389 4390 ka = a->t_key & GRE_KEY_ENTROPY; 4391 kb = b->t_key & GRE_KEY_ENTROPY; 4392 4393 /* sort by common prefix */ 4394 if (ka > kb) 4395 return (1); 4396 if (ka < kb) 4397 return (-1); 4398 4399 /* sort by 
routing table */ 4400 if (a->t_rtableid > b->t_rtableid) 4401 return (1); 4402 if (a->t_rtableid < b->t_rtableid) 4403 return (-1); 4404 4405 /* sort by address */ 4406 if (a->t_af > b->t_af) 4407 return (1); 4408 if (a->t_af < b->t_af) 4409 return (-1); 4410 4411 return (0); 4412 } 4413 4414 static inline int 4415 nvgre_cmp_ucast(const struct nvgre_softc *na, const struct nvgre_softc *nb) 4416 { 4417 const struct gre_tunnel *a = &na->sc_tunnel; 4418 const struct gre_tunnel *b = &nb->sc_tunnel; 4419 int rv; 4420 4421 rv = nvgre_cmp_tunnel(a, b); 4422 if (rv != 0) 4423 return (rv); 4424 4425 rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src); 4426 if (rv != 0) 4427 return (rv); 4428 4429 return (0); 4430 } 4431 4432 static int 4433 nvgre_cmp_mcast(const struct gre_tunnel *a, const union gre_addr *aa, 4434 unsigned int if0idxa, const struct gre_tunnel *b, 4435 const union gre_addr *ab,unsigned int if0idxb) 4436 { 4437 int rv; 4438 4439 rv = nvgre_cmp_tunnel(a, b); 4440 if (rv != 0) 4441 return (rv); 4442 4443 rv = gre_ip_cmp(a->t_af, aa, ab); 4444 if (rv != 0) 4445 return (rv); 4446 4447 if (if0idxa > if0idxb) 4448 return (1); 4449 if (if0idxa < if0idxb) 4450 return (-1); 4451 4452 return (0); 4453 } 4454 4455 static inline int 4456 nvgre_cmp_mcast_sc(const struct nvgre_softc *na, const struct nvgre_softc *nb) 4457 { 4458 const struct gre_tunnel *a = &na->sc_tunnel; 4459 const struct gre_tunnel *b = &nb->sc_tunnel; 4460 4461 return (nvgre_cmp_mcast(a, &a->t_dst, na->sc_ifp0, 4462 b, &b->t_dst, nb->sc_ifp0)); 4463 } 4464 4465 RBT_GENERATE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast); 4466 RBT_GENERATE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc); 4467 4468 static inline int 4469 eoip_cmp(const struct eoip_softc *ea, const struct eoip_softc *eb) 4470 { 4471 const struct gre_tunnel *a = &ea->sc_tunnel; 4472 const struct gre_tunnel *b = &eb->sc_tunnel; 4473 int rv; 4474 4475 if (a->t_key > b->t_key) 4476 return (1); 4477 if (a->t_key < 
b->t_key) 4478 return (-1); 4479 4480 /* sort by routing table */ 4481 if (a->t_rtableid > b->t_rtableid) 4482 return (1); 4483 if (a->t_rtableid < b->t_rtableid) 4484 return (-1); 4485 4486 /* sort by address */ 4487 if (a->t_af > b->t_af) 4488 return (1); 4489 if (a->t_af < b->t_af) 4490 return (-1); 4491 4492 rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src); 4493 if (rv != 0) 4494 return (rv); 4495 4496 rv = gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst); 4497 if (rv != 0) 4498 return (rv); 4499 4500 return (0); 4501 } 4502 4503 RBT_GENERATE(eoip_tree, eoip_softc, sc_entry, eoip_cmp); 4504