/*	$OpenBSD: if_tun.c,v 1.250 2024/12/30 02:46:00 guenther Exp $	*/
/*	$NetBSD: if_tun.c,v 1.24 1996/05/07 02:40:48 thorpej Exp $	*/

/*
 * Copyright (c) 1988, Julian Onions <Julian.Onions@nexor.co.uk>
 * Nottingham University 1987.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * This driver takes packets off the IP i/f and hands them up to a
 * user process to have its wicked way with. This driver has its
 * roots in a similar driver written by Phil Cockcroft (formerly) at
 * UCL. This driver is based much more on read/write/select mode of
 * operation though.
 */

/* #define TUN_DEBUG 9 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/sigio.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/fcntl.h>
#include <sys/time.h>
#include <sys/device.h>
#include <sys/vnode.h>
#include <sys/signalvar.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/mutex.h>
#include <sys/smr.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/netisr.h>
#include <net/rtable.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "bpfilter.h"
#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#ifdef MPLS
#include <netmpls/mpls.h>
#endif /* MPLS */

#include <net/if_tun.h>

struct tun_softc {
	struct arpcom		sc_ac;		/* ethernet common data */
#define sc_if			sc_ac.ac_if
	struct mutex		sc_mtx;
	struct klist		sc_rklist;	/* knotes for read */
	struct klist		sc_wklist;	/* knotes for write (unused) */
	SMR_LIST_ENTRY(tun_softc)
				sc_entry;	/* all tunnel interfaces */
	int			sc_unit;
	struct sigio_ref	sc_sigio;	/* async I/O registration */
	unsigned int		sc_flags;	/* misc flags */
#define TUN_DEAD			(1 << 16)
#define TUN_HDR				(1 << 17)

	dev_t			sc_dev;
	struct refcnt		sc_refs;
	unsigned int		sc_reading;
};

#ifdef TUN_DEBUG
int	tundebug = TUN_DEBUG;
#define TUNDEBUG(a)	(tundebug? printf a : 0)
#else
#define TUNDEBUG(a)	/* (tundebug? printf a : 0) */
#endif

/* Pretend that these IFF flags are changeable by TUNSIFINFO */
#define TUN_IFF_FLAGS (IFF_POINTOPOINT|IFF_MULTICAST|IFF_BROADCAST)

#define TUN_IF_CAPS ( \
	IFCAP_CSUM_IPv4 | \
	IFCAP_CSUM_TCPv4|IFCAP_CSUM_UDPv4|IFCAP_CSUM_TCPv6|IFCAP_CSUM_UDPv6 | \
	IFCAP_VLAN_MTU|IFCAP_VLAN_HWTAGGING|IFCAP_VLAN_HWOFFLOAD | \
	IFCAP_TSOv4|IFCAP_TSOv6|IFCAP_LRO \
)

void	tunattach(int);

int	tun_dev_open(dev_t, const struct if_clone *, int, struct proc *);
int	tun_dev_close(dev_t, struct proc *);
int	tun_dev_ioctl(dev_t, u_long, void *);
int	tun_dev_read(dev_t, struct uio *, int);
int	tun_dev_write(dev_t, struct uio *, int, int);
int	tun_dev_kqfilter(dev_t, struct knote *);

int	tun_ioctl(struct ifnet *, u_long, caddr_t);
void	tun_input(struct ifnet *, struct mbuf *);
int	tun_output(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	tun_enqueue(struct ifnet *, struct mbuf *);
int	tun_clone_create(struct if_clone *, int);
int	tap_clone_create(struct if_clone *, int);
int	tun_create(struct if_clone *, int, int);
int	tun_clone_destroy(struct ifnet *);
void	tun_wakeup(struct tun_softc *);
void	tun_start(struct ifnet *);
int	filt_tunread(struct knote *, long);
int	filt_tunwrite(struct knote *, long);
int	filt_tunmodify(struct kevent *, struct knote *);
int	filt_tunprocess(struct knote *, struct kevent *);
void	filt_tunrdetach(struct knote *);
void	filt_tunwdetach(struct knote *);
void	tun_link_state(struct ifnet *, int);

const struct filterops tunread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_tunrdetach,
	.f_event	= filt_tunread,
	.f_modify	= filt_tunmodify,
	.f_process	= filt_tunprocess,
};

const struct filterops tunwrite_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_tunwdetach,
	.f_event	= filt_tunwrite,
	.f_modify	= filt_tunmodify,
	.f_process	= filt_tunprocess,
};

SMR_LIST_HEAD(tun_list, tun_softc);

struct if_clone tun_cloner =
    IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);

struct if_clone tap_cloner =
    IF_CLONE_INITIALIZER("tap", tap_clone_create, tun_clone_destroy);

void
tunattach(int n)
{
	if_clone_attach(&tun_cloner);
	if_clone_attach(&tap_cloner);
}

int
tun_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, 0));
}

int
tap_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, TUN_LAYER2));
}

struct tun_list tun_devs_list = SMR_LIST_HEAD_INITIALIZER(tun_list);

struct tun_softc *
tun_name_lookup(const char *name)
{
	struct tun_softc *sc;

	KERNEL_ASSERT_LOCKED();

	SMR_LIST_FOREACH_LOCKED(sc, &tun_devs_list, sc_entry) {
		if (strcmp(sc->sc_if.if_xname, name) == 0)
			return (sc);
	}

	return (NULL);
}

int
tun_insert(struct tun_softc *sc)
{
	int error = 0;

	/* check for a race */
	if (tun_name_lookup(sc->sc_if.if_xname) != NULL)
		error = EEXIST;
	else {
		/* tun_name_lookup checks for the right lock already */
		SMR_LIST_INSERT_HEAD_LOCKED(&tun_devs_list, sc, sc_entry);
	}

	return (error);
}
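
/*
 * tun_create() below is reached on two paths: if_clone_create() on
 * behalf of userland, and tun_dev_open() cloning the interface on the
 * first open of an unprovisioned /dev/tunN minor. A minimal sketch of
 * driving the first path, assuming only standard ifconfig(8):
 *
 *	# ifconfig tun0 create
 *	# ifconfig tun0 inet 192.0.2.1 192.0.2.2
 */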

int
tun_create(struct if_clone *ifc, int unit, int flags)
{
	struct tun_softc *sc;
	struct ifnet *ifp;

	if (unit > minor(~0U))
		return (ENXIO);

	KERNEL_ASSERT_LOCKED();

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	refcnt_init(&sc->sc_refs);

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);
	mtx_init(&sc->sc_mtx, IPL_NET);
	klist_init_mutex(&sc->sc_rklist, &sc->sc_mtx);
	klist_init_mutex(&sc->sc_wklist, &sc->sc_mtx);
	ifp->if_softc = sc;

	/* this is enough state for tun_dev_open to work with */

	if (tun_insert(sc) != 0)
		goto exists;

	/* build the interface */

	ifp->if_ioctl = tun_ioctl;
	ifp->if_enqueue = tun_enqueue;
	ifp->if_start = tun_start;
	ifp->if_hardmtu = TUNMRU;
	ifp->if_link_state = LINK_STATE_DOWN;

	if_counters_alloc(ifp);

	if ((flags & TUN_LAYER2) == 0) {
#if NBPFILTER > 0
		ifp->if_bpf_mtap = bpf_mtap;
#endif
		ifp->if_input = tun_input;
		ifp->if_output = tun_output;
		ifp->if_mtu = ETHERMTU;
		ifp->if_flags = (IFF_POINTOPOINT|IFF_MULTICAST);
		ifp->if_type = IFT_TUNNEL;
		ifp->if_hdrlen = sizeof(u_int32_t);
		ifp->if_rtrequest = p2p_rtrequest;

		if_attach(ifp);
		if_alloc_sadl(ifp);

#if NBPFILTER > 0
		bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
	} else {
		sc->sc_flags |= TUN_LAYER2;
		ether_fakeaddr(ifp);
		ifp->if_flags =
		    (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);

		if_attach(ifp);
		ether_ifattach(ifp);
	}

	sigio_init(&sc->sc_sigio);

	/* tell tun_dev_open we're initialised */

	sc->sc_flags |= TUN_INITED|TUN_STAYUP;
	wakeup(sc);

	return (0);

exists:
	klist_free(&sc->sc_rklist);
	klist_free(&sc->sc_wklist);
	free(sc, M_DEVBUF, sizeof(*sc));
	return (EEXIST);
}

int
tun_clone_destroy(struct ifnet *ifp)
{
	struct tun_softc *sc = ifp->if_softc;
	dev_t dev;

	KERNEL_ASSERT_LOCKED();

	if (ISSET(sc->sc_flags, TUN_DEAD))
		return (ENXIO);
	SET(sc->sc_flags, TUN_DEAD);

	/* kick userland off the device */
	dev = sc->sc_dev;
	if (dev) {
		struct vnode *vp;

		if (vfinddev(dev, VCHR, &vp))
			VOP_REVOKE(vp, REVOKEALL);

		KASSERT(sc->sc_dev == 0);
	}

	/* prevent userland from getting to the device again */
	SMR_LIST_REMOVE_LOCKED(sc, sc_entry);
	smr_barrier();

	/* help read() give up */
	if (sc->sc_reading)
		wakeup(&ifp->if_snd);

	/* wait for device entrypoints to finish */
	refcnt_finalize(&sc->sc_refs, "tundtor");

	klist_invalidate(&sc->sc_rklist);
	klist_invalidate(&sc->sc_wklist);

	klist_free(&sc->sc_rklist);
	klist_free(&sc->sc_wklist);

	if (ISSET(sc->sc_flags, TUN_LAYER2))
		ether_ifdetach(ifp);

	if_detach(ifp);
	sigio_free(&sc->sc_sigio);

	free(sc, M_DEVBUF, sizeof(*sc));
	return (0);
}

static struct tun_softc *
tun_get(dev_t dev)
{
	struct tun_softc *sc;

	smr_read_enter();
	SMR_LIST_FOREACH(sc, &tun_devs_list, sc_entry) {
		if (sc->sc_dev == dev) {
			refcnt_take(&sc->sc_refs);
			break;
		}
	}
	smr_read_leave();

	return (sc);
}

static inline void
tun_put(struct tun_softc *sc)
{
	refcnt_rele_wake(&sc->sc_refs);
}

int
tunopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tun_cloner, mode, p));
}
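
/*
 * A hedged userland sketch of the open path handled by tunopen() and
 * tapopen() (illustrative only; error handling trimmed): opening the
 * character device creates the interface if it does not exist yet and
 * marks it UP, RUNNING and link full-duplex.
 *
 *	int fd;
 *
 *	fd = open("/dev/tun0", O_RDWR);
 *	if (fd == -1)
 *		err(1, "open /dev/tun0");
 */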

int
tapopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tap_cloner, mode, p));
}

int
tun_dev_open(dev_t dev, const struct if_clone *ifc, int mode, struct proc *p)
{
	struct tun_softc *sc;
	struct ifnet *ifp;
	int error;
	u_short stayup = 0;
	struct vnode *vp;

	char name[IFNAMSIZ];
	unsigned int rdomain;

	/*
	 * Find the vnode associated with this open before we sleep
	 * and let something else revoke it. Our caller has a reference
	 * to it so we don't need to account for it.
	 */
	if (!vfinddev(dev, VCHR, &vp))
		panic("%s vfinddev failed", __func__);

	snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
	rdomain = rtable_l2(p->p_p->ps_rtableid);

	/* let's find or make an interface to work with */
	while ((sc = tun_name_lookup(name)) == NULL) {
		error = if_clone_create(name, rdomain);
		switch (error) {
		case 0: /* it's probably ours */
			stayup = TUN_STAYUP;
			/* FALLTHROUGH */
		case EEXIST: /* we may have lost a race with someone else */
			break;
		default:
			return (error);
		}
	}

	refcnt_take(&sc->sc_refs);

	/* wait for it to be fully constructed before we use it */
	for (;;) {
		if (ISSET(sc->sc_flags, TUN_DEAD)) {
			error = ENXIO;
			goto done;
		}

		if (ISSET(sc->sc_flags, TUN_INITED))
			break;

		error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP);
		if (error != 0) {
			/* XXX if_clone_destroy if stayup? */
			goto done;
		}
	}

	/* Has tun_clone_destroy torn the rug out under us? */
	if (vp->v_type == VBAD) {
		error = ENXIO;
		goto done;
	}

	if (sc->sc_dev != 0) {
		/* aww, we lost */
		error = EBUSY;
		goto done;
	}
	/* it's ours now */
	sc->sc_dev = dev;
	CLR(sc->sc_flags, stayup);

	/* automatically mark the interface running on open */
	ifp = &sc->sc_if;
	NET_LOCK();
	SET(ifp->if_flags, IFF_UP | IFF_RUNNING);
	NET_UNLOCK();
	tun_link_state(ifp, LINK_STATE_FULL_DUPLEX);
	error = 0;

done:
	tun_put(sc);
	return (error);
}

/*
 * tunclose - close the device; if closing the real device, flush pending
 *  output and unless STAYUP bring down and destroy the interface.
 */
int
tunclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}

int
tapclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}

int
tun_dev_close(dev_t dev, struct proc *p)
{
	struct tun_softc *sc;
	struct ifnet *ifp;
	int error = 0;
	char name[IFNAMSIZ];
	int destroy = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/*
	 * junk all pending output
	 */
	NET_LOCK();
	CLR(ifp->if_flags, IFF_UP | IFF_RUNNING);
	CLR(ifp->if_capabilities, TUN_IF_CAPS);
	NET_UNLOCK();
	ifq_purge(&ifp->if_snd);

	CLR(sc->sc_flags, TUN_ASYNC|TUN_HDR);
	sigio_free(&sc->sc_sigio);

	if (!ISSET(sc->sc_flags, TUN_DEAD)) {
		/* we can't hold a reference to sc before we start a dtor */
		if (!ISSET(sc->sc_flags, TUN_STAYUP)) {
			destroy = 1;
			strlcpy(name, ifp->if_xname, sizeof(name));
		} else {
			tun_link_state(ifp, LINK_STATE_DOWN);
		}
	}

	sc->sc_dev = 0;

	tun_put(sc);

	if (destroy)
		if_clone_destroy(name);

	return (error);
}
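
/*
 * Close-side lifecycle, as implemented above: an interface that was
 * created as a side effect of open(2) has TUN_STAYUP cleared, so its
 * last close tears it down again via if_clone_destroy(); an interface
 * created explicitly (e.g. "ifconfig tun0 create") keeps TUN_STAYUP
 * and merely drops its link state on close.
 */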

/*
 * Process an ioctl request.
 */
int
tun_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct tun_softc *sc = (struct tun_softc *)(ifp->if_softc);
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		SET(ifp->if_flags, IFF_UP);
		/* FALLTHROUGH */
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP))
			SET(ifp->if_flags, IFF_RUNNING);
		else
			CLR(ifp->if_flags, IFF_RUNNING);
		break;

	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TUNMRU)
			error = EINVAL;
		else
			ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;
	default:
		if (sc->sc_flags & TUN_LAYER2)
			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		else
			error = ENOTTY;
	}

	return (error);
}

/*
 * tun_output - queue packets from higher level ready to put out.
 */
int
tun_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
    struct rtentry *rt)
{
	u_int32_t *af;

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		m_freem(m0);
		return (EHOSTDOWN);
	}

	M_PREPEND(m0, sizeof(*af), M_DONTWAIT);
	if (m0 == NULL)
		return (ENOBUFS);
	af = mtod(m0, u_int32_t *);
	*af = htonl(dst->sa_family);

	return (if_enqueue(ifp, m0));
}

int
tun_enqueue(struct ifnet *ifp, struct mbuf *m0)
{
	struct tun_softc *sc = ifp->if_softc;
	int error;

	error = ifq_enqueue(&ifp->if_snd, m0);
	if (error != 0)
		return (error);

	tun_wakeup(sc);

	return (0);
}

void
tun_wakeup(struct tun_softc *sc)
{
	if (sc->sc_reading)
		wakeup(&sc->sc_if.if_snd);

	knote(&sc->sc_rklist, 0);

	if (sc->sc_flags & TUN_ASYNC)
		pgsigio(&sc->sc_sigio, SIGIO, 0);
}
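
/*
 * On the layer 3 "tun" flavour every packet crossing the character
 * device is prefixed with a 4-byte address family in network byte
 * order: tun_output() above prepends it on the way to userland and
 * tun_input() below consumes it on the way back in. A hedged userland
 * sketch of injecting an IPv4 packet, assuming pkt/pktlen hold a
 * complete IP datagram:
 *
 *	uint32_t af = htonl(AF_INET);
 *	struct iovec iov[2] = {
 *		{ .iov_base = &af, .iov_len = sizeof(af) },
 *		{ .iov_base = pkt, .iov_len = pktlen },
 *	};
 *
 *	if (writev(fd, iov, 2) == -1)
 *		err(1, "writev");
 */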

/*
 * the cdevsw interface is now pretty minimal.
 */
int
tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}

int
tapioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}

static int
tun_set_capabilities(struct tun_softc *sc, const struct tun_capabilities *cap)
{
	if (ISSET(cap->tun_if_capabilities, ~TUN_IF_CAPS))
		return (EINVAL);

	KERNEL_ASSERT_LOCKED();
	SET(sc->sc_flags, TUN_HDR);

	NET_LOCK();
	CLR(sc->sc_if.if_capabilities, TUN_IF_CAPS);
	SET(sc->sc_if.if_capabilities, cap->tun_if_capabilities);
	NET_UNLOCK();
	return (0);
}

static int
tun_get_capabilities(struct tun_softc *sc, struct tun_capabilities *cap)
{
	int error = 0;

	NET_LOCK_SHARED();
	if (ISSET(sc->sc_flags, TUN_HDR)) {
		cap->tun_if_capabilities =
		    (sc->sc_if.if_capabilities & TUN_IF_CAPS);
	} else
		error = ENODEV;
	NET_UNLOCK_SHARED();

	return (error);
}

static int
tun_del_capabilities(struct tun_softc *sc)
{
	NET_LOCK();
	CLR(sc->sc_if.if_capabilities, TUN_IF_CAPS);
	NET_UNLOCK();

	KERNEL_ASSERT_LOCKED();
	CLR(sc->sc_flags, TUN_HDR);

	return (0);
}

static int
tun_hdatalen(struct tun_softc *sc)
{
	struct ifnet *ifp = &sc->sc_if;
	int len;

	len = ifq_hdatalen(&ifp->if_snd);
	if (len > 0 && ISSET(sc->sc_flags, TUN_HDR))
		len += sizeof(struct tun_hdr);

	return (len);
}
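
/*
 * A hedged sketch of a userland consumer enabling the offload header
 * via TUNSCAP (handled in tun_dev_ioctl() below; the particular
 * capability mix here is only an example):
 *
 *	struct tun_capabilities cap = {
 *		.tun_if_capabilities = IFCAP_CSUM_IPv4 | IFCAP_TSOv4
 *	};
 *
 *	if (ioctl(fd, TUNSCAP, &cap) == -1)
 *		err(1, "TUNSCAP");
 *
 * Once TUNSCAP succeeds, every packet read from or written to the
 * device is preceded by a struct tun_hdr carrying the offload state.
 */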

int
tun_dev_ioctl(dev_t dev, u_long cmd, void *data)
{
	struct tun_softc *sc;
	struct tuninfo *tunp;
	int error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	switch (cmd) {
	case TUNSIFINFO:
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < ETHERMIN || tunp->mtu > TUNMRU) {
			error = EINVAL;
			break;
		}
		if (tunp->type != sc->sc_if.if_type) {
			error = EINVAL;
			break;
		}
		if (tunp->flags != (sc->sc_if.if_flags & TUN_IFF_FLAGS)) {
			error = EINVAL;
			break;
		}
		sc->sc_if.if_mtu = tunp->mtu;
		sc->sc_if.if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = sc->sc_if.if_mtu;
		tunp->type = sc->sc_if.if_type;
		tunp->flags = sc->sc_if.if_flags & TUN_IFF_FLAGS;
		tunp->baudrate = sc->sc_if.if_baudrate;
		break;
#ifdef TUN_DEBUG
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
#endif
	case TUNSIFMODE:
		if (*(int *)data != (sc->sc_if.if_flags & TUN_IFF_FLAGS)) {
			error = EINVAL;
			break;
		}
		break;

	case TUNSCAP:
		error = tun_set_capabilities(sc,
		    (const struct tun_capabilities *)data);
		break;
	case TUNGCAP:
		error = tun_get_capabilities(sc,
		    (struct tun_capabilities *)data);
		break;
	case TUNDCAP:
		error = tun_del_capabilities(sc);
		break;

	case FIOASYNC:
		if (*(int *)data)
			sc->sc_flags |= TUN_ASYNC;
		else
			sc->sc_flags &= ~TUN_ASYNC;
		break;
	case FIONREAD:
		*(int *)data = tun_hdatalen(sc);
		break;
	case FIOSETOWN:
	case TIOCSPGRP:
		error = sigio_setown(&sc->sc_sigio, cmd, data);
		break;
	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&sc->sc_sigio, cmd, data);
		break;
	case SIOCGIFADDR:
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(sc->sc_ac.ac_enaddr, data,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;

	case SIOCSIFADDR:
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(data, sc->sc_ac.ac_enaddr,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;
	default:
		error = ENOTTY;
		break;
	}

	tun_put(sc);
	return (error);
}

/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 */
int
tunread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}

int
tapread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}

int
tun_dev_read(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc *sc;
	struct ifnet *ifp;
	struct mbuf *m, *m0;
	size_t len;
	int error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY),
	    (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev);
	if (error != 0)
		goto put;

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif

	if (ISSET(sc->sc_flags, TUN_HDR)) {
		struct tun_hdr th;

		KASSERT(ISSET(m0->m_flags, M_PKTHDR));

		th.th_flags = 0;
		if (ISSET(m0->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT))
			SET(th.th_flags, TUN_H_IPV4_CSUM);
		if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_CSUM_OUT))
			SET(th.th_flags, TUN_H_TCP_CSUM);
		if (ISSET(m0->m_pkthdr.csum_flags, M_UDP_CSUM_OUT))
			SET(th.th_flags, TUN_H_UDP_CSUM);
		if (ISSET(m0->m_pkthdr.csum_flags, M_ICMP_CSUM_OUT))
			SET(th.th_flags, TUN_H_ICMP_CSUM);

		th.th_pad = 0;

		th.th_vtag = 0;
		if (ISSET(m0->m_flags, M_VLANTAG)) {
			SET(th.th_flags, TUN_H_VTAG);
			th.th_vtag = m0->m_pkthdr.ether_vtag;
		}

		th.th_mss = 0;
		if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO)) {
			SET(th.th_flags, TUN_H_TCP_MSS);
			th.th_mss = m0->m_pkthdr.ph_mss;
		}

		len = ulmin(uio->uio_resid, sizeof(th));
		if (len > 0) {
			error = uiomove(&th, len, uio);
			if (error != 0)
				goto free;
		}
	}

	m = m0;
	while (uio->uio_resid > 0) {
		len = ulmin(uio->uio_resid, m->m_len);
		if (len > 0) {
			error = uiomove(mtod(m, void *), len, uio);
			if (error != 0)
				break;
		}

		m = m->m_next;
		if (m == NULL)
			break;
	}

free:
	m_freem(m0);

put:
	tun_put(sc);
	return (error);
}
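
/*
 * With TUN_HDR enabled a single read(2) therefore yields the
 * struct tun_hdr, then the usual encapsulation (the 4-byte address
 * family on tun, the Ethernet header on tap), then the payload.
 * A hedged sketch of the consumer side; the buffer size is an
 * arbitrary assumption:
 *
 *	char buf[sizeof(struct tun_hdr) + 4 + 65536];
 *	struct tun_hdr th;
 *	ssize_t n;
 *
 *	n = read(fd, buf, sizeof(buf));
 *	if (n == -1)
 *		err(1, "read");
 *	memcpy(&th, buf, sizeof(th));
 *	if (th.th_flags & TUN_H_TCP_MSS)
 *		;	(oversized TCP packet, segment at th.th_mss)
 */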

/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 */
int
tunwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, 0));
}

int
tapwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, ETHER_ALIGN));
}

int
tun_dev_write(dev_t dev, struct uio *uio, int ioflag, int align)
{
	struct tun_softc *sc;
	struct ifnet *ifp;
	struct mbuf *m0, *m, *n;
	int error = 0;
	size_t len, alen, mlen;
	size_t hlen;
	struct tun_hdr th;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	hlen = ifp->if_hdrlen;
	if (ISSET(sc->sc_flags, TUN_HDR))
		hlen += sizeof(th);
	if (uio->uio_resid < hlen ||
	    uio->uio_resid > (hlen + MAXMCLBYTES)) {
		error = EMSGSIZE;
		goto put;
	}

	m0 = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m0 == NULL) {
		error = ENOMEM;
		goto put;
	}

	if (ISSET(sc->sc_flags, TUN_HDR)) {
		error = uiomove(&th, sizeof(th), uio);
		if (error != 0)
			goto drop;

		if (ISSET(th.th_flags, TUN_H_IPV4_CSUM)) {
			SET(m0->m_pkthdr.csum_flags,
			    M_IPV4_CSUM_OUT | M_IPV4_CSUM_IN_OK);
		}

		switch (th.th_flags &
		    (TUN_H_TCP_CSUM|TUN_H_UDP_CSUM|TUN_H_ICMP_CSUM)) {
		case 0:
			break;
		case TUN_H_TCP_CSUM:
			SET(m0->m_pkthdr.csum_flags,
			    M_TCP_CSUM_OUT | M_TCP_CSUM_IN_OK);
			break;
		case TUN_H_UDP_CSUM:
			SET(m0->m_pkthdr.csum_flags,
			    M_UDP_CSUM_OUT | M_UDP_CSUM_IN_OK);
			break;
		case TUN_H_ICMP_CSUM:
			SET(m0->m_pkthdr.csum_flags,
			    M_ICMP_CSUM_OUT | M_ICMP_CSUM_IN_OK);
			break;
		default:
			error = EINVAL;
			goto drop;
		}

		if (ISSET(th.th_flags, TUN_H_VTAG)) {
			if (!ISSET(sc->sc_flags, TUN_LAYER2)) {
				error = EINVAL;
				goto drop;
			}
			SET(m0->m_flags, M_VLANTAG);
			m0->m_pkthdr.ether_vtag = th.th_vtag;
		}

		if (ISSET(th.th_flags, TUN_H_TCP_MSS)) {
			SET(m0->m_pkthdr.csum_flags, M_TCP_TSO);
			m0->m_pkthdr.ph_mss = th.th_mss;
		}
	}

	align += roundup(max_linkhdr, sizeof(long));
	mlen = MHLEN;			/* how much space in the mbuf */

	len = uio->uio_resid;
	m0->m_pkthdr.len = len;

	m = m0;
	for (;;) {
		alen = align + len;	/* what we want to put in this mbuf */
		if (alen > mlen) {
			if (alen > MAXMCLBYTES)
				alen = MAXMCLBYTES;
			m_clget(m, M_DONTWAIT, alen);
			if (!ISSET(m->m_flags, M_EXT)) {
				/* free the chain built so far, not just sc */
				error = ENOMEM;
				goto drop;
			}
		}

		m->m_len = alen;
		if (align > 0) {
			/* avoid m_adj to protect m0->m_pkthdr.len */
			m->m_data += align;
			m->m_len -= align;
		}

		error = uiomove(mtod(m, void *), m->m_len, uio);
		if (error != 0)
			goto drop;

		len = uio->uio_resid;
		if (len == 0)
			break;

		n = m_get(M_DONTWAIT, MT_DATA);
		if (n == NULL) {
			/* free the chain built so far, not just sc */
			error = ENOMEM;
			goto drop;
		}

		align = 0;
		mlen = MLEN;

		m->m_next = n;
		m = n;
	}

	NET_LOCK();
	if_vinput(ifp, m0);
	NET_UNLOCK();

	tun_put(sc);
	return (0);

drop:
	m_freem(m0);
put:
	tun_put(sc);
	return (error);
}
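
/*
 * Writes are all or nothing: a single write(2) must carry exactly one
 * packet, bounded by hlen and hlen + MAXMCLBYTES as checked above.
 * A hedged sketch of the write side with the offload header enabled,
 * pairing with the read example after tun_dev_read(); pkt/pktlen are
 * assumed to hold a complete IPv4 datagram:
 *
 *	struct tun_hdr th = { .th_flags = TUN_H_IPV4_CSUM };
 *	uint32_t af = htonl(AF_INET);
 *	struct iovec iov[3] = {
 *		{ .iov_base = &th, .iov_len = sizeof(th) },
 *		{ .iov_base = &af, .iov_len = sizeof(af) },
 *		{ .iov_base = pkt, .iov_len = pktlen },
 *	};
 *
 *	if (writev(fd, iov, 3) == -1)
 *		err(1, "writev");
 */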

void
tun_input(struct ifnet *ifp, struct mbuf *m0)
{
	uint32_t af;

	KASSERT(m0->m_len >= sizeof(af));

	af = *mtod(m0, uint32_t *);
	/* strip the tunnel header */
	m_adj(m0, sizeof(af));

	switch (ntohl(af)) {
	case AF_INET:
		ipv4_input(ifp, m0);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m0);
		break;
#endif
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m0);
		break;
#endif
	default:
		m_freem(m0);
		break;
	}
}

int
tunkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}

int
tapkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}

int
tun_dev_kqfilter(dev_t dev, struct knote *kn)
{
	struct tun_softc *sc;
	struct klist *klist;
	int error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &sc->sc_rklist;
		kn->kn_fop = &tunread_filtops;
		break;
	case EVFILT_WRITE:
		klist = &sc->sc_wklist;
		kn->kn_fop = &tunwrite_filtops;
		break;
	default:
		error = EINVAL;
		goto put;
	}

	kn->kn_hook = sc;

	klist_insert(klist, kn);

put:
	tun_put(sc);
	return (error);
}

void
filt_tunrdetach(struct knote *kn)
{
	struct tun_softc *sc = kn->kn_hook;

	klist_remove(&sc->sc_rklist, kn);
}

int
filt_tunread(struct knote *kn, long hint)
{
	struct tun_softc *sc = kn->kn_hook;

	MUTEX_ASSERT_LOCKED(&sc->sc_mtx);

	kn->kn_data = tun_hdatalen(sc);

	return (kn->kn_data > 0);
}

void
filt_tunwdetach(struct knote *kn)
{
	struct tun_softc *sc = kn->kn_hook;

	klist_remove(&sc->sc_wklist, kn);
}

int
filt_tunwrite(struct knote *kn, long hint)
{
	struct tun_softc *sc = kn->kn_hook;
	struct ifnet *ifp = &sc->sc_if;

	MUTEX_ASSERT_LOCKED(&sc->sc_mtx);

	kn->kn_data = ifp->if_hdrlen + ifp->if_hardmtu;

	return (1);
}

int
filt_tunmodify(struct kevent *kev, struct knote *kn)
{
	struct tun_softc *sc = kn->kn_hook;
	int active;

	mtx_enter(&sc->sc_mtx);
	active = knote_modify(kev, kn);
	mtx_leave(&sc->sc_mtx);

	return (active);
}

int
filt_tunprocess(struct knote *kn, struct kevent *kev)
{
	struct tun_softc *sc = kn->kn_hook;
	int active;

	mtx_enter(&sc->sc_mtx);
	active = knote_process(kn, kev);
	mtx_leave(&sc->sc_mtx);

	return (active);
}

void
tun_start(struct ifnet *ifp)
{
	struct tun_softc *sc = ifp->if_softc;

	splassert(IPL_NET);

	if (ifq_len(&ifp->if_snd))
		tun_wakeup(sc);
}

void
tun_link_state(struct ifnet *ifp, int link_state)
{
	if (ifp->if_link_state != link_state) {
		ifp->if_link_state = link_state;
		if_link_state_change(ifp);
	}
}