1 /* $OpenBSD: if_mpe.c,v 1.97 2020/08/21 22:59:27 kn Exp $ */ 2 3 /* 4 * Copyright (c) 2008 Pierre-Yves Ritschard <pyr@spootnik.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/param.h> 20 #include <sys/systm.h> 21 #include <sys/mbuf.h> 22 #include <sys/socket.h> 23 #include <sys/sockio.h> 24 #include <sys/ioctl.h> 25 26 #include <net/if.h> 27 #include <net/if_dl.h> 28 #include <net/if_var.h> 29 #include <net/if_types.h> 30 #include <net/netisr.h> 31 #include <net/route.h> 32 33 #include <netinet/in.h> 34 #include <netinet/ip.h> 35 36 #ifdef INET6 37 #include <netinet/ip6.h> 38 #endif /* INET6 */ 39 40 #include "bpfilter.h" 41 #if NBPFILTER > 0 42 #include <net/bpf.h> 43 #endif 44 45 #include <netmpls/mpls.h> 46 47 48 49 #ifdef MPLS_DEBUG 50 #define DPRINTF(x) do { if (mpedebug) printf x ; } while (0) 51 #else 52 #define DPRINTF(x) 53 #endif 54 55 struct mpe_softc { 56 struct ifnet sc_if; /* the interface */ 57 int sc_txhprio; 58 int sc_rxhprio; 59 unsigned int sc_rdomain; 60 struct ifaddr sc_ifa; 61 struct sockaddr_mpls sc_smpls; 62 63 int sc_dead; 64 }; 65 66 #define MPE_HDRLEN sizeof(struct shim_hdr) 67 #define MPE_MTU 1500 68 #define MPE_MTU_MIN 256 69 #define MPE_MTU_MAX 8192 70 71 void mpeattach(int); 72 int mpe_output(struct ifnet *, struct mbuf *, struct sockaddr *, 73 struct rtentry *); 74 int mpe_ioctl(struct ifnet *, u_long, caddr_t); 75 void mpe_start(struct ifnet *); 76 int mpe_clone_create(struct if_clone *, int); 77 int mpe_clone_destroy(struct ifnet *); 78 void mpe_input(struct ifnet *, struct mbuf *); 79 80 struct if_clone mpe_cloner = 81 IF_CLONE_INITIALIZER("mpe", mpe_clone_create, mpe_clone_destroy); 82 83 extern int mpls_mapttl_ip; 84 #ifdef INET6 85 extern int mpls_mapttl_ip6; 86 #endif 87 88 void 89 mpeattach(int nmpe) 90 { 91 if_clone_attach(&mpe_cloner); 92 } 93 94 int 95 mpe_clone_create(struct if_clone *ifc, int unit) 96 { 97 struct mpe_softc *sc; 98 struct ifnet *ifp; 99 100 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); 101 if (sc == NULL) 102 return (ENOMEM); 103 104 ifp = &sc->sc_if; 105 snprintf(ifp->if_xname, sizeof ifp->if_xname, "mpe%d", unit); 106 ifp->if_flags = IFF_POINTOPOINT; 107 ifp->if_xflags = IFXF_CLONED; 108 ifp->if_softc = sc; 109 ifp->if_mtu = MPE_MTU; 110 ifp->if_ioctl = mpe_ioctl; 111 ifp->if_output = mpe_output; 112 ifp->if_start = mpe_start; 113 ifp->if_type = IFT_MPLS; 114 ifp->if_hdrlen = MPE_HDRLEN; 115 116 sc->sc_dead = 0; 117 118 if_attach(ifp); 119 if_alloc_sadl(ifp); 120 #if NBPFILTER > 0 121 bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t)); 122 #endif 123 124 sc->sc_txhprio = 0; 125 sc->sc_rxhprio = IF_HDRPRIO_PACKET; 126 sc->sc_rdomain = 0; 127 sc->sc_ifa.ifa_ifp = ifp; 128 sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl); 129 sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls); 130 sc->sc_smpls.smpls_family = AF_MPLS; 131 132 return (0); 133 } 134 135 int 136 mpe_clone_destroy(struct ifnet *ifp) 137 { 138 struct mpe_softc *sc = ifp->if_softc; 139 140 NET_LOCK(); 141 CLR(ifp->if_flags, IFF_RUNNING); 142 sc->sc_dead = 1; 143 144 if (sc->sc_smpls.smpls_label) { 145 rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL, 146 smplstosa(&sc->sc_smpls), sc->sc_rdomain); 147 } 148 NET_UNLOCK(); 149 150 ifq_barrier(&ifp->if_snd); 151 152 if_detach(ifp); 153 free(sc, M_DEVBUF, sizeof *sc); 154 return (0); 155 } 156 157 /* 158 * Start output on the mpe interface. 159 */ 160 void 161 mpe_start(struct ifnet *ifp) 162 { 163 struct mpe_softc *sc = ifp->if_softc; 164 struct mbuf *m; 165 struct sockaddr *sa; 166 struct sockaddr smpls = { .sa_family = AF_MPLS }; 167 struct rtentry *rt; 168 struct ifnet *ifp0; 169 170 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { 171 sa = mtod(m, struct sockaddr *); 172 rt = rtalloc(sa, RT_RESOLVE, sc->sc_rdomain); 173 if (!rtisvalid(rt)) { 174 m_freem(m); 175 rtfree(rt); 176 continue; 177 } 178 179 ifp0 = if_get(rt->rt_ifidx); 180 if (ifp0 == NULL) { 181 m_freem(m); 182 rtfree(rt); 183 continue; 184 } 185 186 m_adj(m, sa->sa_len); 187 188 #if NBPFILTER > 0 189 if (ifp->if_bpf) { 190 /* remove MPLS label before passing packet to bpf */ 191 m->m_data += sizeof(struct shim_hdr); 192 m->m_len -= sizeof(struct shim_hdr); 193 m->m_pkthdr.len -= sizeof(struct shim_hdr); 194 bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family, 195 m, BPF_DIRECTION_OUT); 196 m->m_data -= sizeof(struct shim_hdr); 197 m->m_len += sizeof(struct shim_hdr); 198 m->m_pkthdr.len += sizeof(struct shim_hdr); 199 } 200 #endif 201 202 m->m_pkthdr.ph_rtableid = sc->sc_rdomain; 203 CLR(m->m_flags, M_BCAST|M_MCAST); 204 205 mpls_output(ifp0, m, &smpls, rt); 206 if_put(ifp0); 207 rtfree(rt); 208 } 209 } 210 211 int 212 mpe_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 213 struct rtentry *rt) 214 { 215 struct mpe_softc *sc; 216 struct rt_mpls *rtmpls; 217 struct shim_hdr shim; 218 int error; 219 int txprio; 220 uint8_t ttl = mpls_defttl; 221 uint8_t tos, prio; 222 size_t ttloff; 223 socklen_t slen; 224 225 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPLS)) { 226 m_freem(m); 227 return (ENETUNREACH); 228 } 229 230 if (dst->sa_family == AF_LINK && ISSET(rt->rt_flags, RTF_LOCAL)) { 231 mpe_input(ifp, m); 232 return (0); 233 } 234 235 #ifdef DIAGNOSTIC 236 if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid)) { 237 printf("%s: trying to send packet on wrong domain. " 238 "if %d vs. mbuf %d\n", ifp->if_xname, 239 ifp->if_rdomain, rtable_l2(m->m_pkthdr.ph_rtableid)); 240 } 241 #endif 242 243 rtmpls = (struct rt_mpls *)rt->rt_llinfo; 244 if (rtmpls->mpls_operation != MPLS_OP_PUSH) { 245 m_freem(m); 246 return (ENETUNREACH); 247 } 248 249 error = 0; 250 switch (dst->sa_family) { 251 case AF_INET: { 252 struct ip *ip = mtod(m, struct ip *); 253 tos = ip->ip_tos; 254 ttloff = offsetof(struct ip, ip_ttl); 255 slen = sizeof(struct sockaddr_in); 256 break; 257 } 258 #ifdef INET6 259 case AF_INET6: { 260 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 261 uint32_t flow = bemtoh32(&ip6->ip6_flow); 262 tos = flow >> 20; 263 ttloff = offsetof(struct ip6_hdr, ip6_hlim); 264 slen = sizeof(struct sockaddr_in6); 265 break; 266 } 267 #endif 268 default: 269 m_freem(m); 270 return (EPFNOSUPPORT); 271 } 272 273 if (mpls_mapttl_ip) { 274 /* assumes the ip header is already contig */ 275 ttl = *(mtod(m, uint8_t *) + ttloff); 276 } 277 278 sc = ifp->if_softc; 279 txprio = sc->sc_txhprio; 280 281 switch (txprio) { 282 case IF_HDRPRIO_PACKET: 283 prio = m->m_pkthdr.pf.prio; 284 break; 285 case IF_HDRPRIO_PAYLOAD: 286 prio = IFQ_TOS2PRIO(tos); 287 break; 288 default: 289 prio = txprio; 290 break; 291 } 292 293 shim.shim_label = rtmpls->mpls_label | htonl(prio << MPLS_EXP_OFFSET) | 294 MPLS_BOS_MASK | htonl(ttl); 295 296 m = m_prepend(m, sizeof(shim), M_NOWAIT); 297 if (m == NULL) { 298 error = ENOMEM; 299 goto out; 300 } 301 *mtod(m, struct shim_hdr *) = shim; 302 303 m = m_prepend(m, slen, M_WAITOK); 304 if (m == NULL) { 305 error = ENOMEM; 306 goto out; 307 } 308 memcpy(mtod(m, struct sockaddr *), rt->rt_gateway, slen); 309 mtod(m, struct sockaddr *)->sa_len = slen; /* to be sure */ 310 311 m->m_pkthdr.ph_family = dst->sa_family; 312 313 error = if_enqueue(ifp, m); 314 out: 315 if (error) 316 ifp->if_oerrors++; 317 return (error); 318 } 319 320 int 321 mpe_set_label(struct mpe_softc *sc, uint32_t label, unsigned int rdomain) 322 { 323 int error; 324 325 if (sc->sc_dead) 326 return (ENXIO); 327 328 if (sc->sc_smpls.smpls_label) { 329 /* remove old MPLS route */ 330 rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL, 331 smplstosa(&sc->sc_smpls), sc->sc_rdomain); 332 } 333 334 /* add new MPLS route */ 335 sc->sc_smpls.smpls_label = label; 336 sc->sc_rdomain = rdomain; 337 338 error = rt_ifa_add(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL, 339 smplstosa(&sc->sc_smpls), sc->sc_rdomain); 340 if (error) 341 sc->sc_smpls.smpls_label = 0; 342 343 return (error); 344 } 345 346 int 347 mpe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 348 { 349 struct mpe_softc *sc = ifp->if_softc; 350 struct ifreq *ifr; 351 struct shim_hdr shim; 352 int error = 0; 353 354 ifr = (struct ifreq *)data; 355 switch (cmd) { 356 case SIOCSIFADDR: 357 break; 358 case SIOCSIFFLAGS: 359 if (ifp->if_flags & IFF_UP) 360 ifp->if_flags |= IFF_RUNNING; 361 else 362 ifp->if_flags &= ~IFF_RUNNING; 363 break; 364 case SIOCSIFMTU: 365 if (ifr->ifr_mtu < MPE_MTU_MIN || 366 ifr->ifr_mtu > MPE_MTU_MAX) 367 error = EINVAL; 368 else 369 ifp->if_mtu = ifr->ifr_mtu; 370 break; 371 case SIOCGETLABEL: 372 shim.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label); 373 if (shim.shim_label == 0) { 374 error = EADDRNOTAVAIL; 375 break; 376 } 377 error = copyout(&shim, ifr->ifr_data, sizeof(shim)); 378 break; 379 case SIOCSETLABEL: 380 error = copyin(ifr->ifr_data, &shim, sizeof(shim)); 381 if (error != 0) 382 break; 383 if (shim.shim_label > MPLS_LABEL_MAX || 384 shim.shim_label <= MPLS_LABEL_RESERVED_MAX) { 385 error = EINVAL; 386 break; 387 } 388 shim.shim_label = MPLS_LABEL2SHIM(shim.shim_label); 389 if (sc->sc_smpls.smpls_label != shim.shim_label) { 390 error = mpe_set_label(sc, shim.shim_label, 391 sc->sc_rdomain); 392 } 393 break; 394 case SIOCDELLABEL: 395 if (sc->sc_smpls.smpls_label != MPLS_LABEL2SHIM(0)) { 396 rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL, 397 smplstosa(&sc->sc_smpls), sc->sc_rdomain); 398 399 } 400 shim.shim_label = MPLS_LABEL2SHIM(0); 401 break; 402 403 case SIOCSLIFPHYRTABLE: 404 if (ifr->ifr_rdomainid < 0 || 405 ifr->ifr_rdomainid > RT_TABLEID_MAX || 406 !rtable_exists(ifr->ifr_rdomainid) || 407 ifr->ifr_rdomainid != rtable_l2(ifr->ifr_rdomainid)) { 408 error = EINVAL; 409 break; 410 } 411 if (sc->sc_rdomain != ifr->ifr_rdomainid) { 412 error = mpe_set_label(sc, sc->sc_smpls.smpls_label, 413 ifr->ifr_rdomainid); 414 } 415 break; 416 case SIOCGLIFPHYRTABLE: 417 ifr->ifr_rdomainid = sc->sc_rdomain; 418 break; 419 420 case SIOCSTXHPRIO: 421 error = if_txhprio_l3_check(ifr->ifr_hdrprio); 422 if (error != 0) 423 break; 424 425 sc->sc_txhprio = ifr->ifr_hdrprio; 426 break; 427 case SIOCGTXHPRIO: 428 ifr->ifr_hdrprio = sc->sc_txhprio; 429 break; 430 431 case SIOCSRXHPRIO: 432 error = if_rxhprio_l3_check(ifr->ifr_hdrprio); 433 if (error != 0) 434 break; 435 436 sc->sc_rxhprio = ifr->ifr_hdrprio; 437 break; 438 case SIOCGRXHPRIO: 439 ifr->ifr_hdrprio = sc->sc_rxhprio; 440 break; 441 442 default: 443 return (ENOTTY); 444 } 445 446 return (error); 447 } 448 449 void 450 mpe_input(struct ifnet *ifp, struct mbuf *m) 451 { 452 struct mpe_softc *sc = ifp->if_softc; 453 struct shim_hdr *shim; 454 struct mbuf *n; 455 uint8_t ttl, tos; 456 uint32_t exp; 457 void (*input)(struct ifnet *, struct mbuf *); 458 int rxprio = sc->sc_rxhprio; 459 460 shim = mtod(m, struct shim_hdr *); 461 exp = ntohl(shim->shim_label & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET; 462 if (!MPLS_BOS_ISSET(shim->shim_label)) 463 goto drop; 464 465 ttl = ntohl(shim->shim_label & MPLS_TTL_MASK); 466 m_adj(m, sizeof(*shim)); 467 468 n = m; 469 while (n->m_len == 0) { 470 n = n->m_next; 471 if (n == NULL) 472 goto drop; 473 } 474 475 switch (*mtod(n, uint8_t *) >> 4) { 476 case 4: { 477 struct ip *ip; 478 if (m->m_len < sizeof(*ip)) { 479 m = m_pullup(m, sizeof(*ip)); 480 if (m == NULL) 481 return; 482 } 483 ip = mtod(m, struct ip *); 484 tos = ip->ip_tos; 485 486 if (mpls_mapttl_ip) { 487 m = mpls_ip_adjttl(m, ttl); 488 if (m == NULL) 489 return; 490 } 491 input = ipv4_input; 492 m->m_pkthdr.ph_family = AF_INET; 493 break; 494 } 495 #ifdef INET6 496 case 6: { 497 struct ip6_hdr *ip6; 498 uint32_t flow; 499 if (m->m_len < sizeof(*ip6)) { 500 m = m_pullup(m, sizeof(*ip6)); 501 if (m == NULL) 502 return; 503 } 504 ip6 = mtod(m, struct ip6_hdr *); 505 flow = bemtoh32(&ip6->ip6_flow); 506 tos = flow >> 20; 507 508 if (mpls_mapttl_ip6) { 509 m = mpls_ip6_adjttl(m, ttl); 510 if (m == NULL) 511 return; 512 } 513 input = ipv6_input; 514 m->m_pkthdr.ph_family = AF_INET6; 515 break; 516 } 517 #endif /* INET6 */ 518 default: 519 goto drop; 520 } 521 522 switch (rxprio) { 523 case IF_HDRPRIO_PACKET: 524 /* nop */ 525 break; 526 case IF_HDRPRIO_OUTER: 527 m->m_pkthdr.pf.prio = exp; 528 break; 529 case IF_HDRPRIO_PAYLOAD: 530 m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos); 531 break; 532 default: 533 m->m_pkthdr.pf.prio = rxprio; 534 break; 535 } 536 537 /* new receive if and move into correct rtable */ 538 m->m_pkthdr.ph_ifidx = ifp->if_index; 539 m->m_pkthdr.ph_rtableid = ifp->if_rdomain; 540 541 /* packet has not been processed by PF yet. */ 542 KASSERT(m->m_pkthdr.pf.statekey == NULL); 543 544 #if NBPFILTER > 0 545 if (ifp->if_bpf) { 546 bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family, 547 m, BPF_DIRECTION_IN); 548 } 549 #endif 550 551 (*input)(ifp, m); 552 return; 553 drop: 554 m_freem(m); 555 } 556