1 /* $OpenBSD: if_mpe.c,v 1.102 2022/08/29 07:51:45 bluhm Exp $ */ 2 3 /* 4 * Copyright (c) 2008 Pierre-Yves Ritschard <pyr@spootnik.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/param.h> 20 #include <sys/systm.h> 21 #include <sys/mbuf.h> 22 #include <sys/socket.h> 23 #include <sys/sockio.h> 24 #include <sys/ioctl.h> 25 26 #include <net/if.h> 27 #include <net/if_dl.h> 28 #include <net/if_var.h> 29 #include <net/if_types.h> 30 #include <net/netisr.h> 31 #include <net/route.h> 32 33 #include <netinet/in.h> 34 #include <netinet/ip.h> 35 36 #ifdef INET6 37 #include <netinet/ip6.h> 38 #endif /* INET6 */ 39 40 #include "bpfilter.h" 41 #if NBPFILTER > 0 42 #include <net/bpf.h> 43 #endif 44 45 #include <netmpls/mpls.h> 46 47 48 49 #ifdef MPLS_DEBUG 50 #define DPRINTF(x) do { if (mpedebug) printf x ; } while (0) 51 #else 52 #define DPRINTF(x) 53 #endif 54 55 struct mpe_softc { 56 struct ifnet sc_if; /* the interface */ 57 int sc_txhprio; 58 int sc_rxhprio; 59 unsigned int sc_rdomain; 60 struct ifaddr sc_ifa; 61 struct sockaddr_mpls sc_smpls; 62 63 int sc_dead; 64 }; 65 66 #define MPE_HDRLEN sizeof(struct shim_hdr) 67 #define MPE_MTU 1500 68 #define MPE_MTU_MIN 256 69 #define MPE_MTU_MAX 8192 70 71 void mpeattach(int); 72 int mpe_output(struct ifnet *, struct mbuf *, struct sockaddr *, 73 struct rtentry *); 74 int mpe_ioctl(struct ifnet *, u_long, caddr_t); 75 void mpe_start(struct ifnet *); 76 int mpe_clone_create(struct if_clone *, int); 77 int mpe_clone_destroy(struct ifnet *); 78 void mpe_input(struct ifnet *, struct mbuf *); 79 80 struct if_clone mpe_cloner = 81 IF_CLONE_INITIALIZER("mpe", mpe_clone_create, mpe_clone_destroy); 82 83 extern int mpls_mapttl_ip; 84 #ifdef INET6 85 extern int mpls_mapttl_ip6; 86 #endif 87 88 void 89 mpeattach(int nmpe) 90 { 91 if_clone_attach(&mpe_cloner); 92 } 93 94 int 95 mpe_clone_create(struct if_clone *ifc, int unit) 96 { 97 struct mpe_softc *sc; 98 struct ifnet *ifp; 99 100 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); 101 if (sc == NULL) 102 return (ENOMEM); 103 104 ifp = &sc->sc_if; 105 snprintf(ifp->if_xname, sizeof ifp->if_xname, "mpe%d", unit); 106 ifp->if_flags = IFF_POINTOPOINT; 107 ifp->if_xflags = IFXF_CLONED; 108 ifp->if_softc = sc; 109 ifp->if_mtu = MPE_MTU; 110 ifp->if_ioctl = mpe_ioctl; 111 ifp->if_bpf_mtap = p2p_bpf_mtap; 112 ifp->if_input = p2p_input; 113 ifp->if_output = mpe_output; 114 ifp->if_start = mpe_start; 115 ifp->if_type = IFT_MPLS; 116 ifp->if_hdrlen = MPE_HDRLEN; 117 118 sc->sc_dead = 0; 119 120 if_attach(ifp); 121 if_alloc_sadl(ifp); 122 if_counters_alloc(ifp); 123 124 #if NBPFILTER > 0 125 bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t)); 126 #endif 127 128 sc->sc_txhprio = 0; 129 sc->sc_rxhprio = IF_HDRPRIO_PACKET; 130 sc->sc_rdomain = 0; 131 refcnt_init_trace(&sc->sc_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR); 132 sc->sc_ifa.ifa_ifp = ifp; 133 sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl); 134 sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls); 135 sc->sc_smpls.smpls_family = AF_MPLS; 136 137 return (0); 138 } 139 140 int 141 mpe_clone_destroy(struct ifnet *ifp) 142 { 143 struct mpe_softc *sc = ifp->if_softc; 144 145 NET_LOCK(); 146 CLR(ifp->if_flags, IFF_RUNNING); 147 sc->sc_dead = 1; 148 149 if (sc->sc_smpls.smpls_label) { 150 rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL, 151 smplstosa(&sc->sc_smpls), sc->sc_rdomain); 152 } 153 NET_UNLOCK(); 154 155 ifq_barrier(&ifp->if_snd); 156 157 if_detach(ifp); 158 if (refcnt_rele(&sc->sc_ifa.ifa_refcnt) == 0) { 159 panic("%s: ifa refcnt has %u refs", __func__, 160 sc->sc_ifa.ifa_refcnt.r_refs); 161 } 162 free(sc, M_DEVBUF, sizeof *sc); 163 return (0); 164 } 165 166 /* 167 * Start output on the mpe interface. 168 */ 169 void 170 mpe_start(struct ifnet *ifp) 171 { 172 struct mpe_softc *sc = ifp->if_softc; 173 struct mbuf *m; 174 struct sockaddr *sa; 175 struct sockaddr smpls = { .sa_family = AF_MPLS }; 176 struct rtentry *rt; 177 struct ifnet *ifp0; 178 179 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { 180 sa = mtod(m, struct sockaddr *); 181 rt = rtalloc(sa, RT_RESOLVE, sc->sc_rdomain); 182 if (!rtisvalid(rt)) { 183 m_freem(m); 184 rtfree(rt); 185 continue; 186 } 187 188 ifp0 = if_get(rt->rt_ifidx); 189 if (ifp0 == NULL) { 190 m_freem(m); 191 rtfree(rt); 192 continue; 193 } 194 195 m_adj(m, sa->sa_len); 196 197 #if NBPFILTER > 0 198 if (ifp->if_bpf) { 199 /* remove MPLS label before passing packet to bpf */ 200 m->m_data += sizeof(struct shim_hdr); 201 m->m_len -= sizeof(struct shim_hdr); 202 m->m_pkthdr.len -= sizeof(struct shim_hdr); 203 bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family, 204 m, BPF_DIRECTION_OUT); 205 m->m_data -= sizeof(struct shim_hdr); 206 m->m_len += sizeof(struct shim_hdr); 207 m->m_pkthdr.len += sizeof(struct shim_hdr); 208 } 209 #endif 210 211 m->m_pkthdr.ph_rtableid = sc->sc_rdomain; 212 CLR(m->m_flags, M_BCAST|M_MCAST); 213 214 mpls_output(ifp0, m, &smpls, rt); 215 if_put(ifp0); 216 rtfree(rt); 217 } 218 } 219 220 int 221 mpe_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 222 struct rtentry *rt) 223 { 224 struct mpe_softc *sc; 225 struct rt_mpls *rtmpls; 226 struct shim_hdr shim; 227 int error; 228 int txprio; 229 uint8_t ttl = mpls_defttl; 230 uint8_t tos, prio; 231 size_t ttloff; 232 socklen_t slen; 233 234 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPLS)) { 235 m_freem(m); 236 return (ENETUNREACH); 237 } 238 239 if (dst->sa_family == AF_LINK && ISSET(rt->rt_flags, RTF_LOCAL)) { 240 mpe_input(ifp, m); 241 return (0); 242 } 243 244 #ifdef DIAGNOSTIC 245 if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid)) { 246 printf("%s: trying to send packet on wrong domain. " 247 "if %d vs. mbuf %d\n", ifp->if_xname, 248 ifp->if_rdomain, rtable_l2(m->m_pkthdr.ph_rtableid)); 249 } 250 #endif 251 252 rtmpls = (struct rt_mpls *)rt->rt_llinfo; 253 if (rtmpls->mpls_operation != MPLS_OP_PUSH) { 254 m_freem(m); 255 return (ENETUNREACH); 256 } 257 258 error = 0; 259 switch (dst->sa_family) { 260 case AF_INET: { 261 struct ip *ip = mtod(m, struct ip *); 262 tos = ip->ip_tos; 263 ttloff = offsetof(struct ip, ip_ttl); 264 slen = sizeof(struct sockaddr_in); 265 break; 266 } 267 #ifdef INET6 268 case AF_INET6: { 269 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 270 uint32_t flow = bemtoh32(&ip6->ip6_flow); 271 tos = flow >> 20; 272 ttloff = offsetof(struct ip6_hdr, ip6_hlim); 273 slen = sizeof(struct sockaddr_in6); 274 break; 275 } 276 #endif 277 default: 278 m_freem(m); 279 return (EPFNOSUPPORT); 280 } 281 282 if (mpls_mapttl_ip) { 283 /* assumes the ip header is already contig */ 284 ttl = *(mtod(m, uint8_t *) + ttloff); 285 } 286 287 sc = ifp->if_softc; 288 txprio = sc->sc_txhprio; 289 290 switch (txprio) { 291 case IF_HDRPRIO_PACKET: 292 prio = m->m_pkthdr.pf.prio; 293 break; 294 case IF_HDRPRIO_PAYLOAD: 295 prio = IFQ_TOS2PRIO(tos); 296 break; 297 default: 298 prio = txprio; 299 break; 300 } 301 302 shim.shim_label = rtmpls->mpls_label | htonl(prio << MPLS_EXP_OFFSET) | 303 MPLS_BOS_MASK | htonl(ttl); 304 305 m = m_prepend(m, sizeof(shim), M_NOWAIT); 306 if (m == NULL) { 307 error = ENOMEM; 308 goto out; 309 } 310 *mtod(m, struct shim_hdr *) = shim; 311 312 m = m_prepend(m, slen, M_WAITOK); 313 if (m == NULL) { 314 error = ENOMEM; 315 goto out; 316 } 317 memcpy(mtod(m, struct sockaddr *), rt->rt_gateway, slen); 318 mtod(m, struct sockaddr *)->sa_len = slen; /* to be sure */ 319 320 m->m_pkthdr.ph_family = dst->sa_family; 321 322 error = if_enqueue(ifp, m); 323 out: 324 if (error) 325 ifp->if_oerrors++; 326 return (error); 327 } 328 329 int 330 mpe_set_label(struct mpe_softc *sc, uint32_t label, unsigned int rdomain) 331 { 332 int error; 333 334 if (sc->sc_dead) 335 return (ENXIO); 336 337 if (sc->sc_smpls.smpls_label) { 338 /* remove old MPLS route */ 339 rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL, 340 smplstosa(&sc->sc_smpls), sc->sc_rdomain); 341 } 342 343 /* add new MPLS route */ 344 sc->sc_smpls.smpls_label = label; 345 sc->sc_rdomain = rdomain; 346 347 /* only install with a label or mpe_clone_destroy() will ignore it */ 348 if (sc->sc_smpls.smpls_label == MPLS_LABEL2SHIM(0)) 349 return 0; 350 351 error = rt_ifa_add(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL, 352 smplstosa(&sc->sc_smpls), sc->sc_rdomain); 353 if (error) 354 sc->sc_smpls.smpls_label = 0; 355 356 return (error); 357 } 358 359 int 360 mpe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 361 { 362 struct mpe_softc *sc = ifp->if_softc; 363 struct ifreq *ifr; 364 struct shim_hdr shim; 365 int error = 0; 366 367 ifr = (struct ifreq *)data; 368 switch (cmd) { 369 case SIOCSIFADDR: 370 break; 371 case SIOCSIFFLAGS: 372 if (ifp->if_flags & IFF_UP) 373 ifp->if_flags |= IFF_RUNNING; 374 else 375 ifp->if_flags &= ~IFF_RUNNING; 376 break; 377 case SIOCSIFMTU: 378 if (ifr->ifr_mtu < MPE_MTU_MIN || 379 ifr->ifr_mtu > MPE_MTU_MAX) 380 error = EINVAL; 381 else 382 ifp->if_mtu = ifr->ifr_mtu; 383 break; 384 case SIOCGETLABEL: 385 shim.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label); 386 if (shim.shim_label == 0) { 387 error = EADDRNOTAVAIL; 388 break; 389 } 390 error = copyout(&shim, ifr->ifr_data, sizeof(shim)); 391 break; 392 case SIOCSETLABEL: 393 error = copyin(ifr->ifr_data, &shim, sizeof(shim)); 394 if (error != 0) 395 break; 396 if (shim.shim_label > MPLS_LABEL_MAX || 397 shim.shim_label <= MPLS_LABEL_RESERVED_MAX) { 398 error = EINVAL; 399 break; 400 } 401 shim.shim_label = MPLS_LABEL2SHIM(shim.shim_label); 402 if (sc->sc_smpls.smpls_label != shim.shim_label) { 403 error = mpe_set_label(sc, shim.shim_label, 404 sc->sc_rdomain); 405 } 406 break; 407 case SIOCDELLABEL: 408 if (sc->sc_smpls.smpls_label != MPLS_LABEL2SHIM(0)) { 409 rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL, 410 smplstosa(&sc->sc_smpls), sc->sc_rdomain); 411 } 412 sc->sc_smpls.smpls_label = MPLS_LABEL2SHIM(0); 413 break; 414 415 case SIOCSLIFPHYRTABLE: 416 if (ifr->ifr_rdomainid < 0 || 417 ifr->ifr_rdomainid > RT_TABLEID_MAX || 418 !rtable_exists(ifr->ifr_rdomainid) || 419 ifr->ifr_rdomainid != rtable_l2(ifr->ifr_rdomainid)) { 420 error = EINVAL; 421 break; 422 } 423 if (sc->sc_rdomain != ifr->ifr_rdomainid) { 424 error = mpe_set_label(sc, sc->sc_smpls.smpls_label, 425 ifr->ifr_rdomainid); 426 } 427 break; 428 case SIOCGLIFPHYRTABLE: 429 ifr->ifr_rdomainid = sc->sc_rdomain; 430 break; 431 432 case SIOCSTXHPRIO: 433 error = if_txhprio_l3_check(ifr->ifr_hdrprio); 434 if (error != 0) 435 break; 436 437 sc->sc_txhprio = ifr->ifr_hdrprio; 438 break; 439 case SIOCGTXHPRIO: 440 ifr->ifr_hdrprio = sc->sc_txhprio; 441 break; 442 443 case SIOCSRXHPRIO: 444 error = if_rxhprio_l3_check(ifr->ifr_hdrprio); 445 if (error != 0) 446 break; 447 448 sc->sc_rxhprio = ifr->ifr_hdrprio; 449 break; 450 case SIOCGRXHPRIO: 451 ifr->ifr_hdrprio = sc->sc_rxhprio; 452 break; 453 454 default: 455 return (ENOTTY); 456 } 457 458 return (error); 459 } 460 461 void 462 mpe_input(struct ifnet *ifp, struct mbuf *m) 463 { 464 struct mpe_softc *sc = ifp->if_softc; 465 struct shim_hdr *shim; 466 struct mbuf *n; 467 uint8_t ttl, tos; 468 uint32_t exp; 469 int rxprio = sc->sc_rxhprio; 470 471 shim = mtod(m, struct shim_hdr *); 472 exp = ntohl(shim->shim_label & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET; 473 if (!MPLS_BOS_ISSET(shim->shim_label)) 474 goto drop; 475 476 ttl = ntohl(shim->shim_label & MPLS_TTL_MASK); 477 m_adj(m, sizeof(*shim)); 478 479 n = m; 480 while (n->m_len == 0) { 481 n = n->m_next; 482 if (n == NULL) 483 goto drop; 484 } 485 486 switch (*mtod(n, uint8_t *) >> 4) { 487 case 4: { 488 struct ip *ip; 489 if (m->m_len < sizeof(*ip)) { 490 m = m_pullup(m, sizeof(*ip)); 491 if (m == NULL) 492 return; 493 } 494 ip = mtod(m, struct ip *); 495 tos = ip->ip_tos; 496 497 if (mpls_mapttl_ip) { 498 m = mpls_ip_adjttl(m, ttl); 499 if (m == NULL) 500 return; 501 } 502 503 m->m_pkthdr.ph_family = AF_INET; 504 break; 505 } 506 #ifdef INET6 507 case 6: { 508 struct ip6_hdr *ip6; 509 uint32_t flow; 510 if (m->m_len < sizeof(*ip6)) { 511 m = m_pullup(m, sizeof(*ip6)); 512 if (m == NULL) 513 return; 514 } 515 ip6 = mtod(m, struct ip6_hdr *); 516 flow = bemtoh32(&ip6->ip6_flow); 517 tos = flow >> 20; 518 519 if (mpls_mapttl_ip6) { 520 m = mpls_ip6_adjttl(m, ttl); 521 if (m == NULL) 522 return; 523 } 524 525 m->m_pkthdr.ph_family = AF_INET6; 526 break; 527 } 528 #endif /* INET6 */ 529 default: 530 goto drop; 531 } 532 533 switch (rxprio) { 534 case IF_HDRPRIO_PACKET: 535 /* nop */ 536 break; 537 case IF_HDRPRIO_OUTER: 538 m->m_pkthdr.pf.prio = exp; 539 break; 540 case IF_HDRPRIO_PAYLOAD: 541 m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos); 542 break; 543 default: 544 m->m_pkthdr.pf.prio = rxprio; 545 break; 546 } 547 548 if_vinput(ifp, m); 549 return; 550 drop: 551 m_freem(m); 552 } 553