1 /* $NetBSD: if_mpls.c,v 1.19 2015/08/24 22:21:26 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2010 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Mihai Chelaru <kefren@NetBSD.org> 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.19 2015/08/24 22:21:26 pooka Exp $"); 34 35 #ifdef _KERNEL_OPT 36 #include "opt_inet.h" 37 #include "opt_mpls.h" 38 #endif 39 40 #include <sys/param.h> 41 42 #include <sys/errno.h> 43 #include <sys/malloc.h> 44 #include <sys/mbuf.h> 45 #include <sys/sysctl.h> 46 47 #include <net/bpf.h> 48 #include <net/if.h> 49 #include <net/if_types.h> 50 #include <net/netisr.h> 51 #include <net/route.h> 52 53 #ifdef INET 54 #include <netinet/in.h> 55 #include <netinet/in_systm.h> 56 #include <netinet/in_var.h> 57 #include <netinet/ip.h> 58 #include <netinet/ip_var.h> 59 #endif 60 61 #ifdef INET6 62 #include <netinet/ip6.h> 63 #include <netinet6/in6_var.h> 64 #include <netinet6/ip6_var.h> 65 #endif 66 67 #include <netmpls/mpls.h> 68 #include <netmpls/mpls_var.h> 69 70 #include "if_mpls.h" 71 72 #include "ioconf.h" 73 74 #define TRIM_LABEL do { \ 75 m_adj(m, sizeof(union mpls_shim)); \ 76 if (m->m_len < sizeof(union mpls_shim) && \ 77 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL) \ 78 goto done; \ 79 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); \ 80 } while (/* CONSTCOND */ 0) 81 82 83 static int mpls_clone_create(struct if_clone *, int); 84 static int mpls_clone_destroy(struct ifnet *); 85 86 static struct if_clone mpls_if_cloner = 87 IF_CLONE_INITIALIZER("mpls", mpls_clone_create, mpls_clone_destroy); 88 89 90 static void mpls_input(struct ifnet *, struct mbuf *); 91 static int mpls_output(struct ifnet *, struct mbuf *, const struct sockaddr *, 92 struct rtentry *); 93 static int mpls_ioctl(struct ifnet *, u_long, void *); 94 static int mpls_send_frame(struct mbuf *, struct ifnet *, struct rtentry *); 95 static int mpls_lse(struct mbuf *); 96 97 #ifdef INET 98 static int mpls_unlabel_inet(struct mbuf *); 99 static struct mbuf *mpls_label_inet(struct mbuf *, union mpls_shim *, uint); 100 #endif 101 102 #ifdef INET6 103 static int mpls_unlabel_inet6(struct mbuf *); 104 static struct mbuf *mpls_label_inet6(struct mbuf *, union mpls_shim *, uint); 105 #endif 106 107 static struct mbuf *mpls_prepend_shim(struct mbuf *, union mpls_shim *); 108 109 extern int mpls_defttl, mpls_mapttl_inet, mpls_mapttl_inet6, mpls_icmp_respond, 110 mpls_forwarding, mpls_frame_accept, mpls_mapprec_inet, mpls_mapclass_inet6, 111 mpls_rfc4182; 112 113 /* ARGSUSED */ 114 void 115 ifmplsattach(int count) 116 { 117 if_clone_attach(&mpls_if_cloner); 118 } 119 120 static int 121 mpls_clone_create(struct if_clone *ifc, int unit) 122 { 123 struct mpls_softc *sc; 124 125 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO); 126 127 if_initname(&sc->sc_if, ifc->ifc_name, unit); 128 sc->sc_if.if_softc = sc; 129 sc->sc_if.if_type = IFT_MPLS; 130 sc->sc_if.if_addrlen = 0; 131 sc->sc_if.if_hdrlen = sizeof(union mpls_shim); 132 sc->sc_if.if_dlt = DLT_NULL; 133 sc->sc_if.if_mtu = 1500; 134 sc->sc_if.if_flags = 0; 135 sc->sc_if.if_input = mpls_input; 136 sc->sc_if.if_output = mpls_output; 137 sc->sc_if.if_ioctl = mpls_ioctl; 138 139 if_attach(&sc->sc_if); 140 if_alloc_sadl(&sc->sc_if); 141 bpf_attach(&sc->sc_if, DLT_NULL, sizeof(uint32_t)); 142 return 0; 143 } 144 145 static int 146 mpls_clone_destroy(struct ifnet *ifp) 147 { 148 int s; 149 150 bpf_detach(ifp); 151 152 s = splnet(); 153 if_detach(ifp); 154 splx(s); 155 156 free(ifp->if_softc, M_DEVBUF); 157 return 0; 158 } 159 160 static void 161 mpls_input(struct ifnet *ifp, struct mbuf *m) 162 { 163 #if 0 164 /* 165 * TODO - kefren 166 * I'd love to unshim the packet, guess family 167 * and pass it to bpf 168 */ 169 bpf_mtap_af(ifp, AF_MPLS, m); 170 #endif 171 172 mpls_lse(m); 173 } 174 175 void 176 mplsintr(void) 177 { 178 struct mbuf *m; 179 int s; 180 181 while (!IF_IS_EMPTY(&mplsintrq)) { 182 s = splnet(); 183 IF_DEQUEUE(&mplsintrq, m); 184 splx(s); 185 186 if (!m) 187 return; 188 189 if (((m->m_flags & M_PKTHDR) == 0) || 190 (m->m_pkthdr.rcvif == 0)) 191 panic("mplsintr(): no pkthdr or rcvif"); 192 193 #ifdef MBUFTRACE 194 m_claimm(m, &mpls_owner); 195 #endif 196 mpls_input(m->m_pkthdr.rcvif, m); 197 } 198 } 199 200 /* 201 * prepend shim and deliver 202 */ 203 static int 204 mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct rtentry *rt) 205 { 206 union mpls_shim mh, *pms; 207 struct rtentry *rt1; 208 int err; 209 uint psize = sizeof(struct sockaddr_mpls); 210 211 KASSERT(KERNEL_LOCKED_P()); 212 213 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) { 214 m_freem(m); 215 return ENETDOWN; 216 } 217 218 if (rt_gettag(rt) == NULL || rt_gettag(rt)->sa_family != AF_MPLS) { 219 m_freem(m); 220 return EINVAL; 221 } 222 223 bpf_mtap_af(ifp, dst->sa_family, m); 224 225 memset(&mh, 0, sizeof(mh)); 226 mh.s_addr = MPLS_GETSADDR(rt); 227 mh.shim.bos = 1; 228 mh.shim.exp = 0; 229 mh.shim.ttl = mpls_defttl; 230 231 pms = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr; 232 233 while (psize <= rt_gettag(rt)->sa_len - sizeof(mh)) { 234 pms++; 235 if (mh.shim.label != MPLS_LABEL_IMPLNULL && 236 ((m = mpls_prepend_shim(m, &mh)) == NULL)) 237 return ENOBUFS; 238 memset(&mh, 0, sizeof(mh)); 239 mh.s_addr = ntohl(pms->s_addr); 240 mh.shim.bos = mh.shim.exp = 0; 241 mh.shim.ttl = mpls_defttl; 242 psize += sizeof(mh); 243 } 244 245 switch(dst->sa_family) { 246 #ifdef INET 247 case AF_INET: 248 m = mpls_label_inet(m, &mh, psize - sizeof(struct sockaddr_mpls)); 249 break; 250 #endif 251 #ifdef INET6 252 case AF_INET6: 253 m = mpls_label_inet6(m, &mh, psize - sizeof(struct sockaddr_mpls)); 254 break; 255 #endif 256 default: 257 m = mpls_prepend_shim(m, &mh); 258 break; 259 } 260 261 if (m == NULL) { 262 IF_DROP(&ifp->if_snd); 263 ifp->if_oerrors++; 264 return ENOBUFS; 265 } 266 267 ifp->if_opackets++; 268 ifp->if_obytes += m->m_pkthdr.len; 269 270 if ((rt1=rtalloc1(rt->rt_gateway, 1)) == NULL) { 271 m_freem(m); 272 return EHOSTUNREACH; 273 } 274 275 err = mpls_send_frame(m, rt1->rt_ifp, rt); 276 rtfree(rt1); 277 return err; 278 } 279 280 static int 281 mpls_ioctl(struct ifnet *ifp, u_long cmd, void *data) 282 { 283 int error = 0, s = splnet(); 284 struct ifreq *ifr = data; 285 286 switch(cmd) { 287 case SIOCINITIFADDR: 288 ifp->if_flags |= IFF_UP | IFF_RUNNING; 289 break; 290 case SIOCSIFMTU: 291 if (ifr != NULL && ifr->ifr_mtu < 576) { 292 error = EINVAL; 293 break; 294 } 295 /* FALLTHROUGH */ 296 case SIOCGIFMTU: 297 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET) 298 error = 0; 299 break; 300 case SIOCSIFFLAGS: 301 if ((error = ifioctl_common(ifp, cmd, data)) != 0) 302 break; 303 if (ifp->if_flags & IFF_UP) 304 ifp->if_flags |= IFF_RUNNING; 305 break; 306 default: 307 error = ifioctl_common(ifp, cmd, data); 308 break; 309 } 310 splx(s); 311 return error; 312 } 313 314 /* 315 * MPLS Label Switch Engine 316 */ 317 static int 318 mpls_lse(struct mbuf *m) 319 { 320 struct sockaddr_mpls dst; 321 union mpls_shim tshim, *htag; 322 struct rtentry *rt = NULL; 323 int error = ENOBUFS; 324 uint psize = sizeof(struct sockaddr_mpls); 325 bool push_back_alert = false; 326 327 if (m->m_len < sizeof(union mpls_shim) && 328 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL) 329 goto done; 330 331 dst.smpls_len = sizeof(struct sockaddr_mpls); 332 dst.smpls_family = AF_MPLS; 333 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); 334 335 /* Check if we're accepting MPLS Frames */ 336 error = EINVAL; 337 if (!mpls_frame_accept) 338 goto done; 339 340 /* TTL decrement */ 341 if ((m = mpls_ttl_dec(m)) == NULL) 342 goto done; 343 344 /* RFC 4182 */ 345 if (mpls_rfc4182 != 0) 346 while((dst.smpls_addr.shim.label == MPLS_LABEL_IPV4NULL || 347 dst.smpls_addr.shim.label == MPLS_LABEL_IPV6NULL) && 348 __predict_false(dst.smpls_addr.shim.bos == 0)) 349 TRIM_LABEL; 350 351 /* RFC 3032 Section 2.1 Page 4 */ 352 if (__predict_false(dst.smpls_addr.shim.label == MPLS_LABEL_RTALERT) && 353 dst.smpls_addr.shim.bos == 0) { 354 TRIM_LABEL; 355 push_back_alert = true; 356 } 357 358 if (dst.smpls_addr.shim.label <= MPLS_LABEL_RESMAX) { 359 /* Don't swap reserved labels */ 360 switch (dst.smpls_addr.shim.label) { 361 #ifdef INET 362 case MPLS_LABEL_IPV4NULL: 363 /* Pop shim and push mbuf to IP stack */ 364 if (dst.smpls_addr.shim.bos) 365 error = mpls_unlabel_inet(m); 366 break; 367 #endif 368 #ifdef INET6 369 case MPLS_LABEL_IPV6NULL: 370 /* Pop shim and push mbuf to IPv6 stack */ 371 if (dst.smpls_addr.shim.bos) 372 error = mpls_unlabel_inet6(m); 373 break; 374 #endif 375 case MPLS_LABEL_RTALERT: /* Yeah, I'm all alerted */ 376 case MPLS_LABEL_IMPLNULL: /* This is logical only */ 377 default: /* Rest are not allowed */ 378 break; 379 } 380 goto done; 381 } 382 383 /* Check if we should do MPLS forwarding */ 384 error = EHOSTUNREACH; 385 if (!mpls_forwarding) 386 goto done; 387 388 /* Get a route to dst */ 389 dst.smpls_addr.shim.ttl = 390 dst.smpls_addr.shim.bos = 391 dst.smpls_addr.shim.exp = 0; 392 dst.smpls_addr.s_addr = htonl(dst.smpls_addr.s_addr); 393 if ((rt = rtalloc1((const struct sockaddr*)&dst, 1)) == NULL) 394 goto done; 395 396 /* MPLS packet with no MPLS tagged route ? */ 397 if ((rt->rt_flags & RTF_GATEWAY) == 0 || 398 rt_gettag(rt) == NULL || 399 rt_gettag(rt)->sa_family != AF_MPLS) 400 goto done; 401 402 tshim.s_addr = MPLS_GETSADDR(rt); 403 404 /* Swap labels */ 405 if ((m->m_len < sizeof(union mpls_shim)) && 406 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) { 407 error = ENOBUFS; 408 goto done; 409 } 410 411 /* Replace only the label */ 412 htag = mtod(m, union mpls_shim *); 413 htag->s_addr = ntohl(htag->s_addr); 414 htag->shim.label = tshim.shim.label; 415 htag->s_addr = htonl(htag->s_addr); 416 417 /* check if there is anything more to prepend */ 418 htag = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr; 419 while (psize <= rt_gettag(rt)->sa_len - sizeof(tshim)) { 420 htag++; 421 memset(&tshim, 0, sizeof(tshim)); 422 tshim.s_addr = ntohl(htag->s_addr); 423 tshim.shim.bos = tshim.shim.exp = 0; 424 tshim.shim.ttl = mpls_defttl; 425 if (tshim.shim.label != MPLS_LABEL_IMPLNULL && 426 ((m = mpls_prepend_shim(m, &tshim)) == NULL)) 427 return ENOBUFS; 428 psize += sizeof(tshim); 429 } 430 431 if (__predict_false(push_back_alert == true)) { 432 /* re-add the router alert label */ 433 memset(&tshim, 0, sizeof(tshim)); 434 tshim.s_addr = MPLS_LABEL_RTALERT; 435 tshim.shim.bos = tshim.shim.exp = 0; 436 tshim.shim.ttl = mpls_defttl; 437 if ((m = mpls_prepend_shim(m, &tshim)) == NULL) 438 return ENOBUFS; 439 } 440 441 error = mpls_send_frame(m, rt->rt_ifp, rt); 442 443 done: 444 if (error != 0 && m != NULL) 445 m_freem(m); 446 if (rt != NULL) 447 rtfree(rt); 448 449 return error; 450 } 451 452 static int 453 mpls_send_frame(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt) 454 { 455 union mpls_shim msh; 456 int ret; 457 458 if ((rt->rt_flags & RTF_GATEWAY) == 0) 459 return EHOSTUNREACH; 460 461 rt->rt_use++; 462 463 msh.s_addr = MPLS_GETSADDR(rt); 464 if (msh.shim.label == MPLS_LABEL_IMPLNULL || 465 (m->m_flags & (M_MCAST | M_BCAST))) { 466 m_adj(m, sizeof(union mpls_shim)); 467 m->m_pkthdr.csum_flags = 0; 468 } 469 470 switch(ifp->if_type) { 471 /* only these are supported for now */ 472 case IFT_ETHER: 473 case IFT_TUNNEL: 474 case IFT_LOOP: 475 #ifdef INET 476 ret = ip_hresolv_output(ifp, m, rt->rt_gateway, rt); 477 #else 478 KERNEL_LOCK(1, NULL); 479 ret = (*ifp->if_output)(ifp, m, rt->rt_gateway, rt); 480 KERNEL_UNLOCK_ONE(NULL); 481 #endif 482 return ret; 483 break; 484 default: 485 return ENETUNREACH; 486 } 487 return 0; 488 } 489 490 491 492 #ifdef INET 493 static int 494 mpls_unlabel_inet(struct mbuf *m) 495 { 496 struct ip *iph; 497 union mpls_shim *ms; 498 int iphlen; 499 500 if (mpls_mapttl_inet || mpls_mapprec_inet) { 501 502 /* get shim info */ 503 ms = mtod(m, union mpls_shim *); 504 ms->s_addr = ntohl(ms->s_addr); 505 506 /* and get rid of it */ 507 m_adj(m, sizeof(union mpls_shim)); 508 509 /* get ip header */ 510 if (m->m_len < sizeof (struct ip) && 511 (m = m_pullup(m, sizeof(struct ip))) == NULL) 512 return ENOBUFS; 513 iph = mtod(m, struct ip *); 514 iphlen = iph->ip_hl << 2; 515 516 /* get it all */ 517 if (m->m_len < iphlen) { 518 if ((m = m_pullup(m, iphlen)) == NULL) 519 return ENOBUFS; 520 iph = mtod(m, struct ip *); 521 } 522 523 /* check ipsum */ 524 if (in_cksum(m, iphlen) != 0) { 525 m_freem(m); 526 return EINVAL; 527 } 528 529 /* set IP ttl from MPLS ttl */ 530 if (mpls_mapttl_inet) 531 iph->ip_ttl = ms->shim.ttl; 532 533 /* set IP Precedence from MPLS Exp */ 534 if (mpls_mapprec_inet) { 535 iph->ip_tos = (iph->ip_tos << 3) >> 3; 536 iph->ip_tos |= ms->shim.exp << 5; 537 } 538 539 /* reset ipsum because we modified TTL and TOS */ 540 iph->ip_sum = 0; 541 iph->ip_sum = in_cksum(m, iphlen); 542 } else 543 m_adj(m, sizeof(union mpls_shim)); 544 545 /* Put it on IP queue */ 546 if (__predict_false(!pktq_enqueue(ip_pktq, m, 0))) { 547 m_freem(m); 548 return ENOBUFS; 549 } 550 return 0; 551 } 552 553 /* 554 * Prepend MPLS label 555 */ 556 static struct mbuf * 557 mpls_label_inet(struct mbuf *m, union mpls_shim *ms, uint offset) 558 { 559 struct ip iphdr; 560 561 if (mpls_mapttl_inet || mpls_mapprec_inet) { 562 if ((m->m_len < sizeof(struct ip)) && 563 (m = m_pullup(m, offset + sizeof(struct ip))) == 0) 564 return NULL; /* XXX */ 565 m_copydata(m, offset, sizeof(struct ip), &iphdr); 566 567 /* Map TTL */ 568 if (mpls_mapttl_inet) 569 ms->shim.ttl = iphdr.ip_ttl; 570 571 /* Copy IP precedence to EXP */ 572 if (mpls_mapprec_inet) 573 ms->shim.exp = ((u_int8_t)iphdr.ip_tos) >> 5; 574 } 575 576 if ((m = mpls_prepend_shim(m, ms)) == NULL) 577 return NULL; 578 579 return m; 580 } 581 582 #endif /* INET */ 583 584 #ifdef INET6 585 586 static int 587 mpls_unlabel_inet6(struct mbuf *m) 588 { 589 struct ip6_hdr *ip6hdr; 590 union mpls_shim ms; 591 592 /* TODO: mapclass */ 593 if (mpls_mapttl_inet6) { 594 ms.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); 595 m_adj(m, sizeof(union mpls_shim)); 596 597 if (m->m_len < sizeof (struct ip6_hdr) && 598 (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0) 599 return ENOBUFS; 600 ip6hdr = mtod(m, struct ip6_hdr *); 601 602 /* Because we just decremented this in mpls_lse */ 603 ip6hdr->ip6_hlim = ms.shim.ttl + 1; 604 } else 605 m_adj(m, sizeof(union mpls_shim)); 606 607 /* Put it back on IPv6 queue. */ 608 if (__predict_false(!pktq_enqueue(ip6_pktq, m, 0))) { 609 m_freem(m); 610 return ENOBUFS; 611 } 612 return 0; 613 } 614 615 static struct mbuf * 616 mpls_label_inet6(struct mbuf *m, union mpls_shim *ms, uint offset) 617 { 618 struct ip6_hdr ip6h; 619 620 if (mpls_mapttl_inet6 || mpls_mapclass_inet6) { 621 if (m->m_len < sizeof(struct ip6_hdr) && 622 (m = m_pullup(m, offset + sizeof(struct ip6_hdr))) == 0) 623 return NULL; 624 m_copydata(m, offset, sizeof(struct ip6_hdr), &ip6h); 625 626 if (mpls_mapttl_inet6) 627 ms->shim.ttl = ip6h.ip6_hlim; 628 629 if (mpls_mapclass_inet6) 630 ms->shim.exp = ip6h.ip6_vfc << 1 >> 5; 631 } 632 633 if ((m = mpls_prepend_shim(m, ms)) == NULL) 634 return NULL; 635 636 return m; 637 } 638 639 #endif /* INET6 */ 640 641 static struct mbuf * 642 mpls_prepend_shim(struct mbuf *m, union mpls_shim *ms) 643 { 644 union mpls_shim *shim; 645 646 M_PREPEND(m, sizeof(*ms), M_DONTWAIT); 647 if (m == NULL) 648 return NULL; 649 650 if (m->m_len < sizeof(union mpls_shim) && 651 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) 652 return NULL; 653 654 shim = mtod(m, union mpls_shim *); 655 656 memcpy(shim, ms, sizeof(*shim)); 657 shim->s_addr = htonl(shim->s_addr); 658 659 return m; 660 } 661