1 /* $NetBSD: if_mpls.c,v 1.11 2013/10/25 09:25:32 kefren Exp $ */ 2 3 /* 4 * Copyright (c) 2010 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Mihai Chelaru <kefren@NetBSD.org> 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.11 2013/10/25 09:25:32 kefren Exp $"); 34 35 #include "opt_inet.h" 36 #include "opt_mpls.h" 37 38 #include <sys/param.h> 39 40 #include <sys/errno.h> 41 #include <sys/malloc.h> 42 #include <sys/mbuf.h> 43 #include <sys/sysctl.h> 44 45 #include <net/bpf.h> 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/netisr.h> 49 #include <net/route.h> 50 51 #ifdef INET 52 #include <netinet/in.h> 53 #include <netinet/in_systm.h> 54 #include <netinet/in_var.h> 55 #include <netinet/ip.h> 56 #endif 57 58 #ifdef INET6 59 #include <netinet/ip6.h> 60 #include <netinet6/in6_var.h> 61 #include <netinet6/ip6_var.h> 62 #endif 63 64 #include <netmpls/mpls.h> 65 #include <netmpls/mpls_var.h> 66 67 #include "if_mpls.h" 68 69 #define TRIM_LABEL do { \ 70 m_adj(m, sizeof(union mpls_shim)); \ 71 if (m->m_len < sizeof(union mpls_shim) && \ 72 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL) \ 73 goto done; \ 74 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); \ 75 } while (/* CONSTCOND */ 0) 76 77 78 void ifmplsattach(int); 79 80 static int mpls_clone_create(struct if_clone *, int); 81 static int mpls_clone_destroy(struct ifnet *); 82 83 static struct if_clone mpls_if_cloner = 84 IF_CLONE_INITIALIZER("mpls", mpls_clone_create, mpls_clone_destroy); 85 86 87 static void mpls_input(struct ifnet *, struct mbuf *); 88 static int mpls_output(struct ifnet *, struct mbuf *, const struct sockaddr *, 89 struct rtentry *); 90 static int mpls_ioctl(struct ifnet *, u_long, void *); 91 static int mpls_send_frame(struct mbuf *, struct ifnet *, struct rtentry *); 92 static int mpls_lse(struct mbuf *); 93 94 #ifdef INET 95 static int mpls_unlabel_inet(struct mbuf *); 96 static struct mbuf *mpls_label_inet(struct mbuf *, union mpls_shim *, uint); 97 #endif 98 99 #ifdef INET6 100 static int mpls_unlabel_inet6(struct mbuf *); 101 static struct mbuf *mpls_label_inet6(struct mbuf *, union mpls_shim *, uint); 102 #endif 103 104 static struct mbuf *mpls_prepend_shim(struct mbuf *, union mpls_shim *); 105 106 extern int mpls_defttl, mpls_mapttl_inet, mpls_mapttl_inet6, mpls_icmp_respond, 107 mpls_forwarding, mpls_accept, mpls_mapprec_inet, mpls_mapclass_inet6, 108 mpls_rfc4182; 109 110 /* ARGSUSED */ 111 void 112 ifmplsattach(int count) 113 { 114 if_clone_attach(&mpls_if_cloner); 115 } 116 117 static int 118 mpls_clone_create(struct if_clone *ifc, int unit) 119 { 120 struct mpls_softc *sc; 121 122 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO); 123 124 if_initname(&sc->sc_if, ifc->ifc_name, unit); 125 sc->sc_if.if_softc = sc; 126 sc->sc_if.if_type = IFT_MPLS; 127 sc->sc_if.if_addrlen = 0; 128 sc->sc_if.if_hdrlen = sizeof(union mpls_shim); 129 sc->sc_if.if_dlt = DLT_NULL; 130 sc->sc_if.if_mtu = 1500; 131 sc->sc_if.if_flags = 0; 132 sc->sc_if.if_input = mpls_input; 133 sc->sc_if.if_output = mpls_output; 134 sc->sc_if.if_ioctl = mpls_ioctl; 135 136 if_attach(&sc->sc_if); 137 if_alloc_sadl(&sc->sc_if); 138 bpf_attach(&sc->sc_if, DLT_NULL, sizeof(uint32_t)); 139 return 0; 140 } 141 142 static int 143 mpls_clone_destroy(struct ifnet *ifp) 144 { 145 int s; 146 147 bpf_detach(ifp); 148 149 s = splnet(); 150 if_detach(ifp); 151 splx(s); 152 153 free(ifp->if_softc, M_DEVBUF); 154 return 0; 155 } 156 157 static void 158 mpls_input(struct ifnet *ifp, struct mbuf *m) 159 { 160 #if 0 161 /* 162 * TODO - kefren 163 * I'd love to unshim the packet, guess family 164 * and pass it to bpf 165 */ 166 bpf_mtap_af(ifp, AF_MPLS, m); 167 #endif 168 169 mpls_lse(m); 170 } 171 172 void 173 mplsintr(void) 174 { 175 struct mbuf *m; 176 int s; 177 178 while (!IF_IS_EMPTY(&mplsintrq)) { 179 s = splnet(); 180 IF_DEQUEUE(&mplsintrq, m); 181 splx(s); 182 183 if (!m) 184 return; 185 186 if (((m->m_flags & M_PKTHDR) == 0) || 187 (m->m_pkthdr.rcvif == 0)) 188 panic("mplsintr(): no pkthdr or rcvif"); 189 190 #ifdef MBUFTRACE 191 m_claimm(m, &mpls_owner); 192 #endif 193 mpls_input(m->m_pkthdr.rcvif, m); 194 } 195 } 196 197 /* 198 * prepend shim and deliver 199 */ 200 static int 201 mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct rtentry *rt) 202 { 203 union mpls_shim mh, *pms; 204 struct rtentry *rt1; 205 int err; 206 uint psize = sizeof(struct sockaddr_mpls); 207 208 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) { 209 m_freem(m); 210 return ENETDOWN; 211 } 212 213 if (rt_gettag(rt) == NULL || rt_gettag(rt)->sa_family != AF_MPLS) { 214 m_freem(m); 215 return EINVAL; 216 } 217 218 bpf_mtap_af(ifp, dst->sa_family, m); 219 220 memset(&mh, 0, sizeof(mh)); 221 mh.s_addr = MPLS_GETSADDR(rt); 222 mh.shim.bos = 1; 223 mh.shim.exp = 0; 224 mh.shim.ttl = mpls_defttl; 225 226 pms = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr; 227 228 while (psize <= rt_gettag(rt)->sa_len - sizeof(mh)) { 229 pms++; 230 if (mh.shim.label != MPLS_LABEL_IMPLNULL && 231 ((m = mpls_prepend_shim(m, &mh)) == NULL)) 232 return ENOBUFS; 233 memset(&mh, 0, sizeof(mh)); 234 mh.s_addr = ntohl(pms->s_addr); 235 mh.shim.bos = mh.shim.exp = 0; 236 mh.shim.ttl = mpls_defttl; 237 psize += sizeof(mh); 238 } 239 240 switch(dst->sa_family) { 241 #ifdef INET 242 case AF_INET: 243 m = mpls_label_inet(m, &mh, psize - sizeof(struct sockaddr_mpls)); 244 break; 245 #endif 246 #ifdef INET6 247 case AF_INET6: 248 m = mpls_label_inet6(m, &mh, psize - sizeof(struct sockaddr_mpls)); 249 break; 250 #endif 251 default: 252 m = mpls_prepend_shim(m, &mh); 253 break; 254 } 255 256 if (m == NULL) { 257 IF_DROP(&ifp->if_snd); 258 ifp->if_oerrors++; 259 return ENOBUFS; 260 } 261 262 ifp->if_opackets++; 263 ifp->if_obytes += m->m_pkthdr.len; 264 265 if ((rt1=rtalloc1(rt->rt_gateway, 1)) == NULL) { 266 m_freem(m); 267 return EHOSTUNREACH; 268 } 269 270 err = mpls_send_frame(m, rt1->rt_ifp, rt); 271 RTFREE(rt1); 272 return err; 273 } 274 275 static int 276 mpls_ioctl(struct ifnet *ifp, u_long cmd, void *data) 277 { 278 int error = 0, s = splnet(); 279 struct ifreq *ifr = data; 280 281 switch(cmd) { 282 case SIOCINITIFADDR: 283 ifp->if_flags |= IFF_UP | IFF_RUNNING; 284 break; 285 case SIOCSIFMTU: 286 if (ifr != NULL && ifr->ifr_mtu < 576) { 287 error = EINVAL; 288 break; 289 } 290 /* FALLTHROUGH */ 291 case SIOCGIFMTU: 292 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET) 293 error = 0; 294 break; 295 case SIOCSIFFLAGS: 296 if ((error = ifioctl_common(ifp, cmd, data)) != 0) 297 break; 298 if (ifp->if_flags & IFF_UP) 299 ifp->if_flags |= IFF_RUNNING; 300 break; 301 default: 302 error = ifioctl_common(ifp, cmd, data); 303 break; 304 } 305 splx(s); 306 return error; 307 } 308 309 /* 310 * MPLS Label Switch Engine 311 */ 312 static int 313 mpls_lse(struct mbuf *m) 314 { 315 struct sockaddr_mpls dst; 316 union mpls_shim tshim, *htag; 317 struct rtentry *rt = NULL; 318 int error = ENOBUFS; 319 uint psize = sizeof(struct sockaddr_mpls); 320 bool push_back_alert = false; 321 322 if (m->m_len < sizeof(union mpls_shim) && 323 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL) 324 goto done; 325 326 dst.smpls_len = sizeof(struct sockaddr_mpls); 327 dst.smpls_family = AF_MPLS; 328 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); 329 330 /* Check if we're accepting MPLS Frames */ 331 error = EINVAL; 332 if (!mpls_accept) 333 goto done; 334 335 /* TTL decrement */ 336 if ((m = mpls_ttl_dec(m)) == NULL) 337 goto done; 338 339 /* RFC 4182 */ 340 if (mpls_rfc4182 != 0) 341 while((dst.smpls_addr.shim.label == MPLS_LABEL_IPV4NULL || 342 dst.smpls_addr.shim.label == MPLS_LABEL_IPV6NULL) && 343 __predict_false(dst.smpls_addr.shim.bos == 0)) 344 TRIM_LABEL; 345 346 /* RFC 3032 Section 2.1 Page 4 */ 347 if (__predict_false(dst.smpls_addr.shim.label == MPLS_LABEL_RTALERT) && 348 dst.smpls_addr.shim.bos == 0) { 349 TRIM_LABEL; 350 push_back_alert = true; 351 } 352 353 if (dst.smpls_addr.shim.label <= MPLS_LABEL_RESMAX) { 354 /* Don't swap reserved labels */ 355 switch (dst.smpls_addr.shim.label) { 356 #ifdef INET 357 case MPLS_LABEL_IPV4NULL: 358 /* Pop shim and push mbuf to IP stack */ 359 if (dst.smpls_addr.shim.bos) 360 error = mpls_unlabel_inet(m); 361 break; 362 #endif 363 #ifdef INET6 364 case MPLS_LABEL_IPV6NULL: 365 /* Pop shim and push mbuf to IPv6 stack */ 366 if (dst.smpls_addr.shim.bos) 367 error = mpls_unlabel_inet6(m); 368 break; 369 #endif 370 case MPLS_LABEL_RTALERT: /* Yeah, I'm all alerted */ 371 case MPLS_LABEL_IMPLNULL: /* This is logical only */ 372 default: /* Rest are not allowed */ 373 break; 374 } 375 goto done; 376 } 377 378 /* Check if we should do MPLS forwarding */ 379 error = EHOSTUNREACH; 380 if (!mpls_forwarding) 381 goto done; 382 383 /* Get a route to dst */ 384 dst.smpls_addr.shim.ttl = 385 dst.smpls_addr.shim.bos = 386 dst.smpls_addr.shim.exp = 0; 387 dst.smpls_addr.s_addr = htonl(dst.smpls_addr.s_addr); 388 if ((rt = rtalloc1((const struct sockaddr*)&dst, 1)) == NULL) 389 goto done; 390 391 /* MPLS packet with no MPLS tagged route ? */ 392 if ((rt->rt_flags & RTF_GATEWAY) == 0 || 393 rt_gettag(rt) == NULL || 394 rt_gettag(rt)->sa_family != AF_MPLS) 395 goto done; 396 397 tshim.s_addr = MPLS_GETSADDR(rt); 398 399 /* Swap labels */ 400 if ((m->m_len < sizeof(union mpls_shim)) && 401 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) { 402 error = ENOBUFS; 403 goto done; 404 } 405 406 /* Replace only the label */ 407 htag = mtod(m, union mpls_shim *); 408 htag->s_addr = ntohl(htag->s_addr); 409 htag->shim.label = tshim.shim.label; 410 htag->s_addr = htonl(htag->s_addr); 411 412 /* check if there is anything more to prepend */ 413 htag = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr; 414 while (psize <= rt_gettag(rt)->sa_len - sizeof(tshim)) { 415 htag++; 416 memset(&tshim, 0, sizeof(tshim)); 417 tshim.s_addr = ntohl(htag->s_addr); 418 tshim.shim.bos = tshim.shim.exp = 0; 419 tshim.shim.ttl = mpls_defttl; 420 if (tshim.shim.label != MPLS_LABEL_IMPLNULL && 421 ((m = mpls_prepend_shim(m, &tshim)) == NULL)) 422 return ENOBUFS; 423 psize += sizeof(tshim); 424 } 425 426 if (__predict_false(push_back_alert == true)) { 427 /* re-add the router alert label */ 428 memset(&tshim, 0, sizeof(tshim)); 429 tshim.s_addr = MPLS_LABEL_RTALERT; 430 tshim.shim.bos = tshim.shim.exp = 0; 431 tshim.shim.ttl = mpls_defttl; 432 if ((m = mpls_prepend_shim(m, &tshim)) == NULL) 433 return ENOBUFS; 434 } 435 436 error = mpls_send_frame(m, rt->rt_ifp, rt); 437 438 done: 439 if (error != 0 && m != NULL) 440 m_freem(m); 441 if (rt != NULL) 442 RTFREE(rt); 443 444 return error; 445 } 446 447 static int 448 mpls_send_frame(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt) 449 { 450 union mpls_shim msh; 451 452 if ((rt->rt_flags & RTF_GATEWAY) == 0) 453 return EHOSTUNREACH; 454 455 rt->rt_use++; 456 457 msh.s_addr = MPLS_GETSADDR(rt); 458 if (msh.shim.label == MPLS_LABEL_IMPLNULL || 459 (m->m_flags & (M_MCAST | M_BCAST))) { 460 m_adj(m, sizeof(union mpls_shim)); 461 m->m_pkthdr.csum_flags = 0; 462 } 463 464 switch(ifp->if_type) { 465 /* only these are supported for now */ 466 case IFT_ETHER: 467 case IFT_TUNNEL: 468 case IFT_LOOP: 469 return (*ifp->if_output)(ifp, m, rt->rt_gateway, rt); 470 break; 471 default: 472 return ENETUNREACH; 473 } 474 return 0; 475 } 476 477 478 479 #ifdef INET 480 static int 481 mpls_unlabel_inet(struct mbuf *m) 482 { 483 int s, iphlen; 484 struct ip *iph; 485 union mpls_shim *ms; 486 struct ifqueue *inq; 487 488 if (mpls_mapttl_inet || mpls_mapprec_inet) { 489 490 /* get shim info */ 491 ms = mtod(m, union mpls_shim *); 492 ms->s_addr = ntohl(ms->s_addr); 493 494 /* and get rid of it */ 495 m_adj(m, sizeof(union mpls_shim)); 496 497 /* get ip header */ 498 if (m->m_len < sizeof (struct ip) && 499 (m = m_pullup(m, sizeof(struct ip))) == NULL) 500 return ENOBUFS; 501 iph = mtod(m, struct ip *); 502 iphlen = iph->ip_hl << 2; 503 504 /* get it all */ 505 if (m->m_len < iphlen) { 506 if ((m = m_pullup(m, iphlen)) == NULL) 507 return ENOBUFS; 508 iph = mtod(m, struct ip *); 509 } 510 511 /* check ipsum */ 512 if (in_cksum(m, iphlen) != 0) { 513 m_freem(m); 514 return EINVAL; 515 } 516 517 /* set IP ttl from MPLS ttl */ 518 if (mpls_mapttl_inet) 519 iph->ip_ttl = ms->shim.ttl; 520 521 /* set IP Precedence from MPLS Exp */ 522 if (mpls_mapprec_inet) { 523 iph->ip_tos = (iph->ip_tos << 3) >> 3; 524 iph->ip_tos |= ms->shim.exp << 5; 525 } 526 527 /* reset ipsum because we modified TTL and TOS */ 528 iph->ip_sum = 0; 529 iph->ip_sum = in_cksum(m, iphlen); 530 } else 531 m_adj(m, sizeof(union mpls_shim)); 532 533 /* Put it on IP queue */ 534 inq = &ipintrq; 535 s = splnet(); 536 if (IF_QFULL(inq)) { 537 IF_DROP(inq); 538 splx(s); 539 m_freem(m); 540 return ENOBUFS; 541 } 542 IF_ENQUEUE(inq, m); 543 splx(s); 544 schednetisr(NETISR_IP); 545 546 return 0; 547 } 548 549 /* 550 * Prepend MPLS label 551 */ 552 static struct mbuf * 553 mpls_label_inet(struct mbuf *m, union mpls_shim *ms, uint offset) 554 { 555 struct ip iphdr; 556 557 if (mpls_mapttl_inet || mpls_mapprec_inet) { 558 if ((m->m_len < sizeof(struct ip)) && 559 (m = m_pullup(m, offset + sizeof(struct ip))) == 0) 560 return NULL; /* XXX */ 561 m_copydata(m, offset, sizeof(struct ip), &iphdr); 562 563 /* Map TTL */ 564 if (mpls_mapttl_inet) 565 ms->shim.ttl = iphdr.ip_ttl; 566 567 /* Copy IP precedence to EXP */ 568 if (mpls_mapprec_inet) 569 ms->shim.exp = ((u_int8_t)iphdr.ip_tos) >> 5; 570 } 571 572 if ((m = mpls_prepend_shim(m, ms)) == NULL) 573 return NULL; 574 575 return m; 576 } 577 578 #endif /* INET */ 579 580 #ifdef INET6 581 582 static int 583 mpls_unlabel_inet6(struct mbuf *m) 584 { 585 struct ip6_hdr *ip6hdr; 586 union mpls_shim ms; 587 struct ifqueue *inq; 588 int s; 589 590 /* TODO: mapclass */ 591 if (mpls_mapttl_inet6) { 592 ms.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); 593 m_adj(m, sizeof(union mpls_shim)); 594 595 if (m->m_len < sizeof (struct ip6_hdr) && 596 (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0) 597 return ENOBUFS; 598 ip6hdr = mtod(m, struct ip6_hdr *); 599 600 /* Because we just decremented this in mpls_lse */ 601 ip6hdr->ip6_hlim = ms.shim.ttl + 1; 602 } else 603 m_adj(m, sizeof(union mpls_shim)); 604 605 /* Put it back on IPv6 stack */ 606 schednetisr(NETISR_IPV6); 607 inq = &ip6intrq; 608 s = splnet(); 609 if (IF_QFULL(inq)) { 610 IF_DROP(inq); 611 splx(s); 612 m_freem(m); 613 return ENOBUFS; 614 } 615 616 IF_ENQUEUE(inq, m); 617 splx(s); 618 619 return 0; 620 } 621 622 static struct mbuf * 623 mpls_label_inet6(struct mbuf *m, union mpls_shim *ms, uint offset) 624 { 625 struct ip6_hdr ip6h; 626 627 if (mpls_mapttl_inet6 || mpls_mapclass_inet6) { 628 if (m->m_len < sizeof(struct ip6_hdr) && 629 (m = m_pullup(m, offset + sizeof(struct ip6_hdr))) == 0) 630 return NULL; 631 m_copydata(m, offset, sizeof(struct ip6_hdr), &ip6h); 632 633 if (mpls_mapttl_inet6) 634 ms->shim.ttl = ip6h.ip6_hlim; 635 636 if (mpls_mapclass_inet6) 637 ms->shim.exp = ip6h.ip6_vfc << 1 >> 5; 638 } 639 640 if ((m = mpls_prepend_shim(m, ms)) == NULL) 641 return NULL; 642 643 return m; 644 } 645 646 #endif /* INET6 */ 647 648 static struct mbuf * 649 mpls_prepend_shim(struct mbuf *m, union mpls_shim *ms) 650 { 651 union mpls_shim *shim; 652 653 M_PREPEND(m, sizeof(*ms), M_DONTWAIT); 654 if (m == NULL) 655 return NULL; 656 657 if (m->m_len < sizeof(union mpls_shim) && 658 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) 659 return NULL; 660 661 shim = mtod(m, union mpls_shim *); 662 663 memcpy(shim, ms, sizeof(*shim)); 664 shim->s_addr = htonl(shim->s_addr); 665 666 return m; 667 } 668