1 /* $NetBSD: if_mpls.c,v 1.17 2015/06/04 09:19:59 ozaki-r Exp $ */ 2 3 /* 4 * Copyright (c) 2010 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Mihai Chelaru <kefren@NetBSD.org> 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.17 2015/06/04 09:19:59 ozaki-r Exp $"); 34 35 #include "opt_inet.h" 36 #include "opt_mpls.h" 37 38 #include <sys/param.h> 39 40 #include <sys/errno.h> 41 #include <sys/malloc.h> 42 #include <sys/mbuf.h> 43 #include <sys/sysctl.h> 44 45 #include <net/bpf.h> 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/netisr.h> 49 #include <net/route.h> 50 51 #ifdef INET 52 #include <netinet/in.h> 53 #include <netinet/in_systm.h> 54 #include <netinet/in_var.h> 55 #include <netinet/ip.h> 56 #include <netinet/ip_var.h> 57 #endif 58 59 #ifdef INET6 60 #include <netinet/ip6.h> 61 #include <netinet6/in6_var.h> 62 #include <netinet6/ip6_var.h> 63 #endif 64 65 #include <netmpls/mpls.h> 66 #include <netmpls/mpls_var.h> 67 68 #include "if_mpls.h" 69 70 #define TRIM_LABEL do { \ 71 m_adj(m, sizeof(union mpls_shim)); \ 72 if (m->m_len < sizeof(union mpls_shim) && \ 73 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL) \ 74 goto done; \ 75 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); \ 76 } while (/* CONSTCOND */ 0) 77 78 79 void ifmplsattach(int); 80 81 static int mpls_clone_create(struct if_clone *, int); 82 static int mpls_clone_destroy(struct ifnet *); 83 84 static struct if_clone mpls_if_cloner = 85 IF_CLONE_INITIALIZER("mpls", mpls_clone_create, mpls_clone_destroy); 86 87 88 static void mpls_input(struct ifnet *, struct mbuf *); 89 static int mpls_output(struct ifnet *, struct mbuf *, const struct sockaddr *, 90 struct rtentry *); 91 static int mpls_ioctl(struct ifnet *, u_long, void *); 92 static int mpls_send_frame(struct mbuf *, struct ifnet *, struct rtentry *); 93 static int mpls_lse(struct mbuf *); 94 95 #ifdef INET 96 static int mpls_unlabel_inet(struct mbuf *); 97 static struct mbuf *mpls_label_inet(struct mbuf *, union mpls_shim *, uint); 98 #endif 99 100 #ifdef INET6 101 static int mpls_unlabel_inet6(struct mbuf *); 102 static struct mbuf *mpls_label_inet6(struct mbuf *, union mpls_shim *, uint); 103 #endif 104 105 static struct mbuf *mpls_prepend_shim(struct mbuf *, union mpls_shim *); 106 107 extern int mpls_defttl, mpls_mapttl_inet, mpls_mapttl_inet6, mpls_icmp_respond, 108 mpls_forwarding, mpls_frame_accept, mpls_mapprec_inet, mpls_mapclass_inet6, 109 mpls_rfc4182; 110 111 /* ARGSUSED */ 112 void 113 ifmplsattach(int count) 114 { 115 if_clone_attach(&mpls_if_cloner); 116 } 117 118 static int 119 mpls_clone_create(struct if_clone *ifc, int unit) 120 { 121 struct mpls_softc *sc; 122 123 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO); 124 125 if_initname(&sc->sc_if, ifc->ifc_name, unit); 126 sc->sc_if.if_softc = sc; 127 sc->sc_if.if_type = IFT_MPLS; 128 sc->sc_if.if_addrlen = 0; 129 sc->sc_if.if_hdrlen = sizeof(union mpls_shim); 130 sc->sc_if.if_dlt = DLT_NULL; 131 sc->sc_if.if_mtu = 1500; 132 sc->sc_if.if_flags = 0; 133 sc->sc_if.if_input = mpls_input; 134 sc->sc_if.if_output = mpls_output; 135 sc->sc_if.if_ioctl = mpls_ioctl; 136 137 if_attach(&sc->sc_if); 138 if_alloc_sadl(&sc->sc_if); 139 bpf_attach(&sc->sc_if, DLT_NULL, sizeof(uint32_t)); 140 return 0; 141 } 142 143 static int 144 mpls_clone_destroy(struct ifnet *ifp) 145 { 146 int s; 147 148 bpf_detach(ifp); 149 150 s = splnet(); 151 if_detach(ifp); 152 splx(s); 153 154 free(ifp->if_softc, M_DEVBUF); 155 return 0; 156 } 157 158 static void 159 mpls_input(struct ifnet *ifp, struct mbuf *m) 160 { 161 #if 0 162 /* 163 * TODO - kefren 164 * I'd love to unshim the packet, guess family 165 * and pass it to bpf 166 */ 167 bpf_mtap_af(ifp, AF_MPLS, m); 168 #endif 169 170 mpls_lse(m); 171 } 172 173 void 174 mplsintr(void) 175 { 176 struct mbuf *m; 177 int s; 178 179 while (!IF_IS_EMPTY(&mplsintrq)) { 180 s = splnet(); 181 IF_DEQUEUE(&mplsintrq, m); 182 splx(s); 183 184 if (!m) 185 return; 186 187 if (((m->m_flags & M_PKTHDR) == 0) || 188 (m->m_pkthdr.rcvif == 0)) 189 panic("mplsintr(): no pkthdr or rcvif"); 190 191 #ifdef MBUFTRACE 192 m_claimm(m, &mpls_owner); 193 #endif 194 mpls_input(m->m_pkthdr.rcvif, m); 195 } 196 } 197 198 /* 199 * prepend shim and deliver 200 */ 201 static int 202 mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct rtentry *rt) 203 { 204 union mpls_shim mh, *pms; 205 struct rtentry *rt1; 206 int err; 207 uint psize = sizeof(struct sockaddr_mpls); 208 209 KASSERT(KERNEL_LOCKED_P()); 210 211 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) { 212 m_freem(m); 213 return ENETDOWN; 214 } 215 216 if (rt_gettag(rt) == NULL || rt_gettag(rt)->sa_family != AF_MPLS) { 217 m_freem(m); 218 return EINVAL; 219 } 220 221 bpf_mtap_af(ifp, dst->sa_family, m); 222 223 memset(&mh, 0, sizeof(mh)); 224 mh.s_addr = MPLS_GETSADDR(rt); 225 mh.shim.bos = 1; 226 mh.shim.exp = 0; 227 mh.shim.ttl = mpls_defttl; 228 229 pms = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr; 230 231 while (psize <= rt_gettag(rt)->sa_len - sizeof(mh)) { 232 pms++; 233 if (mh.shim.label != MPLS_LABEL_IMPLNULL && 234 ((m = mpls_prepend_shim(m, &mh)) == NULL)) 235 return ENOBUFS; 236 memset(&mh, 0, sizeof(mh)); 237 mh.s_addr = ntohl(pms->s_addr); 238 mh.shim.bos = mh.shim.exp = 0; 239 mh.shim.ttl = mpls_defttl; 240 psize += sizeof(mh); 241 } 242 243 switch(dst->sa_family) { 244 #ifdef INET 245 case AF_INET: 246 m = mpls_label_inet(m, &mh, psize - sizeof(struct sockaddr_mpls)); 247 break; 248 #endif 249 #ifdef INET6 250 case AF_INET6: 251 m = mpls_label_inet6(m, &mh, psize - sizeof(struct sockaddr_mpls)); 252 break; 253 #endif 254 default: 255 m = mpls_prepend_shim(m, &mh); 256 break; 257 } 258 259 if (m == NULL) { 260 IF_DROP(&ifp->if_snd); 261 ifp->if_oerrors++; 262 return ENOBUFS; 263 } 264 265 ifp->if_opackets++; 266 ifp->if_obytes += m->m_pkthdr.len; 267 268 if ((rt1=rtalloc1(rt->rt_gateway, 1)) == NULL) { 269 m_freem(m); 270 return EHOSTUNREACH; 271 } 272 273 err = mpls_send_frame(m, rt1->rt_ifp, rt); 274 rtfree(rt1); 275 return err; 276 } 277 278 static int 279 mpls_ioctl(struct ifnet *ifp, u_long cmd, void *data) 280 { 281 int error = 0, s = splnet(); 282 struct ifreq *ifr = data; 283 284 switch(cmd) { 285 case SIOCINITIFADDR: 286 ifp->if_flags |= IFF_UP | IFF_RUNNING; 287 break; 288 case SIOCSIFMTU: 289 if (ifr != NULL && ifr->ifr_mtu < 576) { 290 error = EINVAL; 291 break; 292 } 293 /* FALLTHROUGH */ 294 case SIOCGIFMTU: 295 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET) 296 error = 0; 297 break; 298 case SIOCSIFFLAGS: 299 if ((error = ifioctl_common(ifp, cmd, data)) != 0) 300 break; 301 if (ifp->if_flags & IFF_UP) 302 ifp->if_flags |= IFF_RUNNING; 303 break; 304 default: 305 error = ifioctl_common(ifp, cmd, data); 306 break; 307 } 308 splx(s); 309 return error; 310 } 311 312 /* 313 * MPLS Label Switch Engine 314 */ 315 static int 316 mpls_lse(struct mbuf *m) 317 { 318 struct sockaddr_mpls dst; 319 union mpls_shim tshim, *htag; 320 struct rtentry *rt = NULL; 321 int error = ENOBUFS; 322 uint psize = sizeof(struct sockaddr_mpls); 323 bool push_back_alert = false; 324 325 if (m->m_len < sizeof(union mpls_shim) && 326 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL) 327 goto done; 328 329 dst.smpls_len = sizeof(struct sockaddr_mpls); 330 dst.smpls_family = AF_MPLS; 331 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); 332 333 /* Check if we're accepting MPLS Frames */ 334 error = EINVAL; 335 if (!mpls_frame_accept) 336 goto done; 337 338 /* TTL decrement */ 339 if ((m = mpls_ttl_dec(m)) == NULL) 340 goto done; 341 342 /* RFC 4182 */ 343 if (mpls_rfc4182 != 0) 344 while((dst.smpls_addr.shim.label == MPLS_LABEL_IPV4NULL || 345 dst.smpls_addr.shim.label == MPLS_LABEL_IPV6NULL) && 346 __predict_false(dst.smpls_addr.shim.bos == 0)) 347 TRIM_LABEL; 348 349 /* RFC 3032 Section 2.1 Page 4 */ 350 if (__predict_false(dst.smpls_addr.shim.label == MPLS_LABEL_RTALERT) && 351 dst.smpls_addr.shim.bos == 0) { 352 TRIM_LABEL; 353 push_back_alert = true; 354 } 355 356 if (dst.smpls_addr.shim.label <= MPLS_LABEL_RESMAX) { 357 /* Don't swap reserved labels */ 358 switch (dst.smpls_addr.shim.label) { 359 #ifdef INET 360 case MPLS_LABEL_IPV4NULL: 361 /* Pop shim and push mbuf to IP stack */ 362 if (dst.smpls_addr.shim.bos) 363 error = mpls_unlabel_inet(m); 364 break; 365 #endif 366 #ifdef INET6 367 case MPLS_LABEL_IPV6NULL: 368 /* Pop shim and push mbuf to IPv6 stack */ 369 if (dst.smpls_addr.shim.bos) 370 error = mpls_unlabel_inet6(m); 371 break; 372 #endif 373 case MPLS_LABEL_RTALERT: /* Yeah, I'm all alerted */ 374 case MPLS_LABEL_IMPLNULL: /* This is logical only */ 375 default: /* Rest are not allowed */ 376 break; 377 } 378 goto done; 379 } 380 381 /* Check if we should do MPLS forwarding */ 382 error = EHOSTUNREACH; 383 if (!mpls_forwarding) 384 goto done; 385 386 /* Get a route to dst */ 387 dst.smpls_addr.shim.ttl = 388 dst.smpls_addr.shim.bos = 389 dst.smpls_addr.shim.exp = 0; 390 dst.smpls_addr.s_addr = htonl(dst.smpls_addr.s_addr); 391 if ((rt = rtalloc1((const struct sockaddr*)&dst, 1)) == NULL) 392 goto done; 393 394 /* MPLS packet with no MPLS tagged route ? */ 395 if ((rt->rt_flags & RTF_GATEWAY) == 0 || 396 rt_gettag(rt) == NULL || 397 rt_gettag(rt)->sa_family != AF_MPLS) 398 goto done; 399 400 tshim.s_addr = MPLS_GETSADDR(rt); 401 402 /* Swap labels */ 403 if ((m->m_len < sizeof(union mpls_shim)) && 404 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) { 405 error = ENOBUFS; 406 goto done; 407 } 408 409 /* Replace only the label */ 410 htag = mtod(m, union mpls_shim *); 411 htag->s_addr = ntohl(htag->s_addr); 412 htag->shim.label = tshim.shim.label; 413 htag->s_addr = htonl(htag->s_addr); 414 415 /* check if there is anything more to prepend */ 416 htag = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr; 417 while (psize <= rt_gettag(rt)->sa_len - sizeof(tshim)) { 418 htag++; 419 memset(&tshim, 0, sizeof(tshim)); 420 tshim.s_addr = ntohl(htag->s_addr); 421 tshim.shim.bos = tshim.shim.exp = 0; 422 tshim.shim.ttl = mpls_defttl; 423 if (tshim.shim.label != MPLS_LABEL_IMPLNULL && 424 ((m = mpls_prepend_shim(m, &tshim)) == NULL)) 425 return ENOBUFS; 426 psize += sizeof(tshim); 427 } 428 429 if (__predict_false(push_back_alert == true)) { 430 /* re-add the router alert label */ 431 memset(&tshim, 0, sizeof(tshim)); 432 tshim.s_addr = MPLS_LABEL_RTALERT; 433 tshim.shim.bos = tshim.shim.exp = 0; 434 tshim.shim.ttl = mpls_defttl; 435 if ((m = mpls_prepend_shim(m, &tshim)) == NULL) 436 return ENOBUFS; 437 } 438 439 error = mpls_send_frame(m, rt->rt_ifp, rt); 440 441 done: 442 if (error != 0 && m != NULL) 443 m_freem(m); 444 if (rt != NULL) 445 rtfree(rt); 446 447 return error; 448 } 449 450 static int 451 mpls_send_frame(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt) 452 { 453 union mpls_shim msh; 454 int ret; 455 456 if ((rt->rt_flags & RTF_GATEWAY) == 0) 457 return EHOSTUNREACH; 458 459 rt->rt_use++; 460 461 msh.s_addr = MPLS_GETSADDR(rt); 462 if (msh.shim.label == MPLS_LABEL_IMPLNULL || 463 (m->m_flags & (M_MCAST | M_BCAST))) { 464 m_adj(m, sizeof(union mpls_shim)); 465 m->m_pkthdr.csum_flags = 0; 466 } 467 468 switch(ifp->if_type) { 469 /* only these are supported for now */ 470 case IFT_ETHER: 471 case IFT_TUNNEL: 472 case IFT_LOOP: 473 #ifdef INET 474 ret = ip_hresolv_output(ifp, m, rt->rt_gateway, rt); 475 #else 476 KERNEL_LOCK(1, NULL); 477 ret = (*ifp->if_output)(ifp, m, rt->rt_gateway, rt); 478 KERNEL_UNLOCK_ONE(NULL); 479 #endif 480 return ret; 481 break; 482 default: 483 return ENETUNREACH; 484 } 485 return 0; 486 } 487 488 489 490 #ifdef INET 491 static int 492 mpls_unlabel_inet(struct mbuf *m) 493 { 494 struct ip *iph; 495 union mpls_shim *ms; 496 int iphlen; 497 498 if (mpls_mapttl_inet || mpls_mapprec_inet) { 499 500 /* get shim info */ 501 ms = mtod(m, union mpls_shim *); 502 ms->s_addr = ntohl(ms->s_addr); 503 504 /* and get rid of it */ 505 m_adj(m, sizeof(union mpls_shim)); 506 507 /* get ip header */ 508 if (m->m_len < sizeof (struct ip) && 509 (m = m_pullup(m, sizeof(struct ip))) == NULL) 510 return ENOBUFS; 511 iph = mtod(m, struct ip *); 512 iphlen = iph->ip_hl << 2; 513 514 /* get it all */ 515 if (m->m_len < iphlen) { 516 if ((m = m_pullup(m, iphlen)) == NULL) 517 return ENOBUFS; 518 iph = mtod(m, struct ip *); 519 } 520 521 /* check ipsum */ 522 if (in_cksum(m, iphlen) != 0) { 523 m_freem(m); 524 return EINVAL; 525 } 526 527 /* set IP ttl from MPLS ttl */ 528 if (mpls_mapttl_inet) 529 iph->ip_ttl = ms->shim.ttl; 530 531 /* set IP Precedence from MPLS Exp */ 532 if (mpls_mapprec_inet) { 533 iph->ip_tos = (iph->ip_tos << 3) >> 3; 534 iph->ip_tos |= ms->shim.exp << 5; 535 } 536 537 /* reset ipsum because we modified TTL and TOS */ 538 iph->ip_sum = 0; 539 iph->ip_sum = in_cksum(m, iphlen); 540 } else 541 m_adj(m, sizeof(union mpls_shim)); 542 543 /* Put it on IP queue */ 544 if (__predict_false(!pktq_enqueue(ip_pktq, m, 0))) { 545 m_freem(m); 546 return ENOBUFS; 547 } 548 return 0; 549 } 550 551 /* 552 * Prepend MPLS label 553 */ 554 static struct mbuf * 555 mpls_label_inet(struct mbuf *m, union mpls_shim *ms, uint offset) 556 { 557 struct ip iphdr; 558 559 if (mpls_mapttl_inet || mpls_mapprec_inet) { 560 if ((m->m_len < sizeof(struct ip)) && 561 (m = m_pullup(m, offset + sizeof(struct ip))) == 0) 562 return NULL; /* XXX */ 563 m_copydata(m, offset, sizeof(struct ip), &iphdr); 564 565 /* Map TTL */ 566 if (mpls_mapttl_inet) 567 ms->shim.ttl = iphdr.ip_ttl; 568 569 /* Copy IP precedence to EXP */ 570 if (mpls_mapprec_inet) 571 ms->shim.exp = ((u_int8_t)iphdr.ip_tos) >> 5; 572 } 573 574 if ((m = mpls_prepend_shim(m, ms)) == NULL) 575 return NULL; 576 577 return m; 578 } 579 580 #endif /* INET */ 581 582 #ifdef INET6 583 584 static int 585 mpls_unlabel_inet6(struct mbuf *m) 586 { 587 struct ip6_hdr *ip6hdr; 588 union mpls_shim ms; 589 590 /* TODO: mapclass */ 591 if (mpls_mapttl_inet6) { 592 ms.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); 593 m_adj(m, sizeof(union mpls_shim)); 594 595 if (m->m_len < sizeof (struct ip6_hdr) && 596 (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0) 597 return ENOBUFS; 598 ip6hdr = mtod(m, struct ip6_hdr *); 599 600 /* Because we just decremented this in mpls_lse */ 601 ip6hdr->ip6_hlim = ms.shim.ttl + 1; 602 } else 603 m_adj(m, sizeof(union mpls_shim)); 604 605 /* Put it back on IPv6 queue. */ 606 if (__predict_false(!pktq_enqueue(ip6_pktq, m, 0))) { 607 m_freem(m); 608 return ENOBUFS; 609 } 610 return 0; 611 } 612 613 static struct mbuf * 614 mpls_label_inet6(struct mbuf *m, union mpls_shim *ms, uint offset) 615 { 616 struct ip6_hdr ip6h; 617 618 if (mpls_mapttl_inet6 || mpls_mapclass_inet6) { 619 if (m->m_len < sizeof(struct ip6_hdr) && 620 (m = m_pullup(m, offset + sizeof(struct ip6_hdr))) == 0) 621 return NULL; 622 m_copydata(m, offset, sizeof(struct ip6_hdr), &ip6h); 623 624 if (mpls_mapttl_inet6) 625 ms->shim.ttl = ip6h.ip6_hlim; 626 627 if (mpls_mapclass_inet6) 628 ms->shim.exp = ip6h.ip6_vfc << 1 >> 5; 629 } 630 631 if ((m = mpls_prepend_shim(m, ms)) == NULL) 632 return NULL; 633 634 return m; 635 } 636 637 #endif /* INET6 */ 638 639 static struct mbuf * 640 mpls_prepend_shim(struct mbuf *m, union mpls_shim *ms) 641 { 642 union mpls_shim *shim; 643 644 M_PREPEND(m, sizeof(*ms), M_DONTWAIT); 645 if (m == NULL) 646 return NULL; 647 648 if (m->m_len < sizeof(union mpls_shim) && 649 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) 650 return NULL; 651 652 shim = mtod(m, union mpls_shim *); 653 654 memcpy(shim, ms, sizeof(*shim)); 655 shim->s_addr = htonl(shim->s_addr); 656 657 return m; 658 } 659