1 /* $NetBSD: if_mpls.c,v 1.10 2013/07/23 11:11:55 kefren Exp $ */ 2 3 /* 4 * Copyright (c) 2010 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Mihai Chelaru <kefren@NetBSD.org> 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.10 2013/07/23 11:11:55 kefren Exp $"); 34 35 #include "opt_inet.h" 36 #include "opt_mpls.h" 37 38 #include <sys/param.h> 39 40 #include <sys/errno.h> 41 #include <sys/malloc.h> 42 #include <sys/mbuf.h> 43 #include <sys/sysctl.h> 44 45 #include <net/bpf.h> 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/netisr.h> 49 #include <net/route.h> 50 51 #ifdef INET 52 #include <netinet/in.h> 53 #include <netinet/in_systm.h> 54 #include <netinet/in_var.h> 55 #include <netinet/ip.h> 56 #endif 57 58 #ifdef INET6 59 #include <netinet/ip6.h> 60 #include <netinet6/in6_var.h> 61 #include <netinet6/ip6_var.h> 62 #endif 63 64 #include <netmpls/mpls.h> 65 #include <netmpls/mpls_var.h> 66 67 #include "if_mpls.h" 68 69 void ifmplsattach(int); 70 71 static int mpls_clone_create(struct if_clone *, int); 72 static int mpls_clone_destroy(struct ifnet *); 73 74 static struct if_clone mpls_if_cloner = 75 IF_CLONE_INITIALIZER("mpls", mpls_clone_create, mpls_clone_destroy); 76 77 78 static void mpls_input(struct ifnet *, struct mbuf *); 79 static int mpls_output(struct ifnet *, struct mbuf *, const struct sockaddr *, 80 struct rtentry *); 81 static int mpls_ioctl(struct ifnet *, u_long, void *); 82 static int mpls_send_frame(struct mbuf *, struct ifnet *, struct rtentry *); 83 static int mpls_lse(struct mbuf *); 84 85 #ifdef INET 86 static int mpls_unlabel_inet(struct mbuf *); 87 static struct mbuf *mpls_label_inet(struct mbuf *, union mpls_shim *, uint); 88 #endif 89 90 #ifdef INET6 91 static int mpls_unlabel_inet6(struct mbuf *); 92 static struct mbuf *mpls_label_inet6(struct mbuf *, union mpls_shim *, uint); 93 #endif 94 95 static struct mbuf *mpls_prepend_shim(struct mbuf *, union mpls_shim *); 96 97 extern int mpls_defttl, mpls_mapttl_inet, mpls_mapttl_inet6, mpls_icmp_respond, 98 mpls_forwarding, mpls_accept, mpls_mapprec_inet, mpls_mapclass_inet6, 99 mpls_rfc4182; 100 101 /* ARGSUSED */ 102 void 103 ifmplsattach(int count) 104 { 105 if_clone_attach(&mpls_if_cloner); 106 } 107 108 static int 109 mpls_clone_create(struct if_clone *ifc, int unit) 110 { 111 struct mpls_softc *sc; 112 113 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO); 114 115 if_initname(&sc->sc_if, ifc->ifc_name, unit); 116 sc->sc_if.if_softc = sc; 117 sc->sc_if.if_type = IFT_MPLS; 118 sc->sc_if.if_addrlen = 0; 119 sc->sc_if.if_hdrlen = sizeof(union mpls_shim); 120 sc->sc_if.if_dlt = DLT_NULL; 121 sc->sc_if.if_mtu = 1500; 122 sc->sc_if.if_flags = 0; 123 sc->sc_if.if_input = mpls_input; 124 sc->sc_if.if_output = mpls_output; 125 sc->sc_if.if_ioctl = mpls_ioctl; 126 127 if_attach(&sc->sc_if); 128 if_alloc_sadl(&sc->sc_if); 129 bpf_attach(&sc->sc_if, DLT_NULL, sizeof(uint32_t)); 130 return 0; 131 } 132 133 static int 134 mpls_clone_destroy(struct ifnet *ifp) 135 { 136 int s; 137 138 bpf_detach(ifp); 139 140 s = splnet(); 141 if_detach(ifp); 142 splx(s); 143 144 free(ifp->if_softc, M_DEVBUF); 145 return 0; 146 } 147 148 static void 149 mpls_input(struct ifnet *ifp, struct mbuf *m) 150 { 151 #if 0 152 /* 153 * TODO - kefren 154 * I'd love to unshim the packet, guess family 155 * and pass it to bpf 156 */ 157 bpf_mtap_af(ifp, AF_MPLS, m); 158 #endif 159 160 mpls_lse(m); 161 } 162 163 void 164 mplsintr(void) 165 { 166 struct mbuf *m; 167 int s; 168 169 while (!IF_IS_EMPTY(&mplsintrq)) { 170 s = splnet(); 171 IF_DEQUEUE(&mplsintrq, m); 172 splx(s); 173 174 if (!m) 175 return; 176 177 if (((m->m_flags & M_PKTHDR) == 0) || 178 (m->m_pkthdr.rcvif == 0)) 179 panic("mplsintr(): no pkthdr or rcvif"); 180 181 #ifdef MBUFTRACE 182 m_claimm(m, &mpls_owner); 183 #endif 184 mpls_input(m->m_pkthdr.rcvif, m); 185 } 186 } 187 188 /* 189 * prepend shim and deliver 190 */ 191 static int 192 mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct rtentry *rt) 193 { 194 union mpls_shim mh, *pms; 195 struct rtentry *rt1; 196 int err; 197 uint psize = sizeof(struct sockaddr_mpls); 198 199 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) { 200 m_freem(m); 201 return ENETDOWN; 202 } 203 204 if (rt_gettag(rt) == NULL || rt_gettag(rt)->sa_family != AF_MPLS) { 205 m_freem(m); 206 return EINVAL; 207 } 208 209 bpf_mtap_af(ifp, dst->sa_family, m); 210 211 memset(&mh, 0, sizeof(mh)); 212 mh.s_addr = MPLS_GETSADDR(rt); 213 mh.shim.bos = 1; 214 mh.shim.exp = 0; 215 mh.shim.ttl = mpls_defttl; 216 217 pms = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr; 218 219 while (psize <= rt_gettag(rt)->sa_len - sizeof(mh)) { 220 pms++; 221 if (mh.shim.label != MPLS_LABEL_IMPLNULL && 222 ((m = mpls_prepend_shim(m, &mh)) == NULL)) 223 return ENOBUFS; 224 memset(&mh, 0, sizeof(mh)); 225 mh.s_addr = ntohl(pms->s_addr); 226 mh.shim.bos = mh.shim.exp = 0; 227 mh.shim.ttl = mpls_defttl; 228 psize += sizeof(mh); 229 } 230 231 switch(dst->sa_family) { 232 #ifdef INET 233 case AF_INET: 234 m = mpls_label_inet(m, &mh, psize - sizeof(struct sockaddr_mpls)); 235 break; 236 #endif 237 #ifdef INET6 238 case AF_INET6: 239 m = mpls_label_inet6(m, &mh, psize - sizeof(struct sockaddr_mpls)); 240 break; 241 #endif 242 default: 243 m = mpls_prepend_shim(m, &mh); 244 break; 245 } 246 247 if (m == NULL) { 248 IF_DROP(&ifp->if_snd); 249 ifp->if_oerrors++; 250 return ENOBUFS; 251 } 252 253 ifp->if_opackets++; 254 ifp->if_obytes += m->m_pkthdr.len; 255 256 if ((rt1=rtalloc1(rt->rt_gateway, 1)) == NULL) { 257 m_freem(m); 258 return EHOSTUNREACH; 259 } 260 261 err = mpls_send_frame(m, rt1->rt_ifp, rt); 262 RTFREE(rt1); 263 return err; 264 } 265 266 static int 267 mpls_ioctl(struct ifnet *ifp, u_long cmd, void *data) 268 { 269 int error = 0, s = splnet(); 270 struct ifreq *ifr = data; 271 272 switch(cmd) { 273 case SIOCINITIFADDR: 274 ifp->if_flags |= IFF_UP | IFF_RUNNING; 275 break; 276 case SIOCSIFMTU: 277 if (ifr != NULL && ifr->ifr_mtu < 576) { 278 error = EINVAL; 279 break; 280 } 281 /* FALLTHROUGH */ 282 case SIOCGIFMTU: 283 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET) 284 error = 0; 285 break; 286 case SIOCSIFFLAGS: 287 if ((error = ifioctl_common(ifp, cmd, data)) != 0) 288 break; 289 if (ifp->if_flags & IFF_UP) 290 ifp->if_flags |= IFF_RUNNING; 291 break; 292 default: 293 error = ifioctl_common(ifp, cmd, data); 294 break; 295 } 296 splx(s); 297 return error; 298 } 299 300 /* 301 * MPLS Label Switch Engine 302 */ 303 static int 304 mpls_lse(struct mbuf *m) 305 { 306 struct sockaddr_mpls dst; 307 union mpls_shim tshim, *htag; 308 struct rtentry *rt = NULL; 309 int error = ENOBUFS; 310 uint psize = sizeof(struct sockaddr_mpls); 311 312 if (m->m_len < sizeof(union mpls_shim) && 313 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL) 314 goto done; 315 316 dst.smpls_len = sizeof(struct sockaddr_mpls); 317 dst.smpls_family = AF_MPLS; 318 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); 319 320 /* Check if we're accepting MPLS Frames */ 321 error = EINVAL; 322 if (!mpls_accept) 323 goto done; 324 325 /* TTL decrement */ 326 if ((m = mpls_ttl_dec(m)) == NULL) 327 goto done; 328 329 /* RFC 4182 */ 330 if (mpls_rfc4182 != 0) 331 while((dst.smpls_addr.shim.label == MPLS_LABEL_IPV4NULL || 332 dst.smpls_addr.shim.label == MPLS_LABEL_IPV6NULL) && 333 __predict_false(dst.smpls_addr.shim.bos == 0)) { 334 m_adj(m, sizeof(union mpls_shim)); 335 if (m->m_len < sizeof(union mpls_shim) && 336 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL) 337 goto done; 338 dst.smpls_addr.s_addr = 339 ntohl(mtod(m, union mpls_shim *)->s_addr); 340 } 341 342 if (dst.smpls_addr.shim.label <= MPLS_LABEL_RESMAX) { 343 /* Don't swap reserved labels */ 344 switch (dst.smpls_addr.shim.label) { 345 #ifdef INET 346 case MPLS_LABEL_IPV4NULL: 347 /* Pop shim and push mbuf to IP stack */ 348 if (dst.smpls_addr.shim.bos) 349 error = mpls_unlabel_inet(m); 350 break; 351 #endif 352 #ifdef INET6 353 case MPLS_LABEL_IPV6NULL: 354 /* Pop shim and push mbuf to IPv6 stack */ 355 if (dst.smpls_addr.shim.bos) 356 error = mpls_unlabel_inet6(m); 357 break; 358 #endif 359 case MPLS_LABEL_RTALERT: /* Yeah, I'm all alerted */ 360 case MPLS_LABEL_IMPLNULL: /* This is logical only */ 361 default: /* Rest are not allowed */ 362 break; 363 } 364 goto done; 365 } 366 367 /* Check if we should do MPLS forwarding */ 368 error = EHOSTUNREACH; 369 if (!mpls_forwarding) 370 goto done; 371 372 /* Get a route to dst */ 373 dst.smpls_addr.shim.ttl = 374 dst.smpls_addr.shim.bos = 375 dst.smpls_addr.shim.exp = 0; 376 dst.smpls_addr.s_addr = htonl(dst.smpls_addr.s_addr); 377 if ((rt = rtalloc1((const struct sockaddr*)&dst, 1)) == NULL) 378 goto done; 379 380 /* MPLS packet with no MPLS tagged route ? */ 381 if ((rt->rt_flags & RTF_GATEWAY) == 0 || 382 rt_gettag(rt) == NULL || 383 rt_gettag(rt)->sa_family != AF_MPLS) 384 goto done; 385 386 tshim.s_addr = MPLS_GETSADDR(rt); 387 388 /* Swap labels */ 389 if ((m->m_len < sizeof(union mpls_shim)) && 390 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) { 391 error = ENOBUFS; 392 goto done; 393 } 394 395 /* Replace only the label */ 396 htag = mtod(m, union mpls_shim *); 397 htag->s_addr = ntohl(htag->s_addr); 398 htag->shim.label = tshim.shim.label; 399 htag->s_addr = htonl(htag->s_addr); 400 401 /* check if there is anything more to prepend */ 402 htag = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr; 403 while (psize <= rt_gettag(rt)->sa_len - sizeof(tshim)) { 404 htag++; 405 memset(&tshim, 0, sizeof(tshim)); 406 tshim.s_addr = ntohl(htag->s_addr); 407 tshim.shim.bos = tshim.shim.exp = 0; 408 tshim.shim.ttl = mpls_defttl; 409 if (tshim.shim.label != MPLS_LABEL_IMPLNULL && 410 ((m = mpls_prepend_shim(m, &tshim)) == NULL)) 411 return ENOBUFS; 412 psize += sizeof(tshim); 413 } 414 415 error = mpls_send_frame(m, rt->rt_ifp, rt); 416 417 done: 418 if (error != 0 && m != NULL) 419 m_freem(m); 420 if (rt != NULL) 421 RTFREE(rt); 422 423 return error; 424 } 425 426 static int 427 mpls_send_frame(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt) 428 { 429 union mpls_shim msh; 430 431 if ((rt->rt_flags & RTF_GATEWAY) == 0) 432 return EHOSTUNREACH; 433 434 rt->rt_use++; 435 436 msh.s_addr = MPLS_GETSADDR(rt); 437 if (msh.shim.label == MPLS_LABEL_IMPLNULL || 438 (m->m_flags & (M_MCAST | M_BCAST))) { 439 m_adj(m, sizeof(union mpls_shim)); 440 m->m_pkthdr.csum_flags = 0; 441 } 442 443 switch(ifp->if_type) { 444 /* only these are supported for now */ 445 case IFT_ETHER: 446 case IFT_TUNNEL: 447 case IFT_LOOP: 448 return (*ifp->if_output)(ifp, m, rt->rt_gateway, rt); 449 break; 450 default: 451 return ENETUNREACH; 452 } 453 return 0; 454 } 455 456 457 458 #ifdef INET 459 static int 460 mpls_unlabel_inet(struct mbuf *m) 461 { 462 int s, iphlen; 463 struct ip *iph; 464 union mpls_shim *ms; 465 struct ifqueue *inq; 466 467 if (mpls_mapttl_inet || mpls_mapprec_inet) { 468 469 /* get shim info */ 470 ms = mtod(m, union mpls_shim *); 471 ms->s_addr = ntohl(ms->s_addr); 472 473 /* and get rid of it */ 474 m_adj(m, sizeof(union mpls_shim)); 475 476 /* get ip header */ 477 if (m->m_len < sizeof (struct ip) && 478 (m = m_pullup(m, sizeof(struct ip))) == NULL) 479 return ENOBUFS; 480 iph = mtod(m, struct ip *); 481 iphlen = iph->ip_hl << 2; 482 483 /* get it all */ 484 if (m->m_len < iphlen) { 485 if ((m = m_pullup(m, iphlen)) == NULL) 486 return ENOBUFS; 487 iph = mtod(m, struct ip *); 488 } 489 490 /* check ipsum */ 491 if (in_cksum(m, iphlen) != 0) { 492 m_freem(m); 493 return EINVAL; 494 } 495 496 /* set IP ttl from MPLS ttl */ 497 if (mpls_mapttl_inet) 498 iph->ip_ttl = ms->shim.ttl; 499 500 /* set IP Precedence from MPLS Exp */ 501 if (mpls_mapprec_inet) { 502 iph->ip_tos = (iph->ip_tos << 3) >> 3; 503 iph->ip_tos |= ms->shim.exp << 5; 504 } 505 506 /* reset ipsum because we modified TTL and TOS */ 507 iph->ip_sum = 0; 508 iph->ip_sum = in_cksum(m, iphlen); 509 } else 510 m_adj(m, sizeof(union mpls_shim)); 511 512 /* Put it on IP queue */ 513 inq = &ipintrq; 514 s = splnet(); 515 if (IF_QFULL(inq)) { 516 IF_DROP(inq); 517 splx(s); 518 m_freem(m); 519 return ENOBUFS; 520 } 521 IF_ENQUEUE(inq, m); 522 splx(s); 523 schednetisr(NETISR_IP); 524 525 return 0; 526 } 527 528 /* 529 * Prepend MPLS label 530 */ 531 static struct mbuf * 532 mpls_label_inet(struct mbuf *m, union mpls_shim *ms, uint offset) 533 { 534 struct ip iphdr; 535 536 if (mpls_mapttl_inet || mpls_mapprec_inet) { 537 if ((m->m_len < sizeof(struct ip)) && 538 (m = m_pullup(m, offset + sizeof(struct ip))) == 0) 539 return NULL; /* XXX */ 540 m_copydata(m, offset, sizeof(struct ip), &iphdr); 541 542 /* Map TTL */ 543 if (mpls_mapttl_inet) 544 ms->shim.ttl = iphdr.ip_ttl; 545 546 /* Copy IP precedence to EXP */ 547 if (mpls_mapprec_inet) 548 ms->shim.exp = ((u_int8_t)iphdr.ip_tos) >> 5; 549 } 550 551 if ((m = mpls_prepend_shim(m, ms)) == NULL) 552 return NULL; 553 554 return m; 555 } 556 557 #endif /* INET */ 558 559 #ifdef INET6 560 561 static int 562 mpls_unlabel_inet6(struct mbuf *m) 563 { 564 struct ip6_hdr *ip6hdr; 565 union mpls_shim ms; 566 struct ifqueue *inq; 567 int s; 568 569 /* TODO: mapclass */ 570 if (mpls_mapttl_inet6) { 571 ms.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); 572 m_adj(m, sizeof(union mpls_shim)); 573 574 if (m->m_len < sizeof (struct ip6_hdr) && 575 (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0) 576 return ENOBUFS; 577 ip6hdr = mtod(m, struct ip6_hdr *); 578 579 /* Because we just decremented this in mpls_lse */ 580 ip6hdr->ip6_hlim = ms.shim.ttl + 1; 581 } else 582 m_adj(m, sizeof(union mpls_shim)); 583 584 /* Put it back on IPv6 stack */ 585 schednetisr(NETISR_IPV6); 586 inq = &ip6intrq; 587 s = splnet(); 588 if (IF_QFULL(inq)) { 589 IF_DROP(inq); 590 splx(s); 591 m_freem(m); 592 return ENOBUFS; 593 } 594 595 IF_ENQUEUE(inq, m); 596 splx(s); 597 598 return 0; 599 } 600 601 static struct mbuf * 602 mpls_label_inet6(struct mbuf *m, union mpls_shim *ms, uint offset) 603 { 604 struct ip6_hdr ip6h; 605 606 if (mpls_mapttl_inet6 || mpls_mapclass_inet6) { 607 if (m->m_len < sizeof(struct ip6_hdr) && 608 (m = m_pullup(m, offset + sizeof(struct ip6_hdr))) == 0) 609 return NULL; 610 m_copydata(m, offset, sizeof(struct ip6_hdr), &ip6h); 611 612 if (mpls_mapttl_inet6) 613 ms->shim.ttl = ip6h.ip6_hlim; 614 615 if (mpls_mapclass_inet6) 616 ms->shim.exp = ip6h.ip6_vfc << 1 >> 5; 617 } 618 619 if ((m = mpls_prepend_shim(m, ms)) == NULL) 620 return NULL; 621 622 return m; 623 } 624 625 #endif /* INET6 */ 626 627 static struct mbuf * 628 mpls_prepend_shim(struct mbuf *m, union mpls_shim *ms) 629 { 630 union mpls_shim *shim; 631 632 M_PREPEND(m, sizeof(*ms), M_DONTWAIT); 633 if (m == NULL) 634 return NULL; 635 636 if (m->m_len < sizeof(union mpls_shim) && 637 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) 638 return NULL; 639 640 shim = mtod(m, union mpls_shim *); 641 642 memcpy(shim, ms, sizeof(*shim)); 643 shim->s_addr = htonl(shim->s_addr); 644 645 return m; 646 } 647