xref: /netbsd-src/sys/net/if_mpls.c (revision 975a152cfcdb39ae6e496af647af0c7275ca0b61)
1 /*	$NetBSD: if_mpls.c,v 1.10 2013/07/23 11:11:55 kefren Exp $ */
2 
3 /*
4  * Copyright (c) 2010 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Mihai Chelaru <kefren@NetBSD.org>
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.10 2013/07/23 11:11:55 kefren Exp $");
34 
35 #include "opt_inet.h"
36 #include "opt_mpls.h"
37 
38 #include <sys/param.h>
39 
40 #include <sys/errno.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/sysctl.h>
44 
45 #include <net/bpf.h>
46 #include <net/if.h>
47 #include <net/if_types.h>
48 #include <net/netisr.h>
49 #include <net/route.h>
50 
51 #ifdef INET
52 #include <netinet/in.h>
53 #include <netinet/in_systm.h>
54 #include <netinet/in_var.h>
55 #include <netinet/ip.h>
56 #endif
57 
58 #ifdef INET6
59 #include <netinet/ip6.h>
60 #include <netinet6/in6_var.h>
61 #include <netinet6/ip6_var.h>
62 #endif
63 
64 #include <netmpls/mpls.h>
65 #include <netmpls/mpls_var.h>
66 
67 #include "if_mpls.h"
68 
69 void ifmplsattach(int);
70 
71 static int mpls_clone_create(struct if_clone *, int);
72 static int mpls_clone_destroy(struct ifnet *);
73 
74 static struct if_clone mpls_if_cloner =
75 	IF_CLONE_INITIALIZER("mpls", mpls_clone_create, mpls_clone_destroy);
76 
77 
78 static void mpls_input(struct ifnet *, struct mbuf *);
79 static int mpls_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
80 	struct rtentry *);
81 static int mpls_ioctl(struct ifnet *, u_long, void *);
82 static int mpls_send_frame(struct mbuf *, struct ifnet *, struct rtentry *);
83 static int mpls_lse(struct mbuf *);
84 
85 #ifdef INET
86 static int mpls_unlabel_inet(struct mbuf *);
87 static struct mbuf *mpls_label_inet(struct mbuf *, union mpls_shim *, uint);
88 #endif
89 
90 #ifdef INET6
91 static int mpls_unlabel_inet6(struct mbuf *);
92 static struct mbuf *mpls_label_inet6(struct mbuf *, union mpls_shim *, uint);
93 #endif
94 
95 static struct mbuf *mpls_prepend_shim(struct mbuf *, union mpls_shim *);
96 
97 extern int mpls_defttl, mpls_mapttl_inet, mpls_mapttl_inet6, mpls_icmp_respond,
98 	mpls_forwarding, mpls_accept, mpls_mapprec_inet, mpls_mapclass_inet6,
99 	mpls_rfc4182;
100 
101 /* ARGSUSED */
102 void
103 ifmplsattach(int count)
104 {
105 	if_clone_attach(&mpls_if_cloner);
106 }
107 
108 static int
109 mpls_clone_create(struct if_clone *ifc, int unit)
110 {
111 	struct mpls_softc *sc;
112 
113 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
114 
115 	if_initname(&sc->sc_if, ifc->ifc_name, unit);
116 	sc->sc_if.if_softc = sc;
117 	sc->sc_if.if_type = IFT_MPLS;
118 	sc->sc_if.if_addrlen = 0;
119 	sc->sc_if.if_hdrlen = sizeof(union mpls_shim);
120 	sc->sc_if.if_dlt = DLT_NULL;
121 	sc->sc_if.if_mtu = 1500;
122 	sc->sc_if.if_flags = 0;
123 	sc->sc_if.if_input = mpls_input;
124 	sc->sc_if.if_output = mpls_output;
125 	sc->sc_if.if_ioctl = mpls_ioctl;
126 
127 	if_attach(&sc->sc_if);
128 	if_alloc_sadl(&sc->sc_if);
129 	bpf_attach(&sc->sc_if, DLT_NULL, sizeof(uint32_t));
130 	return 0;
131 }
132 
133 static int
134 mpls_clone_destroy(struct ifnet *ifp)
135 {
136 	int s;
137 
138 	bpf_detach(ifp);
139 
140 	s = splnet();
141 	if_detach(ifp);
142 	splx(s);
143 
144 	free(ifp->if_softc, M_DEVBUF);
145 	return 0;
146 }
147 
/*
 * if_input handler: pass the labelled packet to the label switch engine.
 * The engine's return value is not propagated.
 */
static void
mpls_input(struct ifnet *ifp, struct mbuf *m)
{
#if 0
	/*
	 * TODO - kefren
	 * I'd love to unshim the packet, guess family
	 * and pass it to bpf
	 */
	bpf_mtap_af(ifp, AF_MPLS, m);
#endif

	(void)mpls_lse(m);
}
162 
163 void
164 mplsintr(void)
165 {
166 	struct mbuf *m;
167 	int s;
168 
169 	while (!IF_IS_EMPTY(&mplsintrq)) {
170 		s = splnet();
171 		IF_DEQUEUE(&mplsintrq, m);
172 		splx(s);
173 
174 		if (!m)
175 			return;
176 
177 		if (((m->m_flags & M_PKTHDR) == 0) ||
178 		    (m->m_pkthdr.rcvif == 0))
179 			panic("mplsintr(): no pkthdr or rcvif");
180 
181 #ifdef MBUFTRACE
182 		m_claimm(m, &mpls_owner);
183 #endif
184 		mpls_input(m->m_pkthdr.rcvif, m);
185 	}
186 }
187 
188 /*
189  * prepend shim and deliver
190  */
191 static int
192 mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct rtentry *rt)
193 {
194 	union mpls_shim mh, *pms;
195 	struct rtentry *rt1;
196 	int err;
197 	uint psize = sizeof(struct sockaddr_mpls);
198 
199 	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
200 		m_freem(m);
201 		return ENETDOWN;
202 	}
203 
204 	if (rt_gettag(rt) == NULL || rt_gettag(rt)->sa_family != AF_MPLS) {
205 		m_freem(m);
206 		return EINVAL;
207 	}
208 
209 	bpf_mtap_af(ifp, dst->sa_family, m);
210 
211 	memset(&mh, 0, sizeof(mh));
212 	mh.s_addr = MPLS_GETSADDR(rt);
213 	mh.shim.bos = 1;
214 	mh.shim.exp = 0;
215 	mh.shim.ttl = mpls_defttl;
216 
217 	pms = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
218 
219 	while (psize <= rt_gettag(rt)->sa_len - sizeof(mh)) {
220 		pms++;
221 		if (mh.shim.label != MPLS_LABEL_IMPLNULL &&
222 		    ((m = mpls_prepend_shim(m, &mh)) == NULL))
223 			return ENOBUFS;
224 		memset(&mh, 0, sizeof(mh));
225 		mh.s_addr = ntohl(pms->s_addr);
226 		mh.shim.bos = mh.shim.exp = 0;
227 		mh.shim.ttl = mpls_defttl;
228 		psize += sizeof(mh);
229 	}
230 
231 	switch(dst->sa_family) {
232 #ifdef INET
233 	case AF_INET:
234 		m = mpls_label_inet(m, &mh, psize - sizeof(struct sockaddr_mpls));
235 		break;
236 #endif
237 #ifdef INET6
238 	case AF_INET6:
239 		m = mpls_label_inet6(m, &mh, psize - sizeof(struct sockaddr_mpls));
240 		break;
241 #endif
242 	default:
243 		m = mpls_prepend_shim(m, &mh);
244 		break;
245 	}
246 
247 	if (m == NULL) {
248 		IF_DROP(&ifp->if_snd);
249 		ifp->if_oerrors++;
250 		return ENOBUFS;
251 	}
252 
253 	ifp->if_opackets++;
254 	ifp->if_obytes += m->m_pkthdr.len;
255 
256 	if ((rt1=rtalloc1(rt->rt_gateway, 1)) == NULL) {
257 		m_freem(m);
258 		return EHOSTUNREACH;
259 	}
260 
261 	err = mpls_send_frame(m, rt1->rt_ifp, rt);
262 	RTFREE(rt1);
263 	return err;
264 }
265 
266 static int
267 mpls_ioctl(struct ifnet *ifp, u_long cmd, void *data)
268 {
269 	int error = 0, s = splnet();
270 	struct ifreq *ifr = data;
271 
272 	switch(cmd) {
273 	case SIOCINITIFADDR:
274 		ifp->if_flags |= IFF_UP | IFF_RUNNING;
275 		break;
276 	case SIOCSIFMTU:
277 		if (ifr != NULL && ifr->ifr_mtu < 576) {
278 			error = EINVAL;
279 			break;
280 		}
281 		/* FALLTHROUGH */
282 	case SIOCGIFMTU:
283 		if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
284 			error = 0;
285 		break;
286 	case SIOCSIFFLAGS:
287 		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
288 			break;
289 		if (ifp->if_flags & IFF_UP)
290 			ifp->if_flags |= IFF_RUNNING;
291 		break;
292 	default:
293 		error = ifioctl_common(ifp, cmd, data);
294 		break;
295 	}
296 	splx(s);
297 	return error;
298 }
299 
300 /*
301  * MPLS Label Switch Engine
302  */
303 static int
304 mpls_lse(struct mbuf *m)
305 {
306 	struct sockaddr_mpls dst;
307 	union mpls_shim tshim, *htag;
308 	struct rtentry *rt = NULL;
309 	int error = ENOBUFS;
310 	uint psize = sizeof(struct sockaddr_mpls);
311 
312 	if (m->m_len < sizeof(union mpls_shim) &&
313 	    (m = m_pullup(m, sizeof(union mpls_shim))) == NULL)
314 		goto done;
315 
316 	dst.smpls_len = sizeof(struct sockaddr_mpls);
317 	dst.smpls_family = AF_MPLS;
318 	dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
319 
320 	/* Check if we're accepting MPLS Frames */
321 	error = EINVAL;
322 	if (!mpls_accept)
323 		goto done;
324 
325 	/* TTL decrement */
326 	if ((m = mpls_ttl_dec(m)) == NULL)
327 		goto done;
328 
329 	/* RFC 4182 */
330 	if (mpls_rfc4182 != 0)
331 		while((dst.smpls_addr.shim.label == MPLS_LABEL_IPV4NULL ||
332 		    dst.smpls_addr.shim.label == MPLS_LABEL_IPV6NULL) &&
333 		    __predict_false(dst.smpls_addr.shim.bos == 0)) {
334 			m_adj(m, sizeof(union mpls_shim));
335 			if (m->m_len < sizeof(union mpls_shim) &&
336 			    (m = m_pullup(m, sizeof(union mpls_shim))) == NULL)
337 				goto done;
338 			dst.smpls_addr.s_addr =
339 			    ntohl(mtod(m, union mpls_shim *)->s_addr);
340 		}
341 
342 	if (dst.smpls_addr.shim.label <= MPLS_LABEL_RESMAX) {
343 		/* Don't swap reserved labels */
344 		switch (dst.smpls_addr.shim.label) {
345 #ifdef INET
346 		case MPLS_LABEL_IPV4NULL:
347 			/* Pop shim and push mbuf to IP stack */
348 			if (dst.smpls_addr.shim.bos)
349 				error = mpls_unlabel_inet(m);
350 			break;
351 #endif
352 #ifdef INET6
353 		case MPLS_LABEL_IPV6NULL:
354 			/* Pop shim and push mbuf to IPv6 stack */
355 			if (dst.smpls_addr.shim.bos)
356 				error = mpls_unlabel_inet6(m);
357 			break;
358 #endif
359 		case MPLS_LABEL_RTALERT:	/* Yeah, I'm all alerted */
360 		case MPLS_LABEL_IMPLNULL:	/* This is logical only */
361 		default:			/* Rest are not allowed */
362 			break;
363 		}
364 		goto done;
365 	}
366 
367 	/* Check if we should do MPLS forwarding */
368 	error = EHOSTUNREACH;
369 	if (!mpls_forwarding)
370 		goto done;
371 
372 	/* Get a route to dst */
373 	dst.smpls_addr.shim.ttl =
374 	    dst.smpls_addr.shim.bos =
375 	    dst.smpls_addr.shim.exp = 0;
376 	dst.smpls_addr.s_addr = htonl(dst.smpls_addr.s_addr);
377 	if ((rt = rtalloc1((const struct sockaddr*)&dst, 1)) == NULL)
378 		goto done;
379 
380 	/* MPLS packet with no MPLS tagged route ? */
381 	if ((rt->rt_flags & RTF_GATEWAY) == 0 ||
382 	     rt_gettag(rt) == NULL ||
383 	     rt_gettag(rt)->sa_family != AF_MPLS)
384 		goto done;
385 
386 	tshim.s_addr = MPLS_GETSADDR(rt);
387 
388 	/* Swap labels */
389 	if ((m->m_len < sizeof(union mpls_shim)) &&
390 	    (m = m_pullup(m, sizeof(union mpls_shim))) == 0) {
391 		error = ENOBUFS;
392 		goto done;
393 	}
394 
395 	/* Replace only the label */
396 	htag = mtod(m, union mpls_shim *);
397 	htag->s_addr = ntohl(htag->s_addr);
398 	htag->shim.label = tshim.shim.label;
399 	htag->s_addr = htonl(htag->s_addr);
400 
401 	/* check if there is anything more to prepend */
402 	htag = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
403 	while (psize <= rt_gettag(rt)->sa_len - sizeof(tshim)) {
404 		htag++;
405 		memset(&tshim, 0, sizeof(tshim));
406 		tshim.s_addr = ntohl(htag->s_addr);
407 		tshim.shim.bos = tshim.shim.exp = 0;
408 		tshim.shim.ttl = mpls_defttl;
409 		if (tshim.shim.label != MPLS_LABEL_IMPLNULL &&
410 		    ((m = mpls_prepend_shim(m, &tshim)) == NULL))
411 			return ENOBUFS;
412 		psize += sizeof(tshim);
413 	}
414 
415 	error = mpls_send_frame(m, rt->rt_ifp, rt);
416 
417 done:
418 	if (error != 0 && m != NULL)
419 		m_freem(m);
420 	if (rt != NULL)
421 		RTFREE(rt);
422 
423 	return error;
424 }
425 
426 static int
427 mpls_send_frame(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt)
428 {
429 	union mpls_shim msh;
430 
431 	if ((rt->rt_flags & RTF_GATEWAY) == 0)
432 		return EHOSTUNREACH;
433 
434 	rt->rt_use++;
435 
436 	msh.s_addr = MPLS_GETSADDR(rt);
437 	if (msh.shim.label == MPLS_LABEL_IMPLNULL ||
438 	    (m->m_flags & (M_MCAST | M_BCAST))) {
439 		m_adj(m, sizeof(union mpls_shim));
440 		m->m_pkthdr.csum_flags = 0;
441 	}
442 
443 	switch(ifp->if_type) {
444 	/* only these are supported for now */
445 	case IFT_ETHER:
446 	case IFT_TUNNEL:
447 	case IFT_LOOP:
448 		return (*ifp->if_output)(ifp, m, rt->rt_gateway, rt);
449 		break;
450 	default:
451 		return ENETUNREACH;
452 	}
453 	return 0;
454 }
455 
456 
457 
458 #ifdef INET
459 static int
460 mpls_unlabel_inet(struct mbuf *m)
461 {
462 	int s, iphlen;
463 	struct ip *iph;
464 	union mpls_shim *ms;
465 	struct ifqueue *inq;
466 
467 	if (mpls_mapttl_inet || mpls_mapprec_inet) {
468 
469 		/* get shim info */
470 		ms = mtod(m, union mpls_shim *);
471 		ms->s_addr = ntohl(ms->s_addr);
472 
473 		/* and get rid of it */
474 		m_adj(m, sizeof(union mpls_shim));
475 
476 		/* get ip header */
477 		if (m->m_len < sizeof (struct ip) &&
478 		    (m = m_pullup(m, sizeof(struct ip))) == NULL)
479 			return ENOBUFS;
480 		iph = mtod(m, struct ip *);
481 		iphlen = iph->ip_hl << 2;
482 
483 		/* get it all */
484 		if (m->m_len < iphlen) {
485 			if ((m = m_pullup(m, iphlen)) == NULL)
486 				return ENOBUFS;
487 			iph = mtod(m, struct ip *);
488 		}
489 
490 		/* check ipsum */
491 		if (in_cksum(m, iphlen) != 0) {
492 			m_freem(m);
493 			return EINVAL;
494 		}
495 
496 		/* set IP ttl from MPLS ttl */
497 		if (mpls_mapttl_inet)
498 			iph->ip_ttl = ms->shim.ttl;
499 
500 		/* set IP Precedence from MPLS Exp */
501 		if (mpls_mapprec_inet) {
502 			iph->ip_tos = (iph->ip_tos << 3) >> 3;
503 			iph->ip_tos |= ms->shim.exp << 5;
504 		}
505 
506 		/* reset ipsum because we modified TTL and TOS */
507 		iph->ip_sum = 0;
508 		iph->ip_sum = in_cksum(m, iphlen);
509 	} else
510 		m_adj(m, sizeof(union mpls_shim));
511 
512 	/* Put it on IP queue */
513 	inq = &ipintrq;
514 	s = splnet();
515 	if (IF_QFULL(inq)) {
516 		IF_DROP(inq);
517 		splx(s);
518 		m_freem(m);
519 		return ENOBUFS;
520 	}
521 	IF_ENQUEUE(inq, m);
522 	splx(s);
523 	schednetisr(NETISR_IP);
524 
525 	return 0;
526 }
527 
528 /*
529  * Prepend MPLS label
530  */
531 static struct mbuf *
532 mpls_label_inet(struct mbuf *m, union mpls_shim *ms, uint offset)
533 {
534 	struct ip iphdr;
535 
536 	if (mpls_mapttl_inet || mpls_mapprec_inet) {
537 		if ((m->m_len < sizeof(struct ip)) &&
538 		    (m = m_pullup(m, offset + sizeof(struct ip))) == 0)
539 			return NULL; /* XXX */
540 		m_copydata(m, offset, sizeof(struct ip), &iphdr);
541 
542 		/* Map TTL */
543 		if (mpls_mapttl_inet)
544 			ms->shim.ttl = iphdr.ip_ttl;
545 
546 		/* Copy IP precedence to EXP */
547 		if (mpls_mapprec_inet)
548 			ms->shim.exp = ((u_int8_t)iphdr.ip_tos) >> 5;
549 	}
550 
551 	if ((m = mpls_prepend_shim(m, ms)) == NULL)
552 		return NULL;
553 
554 	return m;
555 }
556 
557 #endif	/* INET */
558 
559 #ifdef INET6
560 
561 static int
562 mpls_unlabel_inet6(struct mbuf *m)
563 {
564 	struct ip6_hdr *ip6hdr;
565 	union mpls_shim ms;
566 	struct ifqueue *inq;
567 	int s;
568 
569 	/* TODO: mapclass */
570 	if (mpls_mapttl_inet6) {
571 		ms.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
572 		m_adj(m, sizeof(union mpls_shim));
573 
574 		if (m->m_len < sizeof (struct ip6_hdr) &&
575 		    (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0)
576 			return ENOBUFS;
577 		ip6hdr = mtod(m, struct ip6_hdr *);
578 
579 		/* Because we just decremented this in mpls_lse */
580 		ip6hdr->ip6_hlim = ms.shim.ttl + 1;
581 	} else
582 		m_adj(m, sizeof(union mpls_shim));
583 
584 	/* Put it back on IPv6 stack */
585 	schednetisr(NETISR_IPV6);
586 	inq = &ip6intrq;
587 	s = splnet();
588 	if (IF_QFULL(inq)) {
589 		IF_DROP(inq);
590 		splx(s);
591 		m_freem(m);
592 		return ENOBUFS;
593 	}
594 
595 	IF_ENQUEUE(inq, m);
596 	splx(s);
597 
598 	return 0;
599 }
600 
601 static struct mbuf *
602 mpls_label_inet6(struct mbuf *m, union mpls_shim *ms, uint offset)
603 {
604 	struct ip6_hdr ip6h;
605 
606 	if (mpls_mapttl_inet6 || mpls_mapclass_inet6) {
607 		if (m->m_len < sizeof(struct ip6_hdr) &&
608 		    (m = m_pullup(m, offset + sizeof(struct ip6_hdr))) == 0)
609 			return NULL;
610 		m_copydata(m, offset, sizeof(struct ip6_hdr), &ip6h);
611 
612 		if (mpls_mapttl_inet6)
613 			ms->shim.ttl = ip6h.ip6_hlim;
614 
615 		if (mpls_mapclass_inet6)
616 			ms->shim.exp = ip6h.ip6_vfc << 1 >> 5;
617 	}
618 
619 	if ((m = mpls_prepend_shim(m, ms)) == NULL)
620 		return NULL;
621 
622 	return m;
623 }
624 
625 #endif	/* INET6 */
626 
627 static struct mbuf *
628 mpls_prepend_shim(struct mbuf *m, union mpls_shim *ms)
629 {
630 	union mpls_shim *shim;
631 
632 	M_PREPEND(m, sizeof(*ms), M_DONTWAIT);
633 	if (m == NULL)
634 		return NULL;
635 
636 	if (m->m_len < sizeof(union mpls_shim) &&
637 	    (m = m_pullup(m, sizeof(union mpls_shim))) == 0)
638 		return NULL;
639 
640 	shim = mtod(m, union mpls_shim *);
641 
642 	memcpy(shim, ms, sizeof(*shim));
643 	shim->s_addr = htonl(shim->s_addr);
644 
645 	return m;
646 }
647