xref: /netbsd-src/sys/net/if_mpls.c (revision 4fee23f98c45552038ad6b5bd05124a41302fb01)
1 /*	$NetBSD: if_mpls.c,v 1.5 2011/06/17 09:15:24 kefren Exp $ */
2 
3 /*
4  * Copyright (c) 2010 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Mihai Chelaru <kefren@NetBSD.org>
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.5 2011/06/17 09:15:24 kefren Exp $");
34 
35 #include "opt_inet.h"
36 #include "opt_mpls.h"
37 
38 #include <sys/param.h>
39 
40 #include <sys/errno.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/sysctl.h>
44 
45 #include <net/bpf.h>
46 #include <net/if.h>
47 #include <net/if_types.h>
48 #include <net/netisr.h>
49 #include <net/route.h>
50 
51 #ifdef INET
52 #include <netinet/in.h>
53 #include <netinet/in_systm.h>
54 #include <netinet/in_var.h>
55 #include <netinet/ip.h>
56 #endif
57 
58 #ifdef INET6
59 #include <netinet/ip6.h>
60 #include <netinet6/in6_var.h>
61 #include <netinet6/ip6_var.h>
62 #endif
63 
64 #include <netmpls/mpls.h>
65 #include <netmpls/mpls_var.h>
66 
67 #include "if_mpls.h"
68 
69 void ifmplsattach(int);
70 
71 static int mpls_clone_create(struct if_clone *, int);
72 static int mpls_clone_destroy(struct ifnet *);
73 
74 static struct if_clone mpls_if_cloner =
75 	IF_CLONE_INITIALIZER("mpls", mpls_clone_create, mpls_clone_destroy);
76 
77 
78 static void mpls_input(struct ifnet *, struct mbuf *);
79 static int mpls_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
80 	struct rtentry *);
81 static int mpls_ioctl(struct ifnet *, u_long, void *);
82 static int mpls_send_frame(struct mbuf *, struct ifnet *, struct rtentry *);
83 static int mpls_lse(struct mbuf *);
84 
85 #ifdef INET
86 static int mpls_unlabel_inet(struct mbuf *);
87 static struct mbuf *mpls_label_inet(struct mbuf *, union mpls_shim *);
88 #endif
89 
90 #ifdef INET6
91 static int mpls_unlabel_inet6(struct mbuf *);
92 static struct mbuf *mpls_label_inet6(struct mbuf *, union mpls_shim *);
93 #endif
94 
95 static struct mbuf *mpls_prepend_shim(struct mbuf *, union mpls_shim *);
96 
97 extern int mpls_defttl, mpls_mapttl_inet, mpls_mapttl_inet6, mpls_icmp_respond,
98 	mpls_forwarding, mpls_accept, mpls_mapprec_inet, mpls_mapclass_inet6;
99 
100 /* ARGSUSED */
101 void
102 ifmplsattach(int count)
103 {
104 	if_clone_attach(&mpls_if_cloner);
105 }
106 
107 static int
108 mpls_clone_create(struct if_clone *ifc, int unit)
109 {
110 	struct mpls_softc *sc;
111 
112 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
113 
114 	if_initname(&sc->sc_if, ifc->ifc_name, unit);
115 	sc->sc_if.if_softc = sc;
116 	sc->sc_if.if_type = IFT_MPLS;
117 	sc->sc_if.if_addrlen = 0;
118 	sc->sc_if.if_hdrlen = sizeof(union mpls_shim);
119 	sc->sc_if.if_dlt = DLT_NULL;
120 	sc->sc_if.if_mtu = 1500;
121 	sc->sc_if.if_flags = 0;
122 	sc->sc_if.if_input = mpls_input;
123 	sc->sc_if.if_output = mpls_output;
124 	sc->sc_if.if_ioctl = mpls_ioctl;
125 
126 	if_attach(&sc->sc_if);
127 	if_alloc_sadl(&sc->sc_if);
128 	bpf_attach(&sc->sc_if, DLT_NULL, sizeof(uint32_t));
129 	return 0;
130 }
131 
132 static int
133 mpls_clone_destroy(struct ifnet *ifp)
134 {
135 	int s;
136 
137 	bpf_detach(ifp);
138 
139 	s = splnet();
140 	if_detach(ifp);
141 	splx(s);
142 
143 	free(ifp->if_softc, M_DEVBUF);
144 	return 0;
145 }
146 
/*
 * if_input handler: pass the frame straight to the label switch engine.
 * ifp is not used and the status returned by mpls_lse() is discarded.
 *
 * TODO - kefren
 * I'd love to unshim the packet, guess family and pass it to bpf,
 * i.e. something like bpf_mtap_af(ifp, AF_MPLS, m).
 */
static void
mpls_input(struct ifnet *ifp, struct mbuf *m)
{

	(void)mpls_lse(m);
}
161 
162 void
163 mplsintr(void)
164 {
165 	struct mbuf *m;
166 	int s;
167 
168 	while (!IF_IS_EMPTY(&mplsintrq)) {
169 		s = splnet();
170 		IF_DEQUEUE(&mplsintrq, m);
171 		splx(s);
172 
173 		if (!m)
174 			return;
175 
176 		if (((m->m_flags & M_PKTHDR) == 0) ||
177 		    (m->m_pkthdr.rcvif == 0))
178 			panic("mplsintr(): no pkthdr or rcvif");
179 
180 #ifdef MBUFTRACE
181 		m_claimm(m, &mpls_owner);
182 #endif
183 		mpls_input(m->m_pkthdr.rcvif, m);
184 	}
185 }
186 
187 /*
188  * prepend shim and deliver
189  */
190 static int
191 mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct rtentry *rt)
192 {
193 	union mpls_shim mh;
194 	struct rtentry *rt1;
195 	int err;
196 
197 	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
198 		m_freem(m);
199 		return ENETDOWN;
200 	}
201 
202 	if (rt_gettag(rt) == NULL || rt_gettag(rt)->sa_family != AF_MPLS) {
203 		m_freem(m);
204 		return EINVAL;
205 	}
206 
207 	bpf_mtap_af(ifp, dst->sa_family, m);
208 
209 	mh.s_addr=MPLS_GETSADDR(rt);
210 	mh.shim.bos=1;
211 	mh.shim.exp=0;
212 	mh.shim.ttl=mpls_defttl;
213 
214 	switch(dst->sa_family) {
215 #ifdef INET
216 	case AF_INET:
217 		m = mpls_label_inet(m, &mh);
218 		break;
219 #endif
220 #ifdef INET6
221 	case AF_INET6:
222 		m = mpls_label_inet6(m, &mh);
223 		break;
224 #endif
225 	default:
226 		m = mpls_prepend_shim(m, &mh);
227 		break;
228 	}
229 
230 	if (m == NULL) {
231 		IF_DROP(&ifp->if_snd);
232 		ifp->if_oerrors++;
233 		return ENOBUFS;
234 	}
235 
236 	ifp->if_opackets++;
237 	ifp->if_obytes += m->m_pkthdr.len;
238 
239 	if ((rt1=rtalloc1(rt->rt_gateway, 1)) == NULL) {
240 		m_freem(m);
241 		return EHOSTUNREACH;
242 	}
243 
244 	err = mpls_send_frame(m, rt1->rt_ifp, rt);
245 	RTFREE(rt1);
246 	return err;
247 }
248 
249 static int
250 mpls_ioctl(struct ifnet *ifp, u_long cmd, void *data)
251 {
252 	int error = 0, s = splnet();
253 	struct ifreq *ifr = data;
254 
255 	switch(cmd) {
256 	case SIOCINITIFADDR:
257 		ifp->if_flags |= IFF_UP | IFF_RUNNING;
258 		break;
259 	case SIOCSIFMTU:
260 		if (ifr != NULL && ifr->ifr_mtu < 576) {
261 			error = EINVAL;
262 			break;
263 		}
264 		/* FALLTHROUGH */
265 	case SIOCGIFMTU:
266 		if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
267 			error = 0;
268 		break;
269 	case SIOCSIFFLAGS:
270 		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
271 			break;
272 		if (ifp->if_flags & IFF_UP)
273 			ifp->if_flags |= IFF_RUNNING;
274 		break;
275 	default:
276 		error = ifioctl_common(ifp, cmd, data);
277 		break;
278 	}
279 	splx(s);
280 	return error;
281 }
282 
283 /*
284  * MPLS Label Switch Engine
285  */
286 static int
287 mpls_lse(struct mbuf *m)
288 {
289 	struct sockaddr_mpls dst;
290 	union mpls_shim tshim, *htag;
291 	struct rtentry *rt = NULL;
292 	int error = ENOBUFS;
293 
294 	if (m->m_len < sizeof(union mpls_shim) &&
295 	    (m = m_pullup(m, sizeof(union mpls_shim))) == NULL)
296 		goto done;
297 
298 	dst.smpls_len = sizeof(struct sockaddr_mpls);
299 	dst.smpls_family = AF_MPLS;
300 	dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
301 
302 	/* Check if we're accepting MPLS Frames */
303 	error = EINVAL;
304 	if (!mpls_accept)
305 		goto done;
306 
307 	/* TTL decrement */
308 	if ((m = mpls_ttl_dec(m)) == NULL)
309 		goto done;
310 
311 	if (dst.smpls_addr.shim.label <= MPLS_LABEL_RESMAX) {
312 		/* Don't swap reserved labels */
313 		switch (dst.smpls_addr.shim.label) {
314 #ifdef INET
315 		case MPLS_LABEL_IPV4NULL:
316 			/* Pop shim and push mbuf to IP stack */
317 			if (dst.smpls_addr.shim.bos)
318 				error = mpls_unlabel_inet(m);
319 			break;
320 #endif
321 #ifdef INET6
322 		case MPLS_LABEL_IPV6NULL:
323 			/* Pop shim and push mbuf to IPv6 stack */
324 			if (dst.smpls_addr.shim.bos)
325 				error = mpls_unlabel_inet6(m);
326 			break;
327 #endif
328 		case MPLS_LABEL_RTALERT:	/* Yeah, I'm all alerted */
329 		case MPLS_LABEL_IMPLNULL:	/* This is logical only */
330 		default:			/* Rest are not allowed */
331 			break;
332 		}
333 		goto done;
334 	}
335 
336 	/* Check if we should do MPLS forwarding */
337 	error = EHOSTUNREACH;
338 	if (!mpls_forwarding)
339 		goto done;
340 
341 	/* Get a route to dst */
342 	dst.smpls_addr.shim.ttl =
343 	    dst.smpls_addr.shim.bos =
344 	    dst.smpls_addr.shim.exp = 0;
345 	dst.smpls_addr.s_addr = htonl(dst.smpls_addr.s_addr);
346 	if ((rt = rtalloc1((const struct sockaddr*)&dst, 1)) == NULL)
347 		goto done;
348 
349 	/* MPLS packet with no MPLS tagged route ? */
350 	if ((rt->rt_flags & RTF_GATEWAY) == 0 ||
351 	     rt_gettag(rt) == NULL ||
352 	     rt_gettag(rt)->sa_family != AF_MPLS)
353 		goto done;
354 
355 	tshim.s_addr = MPLS_GETSADDR(rt);
356 
357 	/* Swap labels */
358 	if ((m->m_len < sizeof(union mpls_shim)) &&
359 	    (m = m_pullup(m, sizeof(union mpls_shim))) == 0) {
360 		error = ENOBUFS;
361 		goto done;
362 	}
363 
364 	/* Replace only the label */
365 	htag = mtod(m, union mpls_shim *);
366 	htag->s_addr = ntohl(htag->s_addr);
367 	htag->shim.label = tshim.shim.label;
368 	htag->s_addr = htonl(htag->s_addr);
369 
370 	error = mpls_send_frame(m, rt->rt_ifp, rt);
371 
372 done:
373 	if (error != 0 && m != NULL)
374 		m_freem(m);
375 	if (rt != NULL)
376 		RTFREE(rt);
377 
378 	return error;
379 }
380 
381 static int
382 mpls_send_frame(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt)
383 {
384 	union mpls_shim msh;
385 
386 	if ((rt->rt_flags & RTF_GATEWAY) == 0)
387 		return EHOSTUNREACH;
388 
389 	rt->rt_use++;
390 
391 	msh.s_addr = MPLS_GETSADDR(rt);
392 	if (msh.shim.label == MPLS_LABEL_IMPLNULL ||
393 	    (m->m_flags & (M_MCAST | M_BCAST))) {
394 		m_adj(m, sizeof(union mpls_shim));
395 		m->m_pkthdr.csum_flags = 0;
396 	}
397 
398 	switch(ifp->if_type) {
399 	/* only these are supported for now */
400 	case IFT_ETHER:
401 	case IFT_TUNNEL:
402 	case IFT_LOOP:
403 		return (*ifp->if_output)(ifp, m, rt->rt_gateway, rt);
404 		break;
405 	default:
406 		return ENETUNREACH;
407 	}
408 	return 0;
409 }
410 
411 
412 
413 #ifdef INET
414 static int
415 mpls_unlabel_inet(struct mbuf *m)
416 {
417 	int s, iphlen;
418 	struct ip *iph;
419 	union mpls_shim *ms;
420 	struct ifqueue *inq;
421 
422 	if (mpls_mapttl_inet || mpls_mapprec_inet) {
423 
424 		/* get shim info */
425 		ms = mtod(m, union mpls_shim *);
426 		ms->s_addr = ntohl(ms->s_addr);
427 
428 		/* and get rid of it */
429 		m_adj(m, sizeof(union mpls_shim));
430 
431 		/* get ip header */
432 		if (m->m_len < sizeof (struct ip) &&
433 		    (m = m_pullup(m, sizeof(struct ip))) == NULL)
434 			return ENOBUFS;
435 		iph = mtod(m, struct ip *);
436 		iphlen = iph->ip_hl << 2;
437 
438 		/* get it all */
439 		if (m->m_len < iphlen) {
440 			if ((m = m_pullup(m, iphlen)) == NULL)
441 				return ENOBUFS;
442 			iph = mtod(m, struct ip *);
443 		}
444 
445 		/* check ipsum */
446 		if (in_cksum(m, iphlen) != 0) {
447 			m_freem(m);
448 			return EINVAL;
449 		}
450 
451 		/* set IP ttl from MPLS ttl */
452 		if (mpls_mapttl_inet)
453 			iph->ip_ttl = ms->shim.ttl;
454 
455 		/* set IP Precedence from MPLS Exp */
456 		if (mpls_mapprec_inet) {
457 			iph->ip_tos = (iph->ip_tos << 3) >> 3;
458 			iph->ip_tos |= ms->shim.exp << 5;
459 		}
460 
461 		/* reset ipsum because we modified TTL and TOS */
462 		iph->ip_sum = 0;
463 		iph->ip_sum = in_cksum(m, iphlen);
464 	} else
465 		m_adj(m, sizeof(union mpls_shim));
466 
467 	/* Put it on IP queue */
468 	inq = &ipintrq;
469 	s = splnet();
470 	if (IF_QFULL(inq)) {
471 		IF_DROP(inq);
472 		splx(s);
473 		m_freem(m);
474 		return ENOBUFS;
475 	}
476 	IF_ENQUEUE(inq, m);
477 	splx(s);
478 	schednetisr(NETISR_IP);
479 
480 	return 0;
481 }
482 
483 /*
484  * Prepend MPLS label
485  */
486 static struct mbuf *
487 mpls_label_inet(struct mbuf *m, union mpls_shim *ms)
488 {
489 	struct ip *iphdr;
490 
491 	if (mpls_mapttl_inet || mpls_mapprec_inet) {
492 		if ((m->m_len < sizeof(struct ip)) &&
493 		    (m = m_pullup(m, sizeof(struct ip))) == 0)
494 			return NULL;
495 		iphdr = mtod(m, struct ip *);
496 
497 		/* Map TTL */
498 		if (mpls_mapttl_inet)
499 			ms->shim.ttl = iphdr->ip_ttl;
500 
501 		/* Copy IP precedence to EXP */
502 		if (mpls_mapprec_inet)
503 			ms->shim.exp = ((u_int8_t)iphdr->ip_tos) >> 5;
504 	}
505 
506 	if ((m = mpls_prepend_shim(m, ms)) == NULL)
507 		return NULL;
508 
509 	return m;
510 }
511 
512 #endif	/* INET */
513 
514 #ifdef INET6
515 
516 static int
517 mpls_unlabel_inet6(struct mbuf *m)
518 {
519 	struct ip6_hdr *ip6hdr;
520 	union mpls_shim ms;
521 	struct ifqueue *inq;
522 	int s;
523 
524 	/* TODO: mapclass */
525 	if (mpls_mapttl_inet6) {
526 		ms.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
527 		m_adj(m, sizeof(union mpls_shim));
528 
529 		if (m->m_len < sizeof (struct ip6_hdr) &&
530 		    (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0)
531 			return ENOBUFS;
532 		ip6hdr = mtod(m, struct ip6_hdr *);
533 
534 		/* Because we just decremented this in mpls_lse */
535 		ip6hdr->ip6_hlim = ms.shim.ttl + 1;
536 	} else
537 		m_adj(m, sizeof(union mpls_shim));
538 
539 	/* Put it back on IPv6 stack */
540 	schednetisr(NETISR_IPV6);
541 	inq = &ip6intrq;
542 	s = splnet();
543 	if (IF_QFULL(inq)) {
544 		IF_DROP(inq);
545 		splx(s);
546 		m_freem(m);
547 		return ENOBUFS;
548 	}
549 
550 	IF_ENQUEUE(inq, m);
551 	splx(s);
552 
553 	return 0;
554 }
555 
556 static struct mbuf *
557 mpls_label_inet6(struct mbuf *m, union mpls_shim *ms)
558 {
559 	struct ip6_hdr *ip6h;
560 
561 	if (mpls_mapttl_inet6 || mpls_mapclass_inet6) {
562 		if (m->m_len < sizeof(struct ip6_hdr) &&
563 		    (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0)
564 			return NULL;
565 		ip6h = mtod(m, struct ip6_hdr *);
566 
567 		if (mpls_mapttl_inet6)
568 			ms->shim.ttl = ip6h->ip6_hlim;
569 
570 		if (mpls_mapclass_inet6)
571 			ms->shim.exp = ip6h->ip6_vfc << 1 >> 5;
572 	}
573 
574 	if ((m = mpls_prepend_shim(m, ms)) == NULL)
575 		return NULL;
576 
577 	return m;
578 }
579 
580 #endif	/* INET6 */
581 
582 static struct mbuf *
583 mpls_prepend_shim(struct mbuf *m, union mpls_shim *ms)
584 {
585 	union mpls_shim *shim;
586 
587 	M_PREPEND(m, sizeof(*ms), M_DONTWAIT);
588 	if (m == NULL)
589 		return NULL;
590 
591 	if (m->m_len < sizeof(union mpls_shim) &&
592 	    (m = m_pullup(m, sizeof(union mpls_shim))) == 0)
593 		return NULL;
594 
595 	shim = mtod(m, union mpls_shim *);
596 
597 	memcpy(shim, ms, sizeof(*shim));
598 	shim->s_addr = htonl(shim->s_addr);
599 
600 	return m;
601 }
602