xref: /openbsd-src/sys/net/if_mpe.c (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 /* $OpenBSD: if_mpe.c,v 1.97 2020/08/21 22:59:27 kn Exp $ */
2 
3 /*
4  * Copyright (c) 2008 Pierre-Yves Ritschard <pyr@spootnik.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/mbuf.h>
22 #include <sys/socket.h>
23 #include <sys/sockio.h>
24 #include <sys/ioctl.h>
25 
26 #include <net/if.h>
27 #include <net/if_dl.h>
28 #include <net/if_var.h>
29 #include <net/if_types.h>
30 #include <net/netisr.h>
31 #include <net/route.h>
32 
33 #include <netinet/in.h>
34 #include <netinet/ip.h>
35 
36 #ifdef INET6
37 #include <netinet/ip6.h>
38 #endif /* INET6 */
39 
40 #include "bpfilter.h"
41 #if NBPFILTER > 0
42 #include <net/bpf.h>
43 #endif
44 
45 #include <netmpls/mpls.h>
46 
47 
48 
/*
 * Debug tracing.  With MPLS_DEBUG defined, DPRINTF((fmt, ...)) calls
 * printf() when mpedebug is non-zero; the argument has to carry its own
 * parentheses because it is placed directly after "printf".  Without
 * MPLS_DEBUG the macro expands to nothing.
 * NOTE(review): mpedebug is not declared in the visible part of this
 * file -- confirm where it is expected to come from before enabling
 * MPLS_DEBUG.
 */
#ifdef MPLS_DEBUG
#define DPRINTF(x)    do { if (mpedebug) printf x ; } while (0)
#else
#define DPRINTF(x)
#endif
54 
/*
 * Per-interface state of one mpe(4) instance.
 */
struct mpe_softc {
	struct ifnet		sc_if;		/* the interface */
	/* selects the EXP bits of pushed shims, see mpe_output() */
	int			sc_txhprio;
	/* selects the mbuf priority of received packets, see mpe_input() */
	int			sc_rxhprio;
	/* routing table used for the local label route and gateway lookups */
	unsigned int		sc_rdomain;
	/* ifaddr handed to rt_ifa_add()/rt_ifa_del() for the label route */
	struct ifaddr		sc_ifa;
	/* local label as a sockaddr; smpls_label == 0 means "no label" */
	struct sockaddr_mpls	sc_smpls;

	/* set by mpe_clone_destroy(); mpe_set_label() then fails with ENXIO */
	int			sc_dead;
};
65 
/* value stored in if_hdrlen: one MPLS shim header */
#define MPE_HDRLEN	sizeof(struct shim_hdr)
/* MTU assigned to a freshly created interface */
#define MPE_MTU		1500
/* inclusive bounds accepted by SIOCSIFMTU */
#define MPE_MTU_MIN	256
#define MPE_MTU_MAX	8192
70 
/* entry points implemented in this file */
void	mpeattach(int);
int	mpe_output(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	mpe_ioctl(struct ifnet *, u_long, caddr_t);
void	mpe_start(struct ifnet *);
int	mpe_clone_create(struct if_clone *, int);
int	mpe_clone_destroy(struct ifnet *);
void	mpe_input(struct ifnet *, struct mbuf *);

/* cloner for "mpe" interfaces; registered by mpeattach() */
struct if_clone	mpe_cloner =
    IF_CLONE_INITIALIZER("mpe", mpe_clone_create, mpe_clone_destroy);

/*
 * Defined outside this file.  When non-zero, the TTL is carried between
 * the IP header and the MPLS shim (see mpe_output() and mpe_input()).
 */
extern int	mpls_mapttl_ip;
#ifdef INET6
extern int	mpls_mapttl_ip6;
#endif
87 
88 void
89 mpeattach(int nmpe)
90 {
91 	if_clone_attach(&mpe_cloner);
92 }
93 
94 int
95 mpe_clone_create(struct if_clone *ifc, int unit)
96 {
97 	struct mpe_softc	*sc;
98 	struct ifnet		*ifp;
99 
100 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
101 	if (sc == NULL)
102 		return (ENOMEM);
103 
104 	ifp = &sc->sc_if;
105 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "mpe%d", unit);
106 	ifp->if_flags = IFF_POINTOPOINT;
107 	ifp->if_xflags = IFXF_CLONED;
108 	ifp->if_softc = sc;
109 	ifp->if_mtu = MPE_MTU;
110 	ifp->if_ioctl = mpe_ioctl;
111 	ifp->if_output = mpe_output;
112 	ifp->if_start = mpe_start;
113 	ifp->if_type = IFT_MPLS;
114 	ifp->if_hdrlen = MPE_HDRLEN;
115 
116 	sc->sc_dead = 0;
117 
118 	if_attach(ifp);
119 	if_alloc_sadl(ifp);
120 #if NBPFILTER > 0
121 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
122 #endif
123 
124 	sc->sc_txhprio = 0;
125 	sc->sc_rxhprio = IF_HDRPRIO_PACKET;
126 	sc->sc_rdomain = 0;
127 	sc->sc_ifa.ifa_ifp = ifp;
128 	sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl);
129 	sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls);
130 	sc->sc_smpls.smpls_family = AF_MPLS;
131 
132 	return (0);
133 }
134 
135 int
136 mpe_clone_destroy(struct ifnet *ifp)
137 {
138 	struct mpe_softc	*sc = ifp->if_softc;
139 
140 	NET_LOCK();
141 	CLR(ifp->if_flags, IFF_RUNNING);
142 	sc->sc_dead = 1;
143 
144 	if (sc->sc_smpls.smpls_label) {
145 		rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
146 		    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
147 	}
148 	NET_UNLOCK();
149 
150 	ifq_barrier(&ifp->if_snd);
151 
152 	if_detach(ifp);
153 	free(sc, M_DEVBUF, sizeof *sc);
154 	return (0);
155 }
156 
157 /*
158  * Start output on the mpe interface.
159  */
160 void
161 mpe_start(struct ifnet *ifp)
162 {
163 	struct mpe_softc	*sc = ifp->if_softc;
164 	struct mbuf		*m;
165 	struct sockaddr		*sa;
166 	struct sockaddr		smpls = { .sa_family = AF_MPLS };
167 	struct rtentry		*rt;
168 	struct ifnet		*ifp0;
169 
170 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
171 		sa = mtod(m, struct sockaddr *);
172 		rt = rtalloc(sa, RT_RESOLVE, sc->sc_rdomain);
173 		if (!rtisvalid(rt)) {
174 			m_freem(m);
175 			rtfree(rt);
176 			continue;
177 		}
178 
179 		ifp0 = if_get(rt->rt_ifidx);
180 		if (ifp0 == NULL) {
181 			m_freem(m);
182 			rtfree(rt);
183 			continue;
184 		}
185 
186 		m_adj(m, sa->sa_len);
187 
188 #if NBPFILTER > 0
189 		if (ifp->if_bpf) {
190 			/* remove MPLS label before passing packet to bpf */
191 			m->m_data += sizeof(struct shim_hdr);
192 			m->m_len -= sizeof(struct shim_hdr);
193 			m->m_pkthdr.len -= sizeof(struct shim_hdr);
194 			bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family,
195 			    m, BPF_DIRECTION_OUT);
196 			m->m_data -= sizeof(struct shim_hdr);
197 			m->m_len += sizeof(struct shim_hdr);
198 			m->m_pkthdr.len += sizeof(struct shim_hdr);
199 		}
200 #endif
201 
202 		m->m_pkthdr.ph_rtableid = sc->sc_rdomain;
203 		CLR(m->m_flags, M_BCAST|M_MCAST);
204 
205 		mpls_output(ifp0, m, &smpls, rt);
206 		if_put(ifp0);
207 		rtfree(rt);
208 	}
209 }
210 
211 int
212 mpe_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
213 	struct rtentry *rt)
214 {
215 	struct mpe_softc *sc;
216 	struct rt_mpls	*rtmpls;
217 	struct shim_hdr	shim;
218 	int		error;
219 	int		txprio;
220 	uint8_t		ttl = mpls_defttl;
221 	uint8_t		tos, prio;
222 	size_t		ttloff;
223 	socklen_t	slen;
224 
225 	if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPLS)) {
226 		m_freem(m);
227 		return (ENETUNREACH);
228 	}
229 
230 	if (dst->sa_family == AF_LINK && ISSET(rt->rt_flags, RTF_LOCAL)) {
231 		mpe_input(ifp, m);
232 		return (0);
233 	}
234 
235 #ifdef DIAGNOSTIC
236 	if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid)) {
237 		printf("%s: trying to send packet on wrong domain. "
238 		    "if %d vs. mbuf %d\n", ifp->if_xname,
239 		    ifp->if_rdomain, rtable_l2(m->m_pkthdr.ph_rtableid));
240 	}
241 #endif
242 
243 	rtmpls = (struct rt_mpls *)rt->rt_llinfo;
244 	if (rtmpls->mpls_operation != MPLS_OP_PUSH) {
245 		m_freem(m);
246 		return (ENETUNREACH);
247 	}
248 
249 	error = 0;
250 	switch (dst->sa_family) {
251 	case AF_INET: {
252 		struct ip *ip = mtod(m, struct ip *);
253 		tos = ip->ip_tos;
254 		ttloff = offsetof(struct ip, ip_ttl);
255 		slen = sizeof(struct sockaddr_in);
256 		break;
257 	}
258 #ifdef INET6
259 	case AF_INET6: {
260 		struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
261 		uint32_t flow = bemtoh32(&ip6->ip6_flow);
262 		tos = flow >> 20;
263 		ttloff = offsetof(struct ip6_hdr, ip6_hlim);
264 		slen = sizeof(struct sockaddr_in6);
265 		break;
266 	}
267 #endif
268 	default:
269 		m_freem(m);
270 		return (EPFNOSUPPORT);
271 	}
272 
273 	if (mpls_mapttl_ip) {
274 		/* assumes the ip header is already contig */
275 		ttl = *(mtod(m, uint8_t *) + ttloff);
276 	}
277 
278 	sc = ifp->if_softc;
279 	txprio = sc->sc_txhprio;
280 
281 	switch (txprio) {
282 	case IF_HDRPRIO_PACKET:
283 		prio = m->m_pkthdr.pf.prio;
284 		break;
285 	case IF_HDRPRIO_PAYLOAD:
286 		prio = IFQ_TOS2PRIO(tos);
287 		break;
288 	default:
289 		prio = txprio;
290 		break;
291 	}
292 
293 	shim.shim_label = rtmpls->mpls_label | htonl(prio << MPLS_EXP_OFFSET) |
294 	    MPLS_BOS_MASK | htonl(ttl);
295 
296 	m = m_prepend(m, sizeof(shim), M_NOWAIT);
297 	if (m == NULL) {
298 		error = ENOMEM;
299 		goto out;
300 	}
301 	*mtod(m, struct shim_hdr *) = shim;
302 
303 	m = m_prepend(m, slen, M_WAITOK);
304 	if (m == NULL) {
305 		error = ENOMEM;
306 		goto out;
307 	}
308 	memcpy(mtod(m, struct sockaddr *), rt->rt_gateway, slen);
309 	mtod(m, struct sockaddr *)->sa_len = slen; /* to be sure */
310 
311 	m->m_pkthdr.ph_family = dst->sa_family;
312 
313 	error = if_enqueue(ifp, m);
314 out:
315 	if (error)
316 		ifp->if_oerrors++;
317 	return (error);
318 }
319 
320 int
321 mpe_set_label(struct mpe_softc *sc, uint32_t label, unsigned int rdomain)
322 {
323 	int error;
324 
325 	if (sc->sc_dead)
326 		return (ENXIO);
327 
328 	if (sc->sc_smpls.smpls_label) {
329 		/* remove old MPLS route */
330 		rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
331 		    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
332 	}
333 
334 	/* add new MPLS route */
335 	sc->sc_smpls.smpls_label = label;
336 	sc->sc_rdomain = rdomain;
337 
338 	error = rt_ifa_add(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
339 	    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
340 	if (error)
341 		sc->sc_smpls.smpls_label = 0;
342 
343 	return (error);
344 }
345 
346 int
347 mpe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
348 {
349 	struct mpe_softc	*sc = ifp->if_softc;
350 	struct ifreq		*ifr;
351 	struct shim_hdr		 shim;
352 	int			 error = 0;
353 
354 	ifr = (struct ifreq *)data;
355 	switch (cmd) {
356 	case SIOCSIFADDR:
357 		break;
358 	case SIOCSIFFLAGS:
359 		if (ifp->if_flags & IFF_UP)
360 			ifp->if_flags |= IFF_RUNNING;
361 		else
362 			ifp->if_flags &= ~IFF_RUNNING;
363 		break;
364 	case SIOCSIFMTU:
365 		if (ifr->ifr_mtu < MPE_MTU_MIN ||
366 		    ifr->ifr_mtu > MPE_MTU_MAX)
367 			error = EINVAL;
368 		else
369 			ifp->if_mtu = ifr->ifr_mtu;
370 		break;
371 	case SIOCGETLABEL:
372 		shim.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label);
373 		if (shim.shim_label == 0) {
374 			error = EADDRNOTAVAIL;
375 			break;
376 		}
377 		error = copyout(&shim, ifr->ifr_data, sizeof(shim));
378 		break;
379 	case SIOCSETLABEL:
380 		error = copyin(ifr->ifr_data, &shim, sizeof(shim));
381 		if (error != 0)
382 			break;
383 		if (shim.shim_label > MPLS_LABEL_MAX ||
384 		    shim.shim_label <= MPLS_LABEL_RESERVED_MAX) {
385 			error = EINVAL;
386 			break;
387 		}
388 		shim.shim_label = MPLS_LABEL2SHIM(shim.shim_label);
389 		if (sc->sc_smpls.smpls_label != shim.shim_label) {
390 			error = mpe_set_label(sc, shim.shim_label,
391 			    sc->sc_rdomain);
392 		}
393 		break;
394 	case SIOCDELLABEL:
395 		if (sc->sc_smpls.smpls_label != MPLS_LABEL2SHIM(0)) {
396 			rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
397 			    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
398 
399 		}
400 		shim.shim_label = MPLS_LABEL2SHIM(0);
401 		break;
402 
403 	case SIOCSLIFPHYRTABLE:
404 		if (ifr->ifr_rdomainid < 0 ||
405 		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
406 		    !rtable_exists(ifr->ifr_rdomainid) ||
407 		    ifr->ifr_rdomainid != rtable_l2(ifr->ifr_rdomainid)) {
408 			error = EINVAL;
409 			break;
410 		}
411 		if (sc->sc_rdomain != ifr->ifr_rdomainid) {
412 			error = mpe_set_label(sc, sc->sc_smpls.smpls_label,
413 			    ifr->ifr_rdomainid);
414 		}
415 		break;
416 	case SIOCGLIFPHYRTABLE:
417 		ifr->ifr_rdomainid = sc->sc_rdomain;
418 		break;
419 
420 	case SIOCSTXHPRIO:
421 		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
422 		if (error != 0)
423 			break;
424 
425 		sc->sc_txhprio = ifr->ifr_hdrprio;
426 		break;
427 	case SIOCGTXHPRIO:
428 		ifr->ifr_hdrprio = sc->sc_txhprio;
429 		break;
430 
431 	case SIOCSRXHPRIO:
432 		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
433 		if (error != 0)
434 			break;
435 
436 		sc->sc_rxhprio = ifr->ifr_hdrprio;
437 		break;
438 	case SIOCGRXHPRIO:
439 		ifr->ifr_hdrprio = sc->sc_rxhprio;
440 		break;
441 
442 	default:
443 		return (ENOTTY);
444 	}
445 
446 	return (error);
447 }
448 
449 void
450 mpe_input(struct ifnet *ifp, struct mbuf *m)
451 {
452 	struct mpe_softc *sc = ifp->if_softc;
453 	struct shim_hdr	*shim;
454 	struct mbuf 	*n;
455 	uint8_t		 ttl, tos;
456 	uint32_t	 exp;
457 	void (*input)(struct ifnet *, struct mbuf *);
458 	int rxprio = sc->sc_rxhprio;
459 
460 	shim = mtod(m, struct shim_hdr *);
461 	exp = ntohl(shim->shim_label & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET;
462 	if (!MPLS_BOS_ISSET(shim->shim_label))
463 		goto drop;
464 
465 	ttl = ntohl(shim->shim_label & MPLS_TTL_MASK);
466 	m_adj(m, sizeof(*shim));
467 
468 	n = m;
469 	while (n->m_len == 0) {
470 		n = n->m_next;
471 		if (n == NULL)
472 			goto drop;
473 	}
474 
475 	switch (*mtod(n, uint8_t *) >> 4) {
476 	case 4: {
477 		struct ip *ip;
478 		if (m->m_len < sizeof(*ip)) {
479 			m = m_pullup(m, sizeof(*ip));
480 			if (m == NULL)
481 				return;
482 		}
483 		ip = mtod(m, struct ip *);
484 		tos = ip->ip_tos;
485 
486 		if (mpls_mapttl_ip) {
487 			m = mpls_ip_adjttl(m, ttl);
488 			if (m == NULL)
489 				return;
490 		}
491 		input = ipv4_input;
492 		m->m_pkthdr.ph_family = AF_INET;
493 		break;
494 	}
495 #ifdef INET6
496 	case 6: {
497 		struct ip6_hdr *ip6;
498 		uint32_t flow;
499 		if (m->m_len < sizeof(*ip6)) {
500 			m = m_pullup(m, sizeof(*ip6));
501 			if (m == NULL)
502 				return;
503 		}
504 		ip6 = mtod(m, struct ip6_hdr *);
505 		flow = bemtoh32(&ip6->ip6_flow);
506 		tos = flow >> 20;
507 
508 		if (mpls_mapttl_ip6) {
509 			m = mpls_ip6_adjttl(m, ttl);
510 			if (m == NULL)
511 				return;
512 		}
513 		input = ipv6_input;
514 		m->m_pkthdr.ph_family = AF_INET6;
515 		break;
516 	}
517 #endif /* INET6 */
518 	default:
519 		goto drop;
520 	}
521 
522 	switch (rxprio) {
523 	case IF_HDRPRIO_PACKET:
524 		/* nop */
525 		break;
526 	case IF_HDRPRIO_OUTER:
527 		m->m_pkthdr.pf.prio = exp;
528 		break;
529 	case IF_HDRPRIO_PAYLOAD:
530 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos);
531 		break;
532 	default:
533 		m->m_pkthdr.pf.prio = rxprio;
534 		break;
535 	}
536 
537 	/* new receive if and move into correct rtable */
538 	m->m_pkthdr.ph_ifidx = ifp->if_index;
539 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
540 
541 	/* packet has not been processed by PF yet. */
542 	KASSERT(m->m_pkthdr.pf.statekey == NULL);
543 
544 #if NBPFILTER > 0
545 	if (ifp->if_bpf) {
546 		bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family,
547 		    m, BPF_DIRECTION_IN);
548 	}
549 #endif
550 
551 	(*input)(ifp, m);
552 	return;
553 drop:
554 	m_freem(m);
555 }
556