xref: /openbsd-src/sys/net/if_mpip.c (revision b5b8bc53ca00dc14cccb11ede4abd86818e9a05b)
1*b5b8bc53Smvs /*	$OpenBSD: if_mpip.c,v 1.19 2024/01/01 18:47:02 mvs Exp $ */
2912623feSdlg 
3912623feSdlg /*
4912623feSdlg  * Copyright (c) 2015 Rafael Zalamena <rzalamena@openbsd.org>
5912623feSdlg  * Copyright (c) 2019 David Gwynne <dlg@openbsd.org>
6912623feSdlg  *
7912623feSdlg  * Permission to use, copy, modify, and distribute this software for any
8912623feSdlg  * purpose with or without fee is hereby granted, provided that the above
9912623feSdlg  * copyright notice and this permission notice appear in all copies.
10912623feSdlg  *
11912623feSdlg  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12912623feSdlg  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13912623feSdlg  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14912623feSdlg  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15912623feSdlg  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16912623feSdlg  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17912623feSdlg  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18912623feSdlg  */
19912623feSdlg 
20912623feSdlg #include "bpfilter.h"
21912623feSdlg 
22912623feSdlg #include <sys/param.h>
23912623feSdlg #include <sys/systm.h>
24912623feSdlg #include <sys/mbuf.h>
25912623feSdlg #include <sys/socket.h>
26912623feSdlg #include <sys/ioctl.h>
27912623feSdlg #include <sys/errno.h>
28912623feSdlg 
29912623feSdlg #include <net/if.h>
30912623feSdlg #include <net/if_var.h>
31912623feSdlg #include <net/if_dl.h>
32912623feSdlg #include <net/if_types.h>
33912623feSdlg #include <net/route.h>
34912623feSdlg 
35912623feSdlg #include <netinet/in.h>
36912623feSdlg #include <netinet/ip.h>
37912623feSdlg 
38912623feSdlg #ifdef INET6
39912623feSdlg #include <netinet/ip6.h>
40912623feSdlg #endif
41912623feSdlg 
42912623feSdlg #include <netmpls/mpls.h>
43912623feSdlg 
44912623feSdlg #if NBPFILTER > 0
45912623feSdlg #include <net/bpf.h>
46912623feSdlg #endif /* NBPFILTER */
47912623feSdlg 
48912623feSdlg struct mpip_neighbor {
49912623feSdlg 	struct shim_hdr		n_rshim;
50912623feSdlg 	struct sockaddr_storage	n_nexthop;
51912623feSdlg };
52912623feSdlg 
53912623feSdlg struct mpip_softc {
54912623feSdlg 	struct ifnet		sc_if;
55912623feSdlg 	unsigned int		sc_dead;
56912623feSdlg 	uint32_t		sc_flow; /* xor for mbuf flowid */
57912623feSdlg 
58f1dea89cSdlg 	int			sc_txhprio;
59a61f5d85Sdlg 	int			sc_rxhprio;
60912623feSdlg 	struct ifaddr		sc_ifa;
61912623feSdlg 	struct sockaddr_mpls	sc_smpls; /* Local label */
62912623feSdlg 	unsigned int		sc_rdomain;
63912623feSdlg 	struct mpip_neighbor	*sc_neighbor;
64912623feSdlg 
65912623feSdlg 	unsigned int		sc_cword; /* control word */
66912623feSdlg 	unsigned int		sc_fword; /* flow-aware transport */
67912623feSdlg 	int			sc_ttl;
68912623feSdlg };
69912623feSdlg 
70912623feSdlg void	mpipattach(int);
71912623feSdlg int	mpip_clone_create(struct if_clone *, int);
72912623feSdlg int	mpip_clone_destroy(struct ifnet *);
73912623feSdlg int	mpip_ioctl(struct ifnet *, u_long, caddr_t);
74912623feSdlg int	mpip_output(struct ifnet *, struct mbuf *, struct sockaddr *,
75912623feSdlg 	    struct rtentry *);
76912623feSdlg void	mpip_start(struct ifnet *);
77912623feSdlg 
78912623feSdlg struct if_clone mpip_cloner =
79912623feSdlg     IF_CLONE_INITIALIZER("mpip", mpip_clone_create, mpip_clone_destroy);
80912623feSdlg 
81912623feSdlg void
mpipattach(int n)82912623feSdlg mpipattach(int n)
83912623feSdlg {
84912623feSdlg 	if_clone_attach(&mpip_cloner);
85912623feSdlg }
86912623feSdlg 
87912623feSdlg int
mpip_clone_create(struct if_clone * ifc,int unit)88912623feSdlg mpip_clone_create(struct if_clone *ifc, int unit)
89912623feSdlg {
90912623feSdlg 	struct mpip_softc *sc;
91912623feSdlg 	struct ifnet *ifp;
92912623feSdlg 
93912623feSdlg 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
94912623feSdlg 	if (sc == NULL)
95912623feSdlg 		return (ENOMEM);
96912623feSdlg 
97f1dea89cSdlg 	sc->sc_txhprio = 0;
98a61f5d85Sdlg 	sc->sc_rxhprio = IF_HDRPRIO_PACKET;
99912623feSdlg 	sc->sc_neighbor = 0;
100912623feSdlg 	sc->sc_cword = 0; /* default to no control word */
101912623feSdlg 	sc->sc_fword = 0; /* both sides have to agree on FAT first */
102912623feSdlg 	sc->sc_flow = arc4random() & 0xfffff;
103912623feSdlg 	sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls);
104912623feSdlg 	sc->sc_smpls.smpls_family = AF_MPLS;
105912623feSdlg 	sc->sc_ttl = -1;
106912623feSdlg 
107912623feSdlg 	ifp = &sc->sc_if;
108912623feSdlg 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
109912623feSdlg 	    ifc->ifc_name, unit);
110912623feSdlg 	ifp->if_softc = sc;
111912623feSdlg 	ifp->if_type = IFT_TUNNEL;
112912623feSdlg 	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
113912623feSdlg 	ifp->if_xflags = IFXF_CLONED;
114912623feSdlg 	ifp->if_ioctl = mpip_ioctl;
1152e3db693Sdlg 	ifp->if_bpf_mtap = p2p_bpf_mtap;
1162e3db693Sdlg 	ifp->if_input = p2p_input;
117912623feSdlg 	ifp->if_output = mpip_output;
118912623feSdlg 	ifp->if_start = mpip_start;
119912623feSdlg 	ifp->if_rtrequest = p2p_rtrequest;
120912623feSdlg 	ifp->if_mtu = 1500;
121912623feSdlg 	ifp->if_hardmtu = 65535;
122912623feSdlg 
123938ff1aeSbluhm 	if_counters_alloc(ifp);
124*b5b8bc53Smvs 	if_attach(ifp);
125912623feSdlg 	if_alloc_sadl(ifp);
126912623feSdlg 
127912623feSdlg #if NBPFILTER > 0
128912623feSdlg 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
129912623feSdlg #endif
130912623feSdlg 
13118a44669Sbluhm 	refcnt_init_trace(&sc->sc_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR);
132912623feSdlg 	sc->sc_ifa.ifa_ifp = ifp;
133912623feSdlg 	sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl);
134912623feSdlg 
135912623feSdlg 	return (0);
136912623feSdlg }
137912623feSdlg 
138912623feSdlg int
mpip_clone_destroy(struct ifnet * ifp)139912623feSdlg mpip_clone_destroy(struct ifnet *ifp)
140912623feSdlg {
141912623feSdlg 	struct mpip_softc *sc = ifp->if_softc;
142912623feSdlg 
143912623feSdlg 	NET_LOCK();
144912623feSdlg 	ifp->if_flags &= ~IFF_RUNNING;
145912623feSdlg 	sc->sc_dead = 1;
146912623feSdlg 
147912623feSdlg 	if (sc->sc_smpls.smpls_label) {
148912623feSdlg 		rt_ifa_del(&sc->sc_ifa, RTF_LOCAL | RTF_MPLS,
149c7bf3b0bSkn 		    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
150912623feSdlg 	}
151912623feSdlg 	NET_UNLOCK();
152912623feSdlg 
153912623feSdlg 	ifq_barrier(&ifp->if_snd);
154912623feSdlg 
155912623feSdlg 	if_detach(ifp);
15618a44669Sbluhm 	if (refcnt_rele(&sc->sc_ifa.ifa_refcnt) == 0) {
15718a44669Sbluhm 		panic("%s: ifa refcnt has %u refs", __func__,
15818a44669Sbluhm 		    sc->sc_ifa.ifa_refcnt.r_refs);
15918a44669Sbluhm 	}
160912623feSdlg 	free(sc->sc_neighbor, M_DEVBUF, sizeof(*sc->sc_neighbor));
161912623feSdlg 	free(sc, M_DEVBUF, sizeof(*sc));
162912623feSdlg 
163912623feSdlg 	return (0);
164912623feSdlg }
165912623feSdlg 
166912623feSdlg static int
mpip_set_route(struct mpip_softc * sc,uint32_t shim,unsigned int rdomain)167912623feSdlg mpip_set_route(struct mpip_softc *sc, uint32_t shim, unsigned int rdomain)
168912623feSdlg {
169912623feSdlg 	int error;
170912623feSdlg 
171912623feSdlg 	rt_ifa_del(&sc->sc_ifa, RTF_MPLS | RTF_LOCAL,
172c7bf3b0bSkn 	    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
173912623feSdlg 
174912623feSdlg 	sc->sc_smpls.smpls_label = shim;
175912623feSdlg 	sc->sc_rdomain = rdomain;
176912623feSdlg 
177f8890659Skn 	/* only install with a label or mpip_clone_destroy() will ignore it */
178f8890659Skn 	if (sc->sc_smpls.smpls_label == MPLS_LABEL2SHIM(0))
179f8890659Skn 		return 0;
180f8890659Skn 
181912623feSdlg 	error = rt_ifa_add(&sc->sc_ifa, RTF_MPLS | RTF_LOCAL,
182c7bf3b0bSkn 	    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
183912623feSdlg 	if (error) {
184912623feSdlg 		sc->sc_smpls.smpls_label = MPLS_LABEL2SHIM(0);
185912623feSdlg 		return (error);
186912623feSdlg 	}
187912623feSdlg 
188912623feSdlg 	return (0);
189912623feSdlg }
190912623feSdlg 
191912623feSdlg static int
mpip_set_label(struct mpip_softc * sc,struct ifreq * ifr)192912623feSdlg mpip_set_label(struct mpip_softc *sc, struct ifreq *ifr)
193912623feSdlg {
194912623feSdlg 	struct shim_hdr label;
195912623feSdlg 	uint32_t shim;
196912623feSdlg 	int error;
197912623feSdlg 
198912623feSdlg 	error = copyin(ifr->ifr_data, &label, sizeof(label));
199912623feSdlg 	if (error != 0)
200912623feSdlg 		return (error);
201912623feSdlg 
202912623feSdlg 	if (label.shim_label > MPLS_LABEL_MAX ||
203912623feSdlg 	    label.shim_label <= MPLS_LABEL_RESERVED_MAX)
204912623feSdlg 		return (EINVAL);
205912623feSdlg 
206912623feSdlg 	shim = MPLS_LABEL2SHIM(label.shim_label);
207912623feSdlg 
208912623feSdlg 	if (sc->sc_smpls.smpls_label == shim)
209912623feSdlg 		return (0);
210912623feSdlg 
211912623feSdlg 	return (mpip_set_route(sc, shim, sc->sc_rdomain));
212912623feSdlg }
213912623feSdlg 
214912623feSdlg static int
mpip_get_label(struct mpip_softc * sc,struct ifreq * ifr)215912623feSdlg mpip_get_label(struct mpip_softc *sc, struct ifreq *ifr)
216912623feSdlg {
217912623feSdlg 	struct shim_hdr label;
218912623feSdlg 
219912623feSdlg 	label.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label);
220912623feSdlg 
221417c8aceSdlg 	if (label.shim_label == 0)
222912623feSdlg 		return (EADDRNOTAVAIL);
223912623feSdlg 
224912623feSdlg 	return (copyout(&label, ifr->ifr_data, sizeof(label)));
225912623feSdlg }
226912623feSdlg 
227912623feSdlg static int
mpip_del_label(struct mpip_softc * sc)228912623feSdlg mpip_del_label(struct mpip_softc *sc)
229912623feSdlg {
230912623feSdlg 	if (sc->sc_smpls.smpls_label != MPLS_LABEL2SHIM(0)) {
231912623feSdlg 		rt_ifa_del(&sc->sc_ifa, RTF_MPLS | RTF_LOCAL,
232c7bf3b0bSkn 		    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
233912623feSdlg 	}
234912623feSdlg 
235912623feSdlg 	sc->sc_smpls.smpls_label = MPLS_LABEL2SHIM(0);
236912623feSdlg 
237912623feSdlg 	return (0);
238912623feSdlg }
239912623feSdlg 
240912623feSdlg static int
mpip_set_neighbor(struct mpip_softc * sc,struct if_laddrreq * req)241912623feSdlg mpip_set_neighbor(struct mpip_softc *sc, struct if_laddrreq *req)
242912623feSdlg {
243912623feSdlg 	struct mpip_neighbor *n, *o;
244912623feSdlg 	struct sockaddr *sa = (struct sockaddr *)&req->addr;
245912623feSdlg 	struct sockaddr_mpls *smpls = (struct sockaddr_mpls *)&req->dstaddr;
246912623feSdlg 	uint32_t label;
247912623feSdlg 
248912623feSdlg 	if (smpls->smpls_family != AF_MPLS)
249912623feSdlg 		return (EINVAL);
250912623feSdlg 	label = smpls->smpls_label;
251912623feSdlg 	if (label > MPLS_LABEL_MAX || label <= MPLS_LABEL_RESERVED_MAX)
252912623feSdlg 		return (EINVAL);
253912623feSdlg 
254912623feSdlg 	switch (sa->sa_family) {
255912623feSdlg 	case AF_INET: {
256912623feSdlg 		struct sockaddr_in *sin = (struct sockaddr_in *)sa;
257912623feSdlg 
258912623feSdlg 		if (in_nullhost(sin->sin_addr) ||
259912623feSdlg 		    IN_MULTICAST(sin->sin_addr.s_addr))
260912623feSdlg 			return (EINVAL);
261912623feSdlg 
262912623feSdlg 		break;
263912623feSdlg 	}
264912623feSdlg #ifdef INET6
265912623feSdlg 	case AF_INET6: {
266912623feSdlg 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
267912623feSdlg 
268912623feSdlg 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
269912623feSdlg 		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
270912623feSdlg 			return (EINVAL);
271912623feSdlg 
272912623feSdlg 		/* check scope */
273912623feSdlg 
274912623feSdlg 		break;
275912623feSdlg 	}
276912623feSdlg #endif
277912623feSdlg 	default:
278912623feSdlg 		return (EAFNOSUPPORT);
279912623feSdlg 	}
280912623feSdlg 
281912623feSdlg 	if (sc->sc_dead)
282912623feSdlg 		return (ENXIO);
283912623feSdlg 
284912623feSdlg 	n = malloc(sizeof(*n), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
285912623feSdlg 	if (n == NULL)
286912623feSdlg 		return (ENOMEM);
287912623feSdlg 
288912623feSdlg 	n->n_rshim.shim_label = MPLS_LABEL2SHIM(label);
289912623feSdlg 	n->n_nexthop = req->addr;
290912623feSdlg 
291912623feSdlg 	o = sc->sc_neighbor;
292912623feSdlg 	sc->sc_neighbor = n;
293912623feSdlg 
294912623feSdlg 	NET_UNLOCK();
295912623feSdlg 	ifq_barrier(&sc->sc_if.if_snd);
296912623feSdlg 	NET_LOCK();
297912623feSdlg 
298912623feSdlg 	free(o, M_DEVBUF, sizeof(*o));
299912623feSdlg 
300912623feSdlg 	return (0);
301912623feSdlg }
302912623feSdlg 
303912623feSdlg static int
mpip_get_neighbor(struct mpip_softc * sc,struct if_laddrreq * req)304912623feSdlg mpip_get_neighbor(struct mpip_softc *sc, struct if_laddrreq *req)
305912623feSdlg {
306912623feSdlg 	struct sockaddr_mpls *smpls = (struct sockaddr_mpls *)&req->dstaddr;
307912623feSdlg 	struct mpip_neighbor *n = sc->sc_neighbor;
308912623feSdlg 
309912623feSdlg 	if (n == NULL)
310912623feSdlg 		return (EADDRNOTAVAIL);
311912623feSdlg 
312912623feSdlg 	smpls->smpls_len = sizeof(*smpls);
313912623feSdlg 	smpls->smpls_family = AF_MPLS;
314912623feSdlg 	smpls->smpls_label = MPLS_SHIM2LABEL(n->n_rshim.shim_label);
315912623feSdlg 	req->addr = n->n_nexthop;
316912623feSdlg 
317912623feSdlg 	return (0);
318912623feSdlg }
319912623feSdlg 
320912623feSdlg static int
mpip_del_neighbor(struct mpip_softc * sc,struct ifreq * req)321912623feSdlg mpip_del_neighbor(struct mpip_softc *sc, struct ifreq *req)
322912623feSdlg {
323912623feSdlg 	struct mpip_neighbor *o;
324912623feSdlg 
325912623feSdlg 	if (sc->sc_dead)
326912623feSdlg 		return (ENXIO);
327912623feSdlg 
328912623feSdlg 	o = sc->sc_neighbor;
329912623feSdlg 	sc->sc_neighbor = NULL;
330912623feSdlg 
331912623feSdlg 	NET_UNLOCK();
332912623feSdlg 	ifq_barrier(&sc->sc_if.if_snd);
333912623feSdlg 	NET_LOCK();
334912623feSdlg 
335912623feSdlg 	free(o, M_DEVBUF, sizeof(*o));
336912623feSdlg 
337912623feSdlg 	return (0);
338912623feSdlg }
339912623feSdlg 
340912623feSdlg int
mpip_ioctl(struct ifnet * ifp,u_long cmd,caddr_t data)341912623feSdlg mpip_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
342912623feSdlg {
343912623feSdlg 	struct mpip_softc *sc = ifp->if_softc;
344912623feSdlg 	struct ifreq *ifr = (struct ifreq *)data;
345912623feSdlg 	int error = 0;
346912623feSdlg 
347912623feSdlg 	switch (cmd) {
348912623feSdlg 	case SIOCSIFADDR:
349912623feSdlg 		break;
350912623feSdlg 	case SIOCSIFFLAGS:
351912623feSdlg 		if ((ifp->if_flags & IFF_UP))
352912623feSdlg 			ifp->if_flags |= IFF_RUNNING;
353912623feSdlg 		else
354912623feSdlg 			ifp->if_flags &= ~IFF_RUNNING;
355912623feSdlg 		break;
356912623feSdlg 	case SIOCSIFMTU:
357912623feSdlg 		if (ifr->ifr_mtu < 60 || /* XXX */
358912623feSdlg 		    ifr->ifr_mtu > 65536) /* XXX */
359912623feSdlg 			error = EINVAL;
360912623feSdlg 		else
361912623feSdlg 			ifp->if_mtu = ifr->ifr_mtu;
362912623feSdlg 		break;
363912623feSdlg 
364912623feSdlg 	case SIOCGPWE3:
365912623feSdlg 		ifr->ifr_pwe3 = IF_PWE3_IP;
366912623feSdlg 		break;
367912623feSdlg 	case SIOCSPWE3CTRLWORD:
368912623feSdlg 		sc->sc_cword = ifr->ifr_pwe3 ? 1 : 0;
369912623feSdlg 		break;
370912623feSdlg 	case SIOCGPWE3CTRLWORD:
371912623feSdlg 		ifr->ifr_pwe3 = sc->sc_cword;
372912623feSdlg 		break;
373912623feSdlg 	case SIOCSPWE3FAT:
374912623feSdlg 		sc->sc_fword = ifr->ifr_pwe3 ? 1 : 0;
375912623feSdlg 		break;
376912623feSdlg 	case SIOCGPWE3FAT:
377912623feSdlg 		ifr->ifr_pwe3 = sc->sc_fword;
378912623feSdlg 		break;
379912623feSdlg 
380912623feSdlg 	case SIOCSETLABEL:
381912623feSdlg 		error = mpip_set_label(sc, ifr);
382912623feSdlg 		break;
383912623feSdlg 	case SIOCGETLABEL:
384912623feSdlg 		error = mpip_get_label(sc, ifr);
385912623feSdlg 		break;
386912623feSdlg 	case SIOCDELLABEL:
387912623feSdlg 		error = mpip_del_label(sc);
388912623feSdlg 		break;
389912623feSdlg 
390912623feSdlg 	case SIOCSPWE3NEIGHBOR:
391912623feSdlg 		error = mpip_set_neighbor(sc, (struct if_laddrreq *)data);
392912623feSdlg 		break;
393912623feSdlg 	case SIOCGPWE3NEIGHBOR:
394912623feSdlg 		error = mpip_get_neighbor(sc, (struct if_laddrreq *)data);
395912623feSdlg 		break;
396912623feSdlg 	case SIOCDPWE3NEIGHBOR:
397912623feSdlg 		error = mpip_del_neighbor(sc, ifr);
398912623feSdlg 		break;
399912623feSdlg 
400912623feSdlg 	case SIOCSLIFPHYRTABLE:
401912623feSdlg 		if (ifr->ifr_rdomainid < 0 ||
402912623feSdlg 		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
403912623feSdlg 		    !rtable_exists(ifr->ifr_rdomainid) ||
404912623feSdlg 		    ifr->ifr_rdomainid != rtable_l2(ifr->ifr_rdomainid)) {
405912623feSdlg 			error = EINVAL;
406912623feSdlg 			break;
407912623feSdlg 		}
408912623feSdlg 		if (sc->sc_rdomain != ifr->ifr_rdomainid) {
409912623feSdlg 			error = mpip_set_route(sc, sc->sc_smpls.smpls_label,
410912623feSdlg 			    ifr->ifr_rdomainid);
411912623feSdlg 		}
412912623feSdlg 		break;
413912623feSdlg 	case SIOCGLIFPHYRTABLE:
414912623feSdlg 		ifr->ifr_rdomainid = sc->sc_rdomain;
415912623feSdlg 		break;
416912623feSdlg 
417912623feSdlg 	case SIOCSLIFPHYTTL:
418912623feSdlg 		if (ifr->ifr_ttl != -1 &&
419912623feSdlg 		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
420912623feSdlg 			error = EINVAL;
421912623feSdlg 			break;
422912623feSdlg 		}
423912623feSdlg 
424912623feSdlg 		/* commit */
425912623feSdlg 		sc->sc_ttl = ifr->ifr_ttl;
426912623feSdlg 		break;
427912623feSdlg 	case SIOCGLIFPHYTTL:
428912623feSdlg 		ifr->ifr_ttl = sc->sc_ttl;
429912623feSdlg 		break;
430912623feSdlg 
431f1dea89cSdlg 	case SIOCSTXHPRIO:
432b9e5cef3Sdlg 		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
433b9e5cef3Sdlg 		if (error != 0)
434f1dea89cSdlg 			break;
435f1dea89cSdlg 
436f1dea89cSdlg 		sc->sc_txhprio = ifr->ifr_hdrprio;
437f1dea89cSdlg 		break;
438f1dea89cSdlg 	case SIOCGTXHPRIO:
439f1dea89cSdlg 		ifr->ifr_hdrprio = sc->sc_txhprio;
440f1dea89cSdlg 		break;
441f1dea89cSdlg 
442a61f5d85Sdlg 	case SIOCSRXHPRIO:
443b9e5cef3Sdlg 		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
444b9e5cef3Sdlg 		if (error != 0)
445a61f5d85Sdlg 			break;
446a61f5d85Sdlg 
447a61f5d85Sdlg 		sc->sc_rxhprio = ifr->ifr_hdrprio;
448a61f5d85Sdlg 		break;
449a61f5d85Sdlg 	case SIOCGRXHPRIO:
450a61f5d85Sdlg 		ifr->ifr_hdrprio = sc->sc_rxhprio;
451a61f5d85Sdlg 		break;
452a61f5d85Sdlg 
453912623feSdlg 	case SIOCADDMULTI:
454912623feSdlg 	case SIOCDELMULTI:
455912623feSdlg 		break;
456912623feSdlg 
457912623feSdlg 	default:
458912623feSdlg 		error = ENOTTY;
459912623feSdlg 		break;
460912623feSdlg 	}
461912623feSdlg 
462912623feSdlg 	return (error);
463912623feSdlg }
464912623feSdlg 
465912623feSdlg static void
mpip_input(struct mpip_softc * sc,struct mbuf * m)466912623feSdlg mpip_input(struct mpip_softc *sc, struct mbuf *m)
467912623feSdlg {
468912623feSdlg 	struct ifnet *ifp = &sc->sc_if;
469a61f5d85Sdlg 	int rxprio = sc->sc_rxhprio;
470a61f5d85Sdlg 	uint32_t shim, exp;
471912623feSdlg 	struct mbuf *n;
472a61f5d85Sdlg 	uint8_t ttl, tos;
473912623feSdlg 
474912623feSdlg 	if (!ISSET(ifp->if_flags, IFF_RUNNING))
475912623feSdlg 		goto drop;
476912623feSdlg 
477912623feSdlg 	shim = *mtod(m, uint32_t *);
478912623feSdlg 	m_adj(m, sizeof(shim));
479912623feSdlg 
480912623feSdlg 	ttl = ntohl(shim & MPLS_TTL_MASK);
481a61f5d85Sdlg 	exp = ntohl(shim & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET;
482912623feSdlg 
483912623feSdlg 	if (sc->sc_fword) {
484912623feSdlg 		uint32_t label;
485912623feSdlg 
486912623feSdlg 		if (MPLS_BOS_ISSET(shim))
487912623feSdlg 			goto drop;
488912623feSdlg 
489912623feSdlg 		if (m->m_len < sizeof(shim)) {
490912623feSdlg 			m = m_pullup(m, sizeof(shim));
491912623feSdlg 			if (m == NULL)
492912623feSdlg 				return;
493912623feSdlg 		}
494912623feSdlg 
495912623feSdlg 		shim = *mtod(m, uint32_t *);
496912623feSdlg 		if (!MPLS_BOS_ISSET(shim))
497912623feSdlg 			goto drop;
498912623feSdlg 
499912623feSdlg 		label = MPLS_SHIM2LABEL(shim);
500912623feSdlg 		if (label <= MPLS_LABEL_RESERVED_MAX) {
501912623feSdlg 			counters_inc(ifp->if_counters, ifc_noproto); /* ? */
502912623feSdlg 			goto drop;
503912623feSdlg 		}
504912623feSdlg 
505912623feSdlg 		label -= MPLS_LABEL_RESERVED_MAX + 1;
506912623feSdlg 		label ^= sc->sc_flow;
507379654e8Sdlg 		SET(m->m_pkthdr.csum_flags, M_FLOWID);
508379654e8Sdlg 		m->m_pkthdr.ph_flowid = label;
509912623feSdlg 
510912623feSdlg 		m_adj(m, sizeof(shim));
511912623feSdlg 	} else if (!MPLS_BOS_ISSET(shim))
512912623feSdlg 		goto drop;
513912623feSdlg 
514912623feSdlg 	if (sc->sc_cword) {
515912623feSdlg 		if (m->m_len < sizeof(shim)) {
516912623feSdlg 			m = m_pullup(m, sizeof(shim));
517912623feSdlg 			if (m == NULL)
518912623feSdlg 				return;
519912623feSdlg 		}
520912623feSdlg 		shim = *mtod(m, uint32_t *);
521912623feSdlg 
522912623feSdlg 		/*
523912623feSdlg 		 * The first 4 bits identifies that this packet is a
524912623feSdlg 		 * control word. If the control word is configured and
525912623feSdlg 		 * we received an IP datagram we shall drop it.
526912623feSdlg 		 */
527912623feSdlg 		if (shim & CW_ZERO_MASK) {
528912623feSdlg 			counters_inc(ifp->if_counters, ifc_ierrors);
529912623feSdlg 			goto drop;
530912623feSdlg 		}
531912623feSdlg 
532912623feSdlg 		/* We don't support fragmentation just yet. */
533912623feSdlg 		if (shim & CW_FRAG_MASK) {
534912623feSdlg 			counters_inc(ifp->if_counters, ifc_ierrors);
535912623feSdlg 			goto drop;
536912623feSdlg 		}
537912623feSdlg 
538912623feSdlg 		m_adj(m, sizeof(shim));
539912623feSdlg 	}
540912623feSdlg 
541912623feSdlg 	n = m;
542912623feSdlg 	while (n->m_len == 0) {
543912623feSdlg 		n = n->m_next;
544912623feSdlg 		if (n == NULL)
545912623feSdlg 			goto drop;
546912623feSdlg 	}
547912623feSdlg 
548912623feSdlg 	switch (*mtod(n, uint8_t *) >> 4) {
549a61f5d85Sdlg 	case 4: {
550a61f5d85Sdlg 		struct ip *ip;
551a61f5d85Sdlg 		if (m->m_len < sizeof(*ip)) {
552a61f5d85Sdlg 			m = m_pullup(m, sizeof(*ip));
553a61f5d85Sdlg 			if (m == NULL)
554a61f5d85Sdlg 				return;
555a61f5d85Sdlg 		}
556a61f5d85Sdlg 		ip = mtod(m, struct ip *);
557a61f5d85Sdlg 		tos = ip->ip_tos;
558a61f5d85Sdlg 
559912623feSdlg 		if (sc->sc_ttl == -1) {
560912623feSdlg 			m = mpls_ip_adjttl(m, ttl);
561912623feSdlg 			if (m == NULL)
562912623feSdlg 				return;
563912623feSdlg 		}
5642e3db693Sdlg 
565912623feSdlg 		m->m_pkthdr.ph_family = AF_INET;
566912623feSdlg 		break;
567a61f5d85Sdlg 	}
568912623feSdlg #ifdef INET6
569a61f5d85Sdlg 	case 6: {
570a61f5d85Sdlg 		struct ip6_hdr *ip6;
571a61f5d85Sdlg 		uint32_t flow;
572a61f5d85Sdlg 		if (m->m_len < sizeof(*ip6)) {
573a61f5d85Sdlg 			m = m_pullup(m, sizeof(*ip6));
574a61f5d85Sdlg 			if (m == NULL)
575a61f5d85Sdlg 				return;
576a61f5d85Sdlg 		}
577a61f5d85Sdlg 		ip6 = mtod(m, struct ip6_hdr *);
578a61f5d85Sdlg 		flow = bemtoh32(&ip6->ip6_flow);
579a61f5d85Sdlg 		tos = flow >> 20;
580a61f5d85Sdlg 
581912623feSdlg 		if (sc->sc_ttl == -1) {
582912623feSdlg 			m = mpls_ip6_adjttl(m, ttl);
583912623feSdlg 			if (m == NULL)
584912623feSdlg 				return;
585912623feSdlg 		}
5862e3db693Sdlg 
587912623feSdlg 		m->m_pkthdr.ph_family = AF_INET6;
588912623feSdlg 		break;
589a61f5d85Sdlg 	}
590912623feSdlg #endif /* INET6 */
591912623feSdlg 	default:
592912623feSdlg 		counters_inc(ifp->if_counters, ifc_noproto);
593912623feSdlg 		goto drop;
594912623feSdlg 	}
595912623feSdlg 
596a61f5d85Sdlg 	switch (rxprio) {
597a61f5d85Sdlg 	case IF_HDRPRIO_PACKET:
598a61f5d85Sdlg 		/* nop */
599a61f5d85Sdlg 		break;
600a61f5d85Sdlg 	case IF_HDRPRIO_OUTER:
601a61f5d85Sdlg 		m->m_pkthdr.pf.prio = exp;
602a61f5d85Sdlg 		break;
603a61f5d85Sdlg 	case IF_HDRPRIO_PAYLOAD:
604a61f5d85Sdlg 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos);
605a61f5d85Sdlg 		break;
606a61f5d85Sdlg 	default:
607a61f5d85Sdlg 		m->m_pkthdr.pf.prio = rxprio;
608a61f5d85Sdlg 		break;
609a61f5d85Sdlg 	}
610a61f5d85Sdlg 
6112e3db693Sdlg 	if_vinput(ifp, m);
612912623feSdlg 	return;
613912623feSdlg drop:
614912623feSdlg 	m_freem(m);
615912623feSdlg }
616912623feSdlg 
617912623feSdlg int
mpip_output(struct ifnet * ifp,struct mbuf * m,struct sockaddr * dst,struct rtentry * rt)618912623feSdlg mpip_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
619912623feSdlg     struct rtentry *rt)
620912623feSdlg {
621912623feSdlg 	struct mpip_softc *sc = ifp->if_softc;
622912623feSdlg 	int error;
623912623feSdlg 
624912623feSdlg 	if (dst->sa_family == AF_LINK &&
625912623feSdlg 	    rt != NULL && ISSET(rt->rt_flags, RTF_LOCAL)) {
626912623feSdlg 		mpip_input(sc, m);
627912623feSdlg 		return (0);
628912623feSdlg 	}
629912623feSdlg 
630912623feSdlg 	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
631912623feSdlg 		error = ENETDOWN;
632912623feSdlg 		goto drop;
633912623feSdlg 	}
634912623feSdlg 
635912623feSdlg 	switch (dst->sa_family) {
636912623feSdlg 	case AF_INET:
637912623feSdlg #ifdef INET6
638912623feSdlg 	case AF_INET6:
639912623feSdlg #endif
640912623feSdlg 		break;
641912623feSdlg 	default:
642912623feSdlg 		error = EAFNOSUPPORT;
643912623feSdlg 		goto drop;
644912623feSdlg 	}
645912623feSdlg 
646912623feSdlg 	m->m_pkthdr.ph_family = dst->sa_family;
647912623feSdlg 
648912623feSdlg 	error = if_enqueue(ifp, m);
649912623feSdlg 	if (error)
650912623feSdlg 		counters_inc(ifp->if_counters, ifc_oerrors);
651912623feSdlg 	return (error);
652912623feSdlg 
653912623feSdlg drop:
654912623feSdlg 	m_freem(m);
655912623feSdlg 	return (error);
656912623feSdlg }
657912623feSdlg 
658912623feSdlg void
mpip_start(struct ifnet * ifp)659912623feSdlg mpip_start(struct ifnet *ifp)
660912623feSdlg {
661912623feSdlg 	struct mpip_softc *sc = ifp->if_softc;
662912623feSdlg 	struct mpip_neighbor *n = sc->sc_neighbor;
663912623feSdlg 	struct rtentry *rt;
664912623feSdlg 	struct ifnet *ifp0;
665912623feSdlg 	struct mbuf *m;
666912623feSdlg 	uint32_t shim;
667912623feSdlg 	struct sockaddr_mpls smpls = {
668912623feSdlg 		.smpls_len = sizeof(smpls),
669912623feSdlg 		.smpls_family = AF_MPLS,
670912623feSdlg 	};
671f1dea89cSdlg 	int txprio = sc->sc_txhprio;
672f1dea89cSdlg 	uint32_t exp, bos;
673f1dea89cSdlg 	uint8_t tos, prio, ttl;
674912623feSdlg 
675912623feSdlg 	if (!ISSET(ifp->if_flags, IFF_RUNNING) || n == NULL) {
67698da7c78Spatrick 		ifq_purge(&ifp->if_snd);
677912623feSdlg 		return;
678912623feSdlg 	}
679912623feSdlg 
680db4e5757Sdlg 	rt = rtalloc(sstosa(&n->n_nexthop), RT_RESOLVE, sc->sc_rdomain);
681912623feSdlg 	if (!rtisvalid(rt)) {
68298da7c78Spatrick 		ifq_purge(&ifp->if_snd);
683912623feSdlg 		goto rtfree;
684912623feSdlg 	}
685912623feSdlg 
686912623feSdlg 	ifp0 = if_get(rt->rt_ifidx);
687912623feSdlg 	if (ifp0 == NULL) {
68898da7c78Spatrick 		ifq_purge(&ifp->if_snd);
689912623feSdlg 		goto rtfree;
690912623feSdlg 	}
691912623feSdlg 
692912623feSdlg 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
693912623feSdlg #if NBPFILTER > 0
694912623feSdlg 		caddr_t if_bpf = sc->sc_if.if_bpf;
695912623feSdlg 		if (if_bpf) {
696912623feSdlg 			bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family,
697912623feSdlg 			    m, BPF_DIRECTION_OUT);
698912623feSdlg 		}
699912623feSdlg #endif /* NBPFILTER */
700912623feSdlg 
701912623feSdlg 		if (sc->sc_ttl == -1) {
702912623feSdlg 			switch (m->m_pkthdr.ph_family) {
703912623feSdlg 			case AF_INET: {
704912623feSdlg 				struct ip *ip;
705912623feSdlg 				ip = mtod(m, struct ip *);
706912623feSdlg 				ttl = ip->ip_ttl;
707912623feSdlg 				break;
708912623feSdlg 			}
709912623feSdlg #ifdef INET6
710912623feSdlg 			case AF_INET6: {
711912623feSdlg 				struct ip6_hdr *ip6;
712912623feSdlg 				ip6 = mtod(m, struct ip6_hdr *);
713912623feSdlg 				ttl = ip6->ip6_hlim;
714912623feSdlg 				break;
715912623feSdlg 			}
716912623feSdlg #endif
717912623feSdlg 			default:
718912623feSdlg 				unhandled_af(m->m_pkthdr.ph_family);
719912623feSdlg 			}
720912623feSdlg 		} else
721912623feSdlg 			ttl = mpls_defttl;
722912623feSdlg 
723f1dea89cSdlg 		switch (txprio) {
724f1dea89cSdlg 		case IF_HDRPRIO_PACKET:
725f1dea89cSdlg 			prio = m->m_pkthdr.pf.prio;
726f1dea89cSdlg 			break;
727f1dea89cSdlg 		case IF_HDRPRIO_PAYLOAD:
728f1dea89cSdlg 			switch (m->m_pkthdr.ph_family) {
729f1dea89cSdlg 			case AF_INET: {
730f1dea89cSdlg 				struct ip *ip;
731f1dea89cSdlg 				ip = mtod(m, struct ip *);
732f1dea89cSdlg 				tos = ip->ip_tos;
733f1dea89cSdlg 				break;
734f1dea89cSdlg 			}
735f1dea89cSdlg #ifdef INET6
736f1dea89cSdlg 			case AF_INET6: {
737f1dea89cSdlg 				struct ip6_hdr *ip6;
738f1dea89cSdlg 				uint32_t flow;
739f1dea89cSdlg 				ip6 = mtod(m, struct ip6_hdr *);
740f1dea89cSdlg 				flow = bemtoh32(&ip6->ip6_flow);
741f1dea89cSdlg 				tos = flow >> 20;
742f1dea89cSdlg 				break;
743f1dea89cSdlg 			}
744f1dea89cSdlg #endif
745f1dea89cSdlg 			default:
746f1dea89cSdlg 				unhandled_af(m->m_pkthdr.ph_family);
747f1dea89cSdlg 			}
748f1dea89cSdlg 
749f1dea89cSdlg 			prio = IFQ_TOS2PRIO(tos);
750f1dea89cSdlg 			break;
751f1dea89cSdlg 		default:
752f1dea89cSdlg 			prio = txprio;
753f1dea89cSdlg 			break;
754f1dea89cSdlg 		}
755f1dea89cSdlg 		exp = htonl(prio << MPLS_EXP_OFFSET);
756f1dea89cSdlg 
757912623feSdlg 		if (sc->sc_cword) {
758912623feSdlg 			m = m_prepend(m, sizeof(shim), M_NOWAIT);
759912623feSdlg 			if (m == NULL)
760912623feSdlg 				continue;
761912623feSdlg 
762912623feSdlg 			*mtod(m, uint32_t *) = 0;
763912623feSdlg 		}
764912623feSdlg 
765912623feSdlg 		bos = MPLS_BOS_MASK;
766912623feSdlg 
767912623feSdlg 		if (sc->sc_fword) {
768912623feSdlg 			uint32_t flow = 0;
769912623feSdlg 			m = m_prepend(m, sizeof(shim), M_NOWAIT);
770912623feSdlg 			if (m == NULL)
771912623feSdlg 				continue;
772912623feSdlg 
773379654e8Sdlg 			if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
774379654e8Sdlg 				flow = m->m_pkthdr.ph_flowid;
775912623feSdlg 			flow ^= sc->sc_flow;
776912623feSdlg 			flow += MPLS_LABEL_RESERVED_MAX + 1;
777912623feSdlg 
778912623feSdlg 			shim = htonl(1) & MPLS_TTL_MASK;
779912623feSdlg 			shim |= htonl(flow << MPLS_LABEL_OFFSET) &
780912623feSdlg 			    MPLS_LABEL_MASK;
781f1dea89cSdlg 			shim |= exp | bos;
782912623feSdlg 			*mtod(m, uint32_t *) = shim;
783912623feSdlg 
784912623feSdlg 			bos = 0;
785912623feSdlg 		}
786912623feSdlg 
787912623feSdlg 		m = m_prepend(m, sizeof(shim), M_NOWAIT);
788912623feSdlg 		if (m == NULL)
789912623feSdlg 			continue;
790912623feSdlg 
791912623feSdlg 		shim = htonl(ttl) & MPLS_TTL_MASK;
792912623feSdlg 		shim |= n->n_rshim.shim_label;
793f1dea89cSdlg 		shim |= exp | bos;
794912623feSdlg 		*mtod(m, uint32_t *) = shim;
795912623feSdlg 
796db4e5757Sdlg 		m->m_pkthdr.ph_rtableid = sc->sc_rdomain;
797957fd304Sdlg 		CLR(m->m_flags, M_BCAST|M_MCAST);
798db4e5757Sdlg 
799912623feSdlg 		mpls_output(ifp0, m, (struct sockaddr *)&smpls, rt);
800912623feSdlg 	}
801912623feSdlg 
802912623feSdlg 	if_put(ifp0);
803912623feSdlg rtfree:
804912623feSdlg 	rtfree(rt);
805912623feSdlg }
806