xref: /netbsd-src/sys/netipsec/ipsecif.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: ipsecif.c,v 1.10 2018/05/31 07:03:57 maxv Exp $  */
2 
3 /*
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: ipsecif.c,v 1.10 2018/05/31 07:03:57 maxv Exp $");
31 
32 #ifdef _KERNEL_OPT
33 #include "opt_inet.h"
34 #include "opt_ipsec.h"
35 #endif
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/socket.h>
40 #include <sys/sockio.h>
41 #include <sys/mbuf.h>
42 #include <sys/errno.h>
43 #include <sys/ioctl.h>
44 #include <sys/syslog.h>
45 #include <sys/kernel.h>
46 
47 #include <net/if.h>
48 #include <net/route.h>
49 
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip_var.h>
54 #include <netinet/in_var.h>
55 #include <netinet/ip_encap.h>
56 #include <netinet/ip_ecn.h>
57 #include <netinet/ip_private.h>
58 #include <netinet/udp.h>
59 
60 #ifdef INET6
61 #include <netinet/ip6.h>
62 #include <netinet6/ip6_var.h>
63 #include <netinet6/ip6_private.h>
64 #include <netinet6/in6_var.h>
65 #include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
66 #include <netinet/ip_ecn.h>
67 #endif
68 
69 #include <netipsec/key.h>
70 #include <netipsec/ipsecif.h>
71 
72 #include <net/if_ipsec.h>
73 
/* Handlers for tunnels whose outer header is IPv4. */
static void ipsecif4_input(struct mbuf *m, int off, int proto, void *eparg);
static int ipsecif4_output(struct ipsec_variant *var, int family,
	struct mbuf *m);
static int ipsecif4_filter4(const struct ip *ip, struct ipsec_variant *var,
	struct ifnet *ifp);

#ifdef INET6
/* Handlers for tunnels whose outer header is IPv6. */
static int ipsecif6_input(struct mbuf **mp, int *offp, int proto,
	void *eparg);
static int ipsecif6_output(struct ipsec_variant *var, int family,
	struct mbuf *m);
static int ipsecif6_filter6(const struct ip6_hdr *ip6,
	struct ipsec_variant *var, struct ifnet *ifp);
#endif
85 
86 static int ip_ipsec_ttl = IPSEC_TTL;
87 static int ip_ipsec_copy_tos = 0;
88 #ifdef INET6
89 static int ip6_ipsec_hlim = IPSEC_HLIM;
90 static int ip6_ipsec_pmtu = 0; /* XXX: per interface configuration?? */
91 static int ip6_ipsec_copy_tos = 0;
92 #endif
93 
94 static const struct encapsw ipsecif4_encapsw = {
95 	.encapsw4 = {
96 		.pr_input = ipsecif4_input,
97 		.pr_ctlinput = NULL,
98 	}
99 };
100 
101 #ifdef INET6
102 static const struct encapsw ipsecif6_encapsw;
103 #endif
104 
105 static struct mbuf *
106 ipsecif4_prepend_hdr(struct ipsec_variant *var, struct mbuf *m,
107     uint8_t proto, uint8_t tos)
108 {
109 	struct ip *ip;
110 	struct sockaddr_in *src, *dst;
111 
112 	src = satosin(var->iv_psrc);
113 	dst = satosin(var->iv_pdst);
114 
115 	if (in_nullhost(src->sin_addr) || in_nullhost(src->sin_addr) ||
116 	    src->sin_addr.s_addr == INADDR_BROADCAST ||
117 	    dst->sin_addr.s_addr == INADDR_BROADCAST) {
118 		m_freem(m);
119 		return NULL;
120 	}
121 	m->m_flags &= ~M_BCAST;
122 
123 	if (IN_MULTICAST(src->sin_addr.s_addr) ||
124 	    IN_MULTICAST(dst->sin_addr.s_addr)) {
125 		m_freem(m);
126 		return NULL;
127 	}
128 
129 	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
130 	if (m && M_UNWRITABLE(m, sizeof(struct ip)))
131 		m = m_pullup(m, sizeof(struct ip));
132 	if (m == NULL)
133 		return NULL;
134 
135 	ip = mtod(m, struct ip *);
136 	ip->ip_v = IPVERSION;
137 	ip->ip_off = htons(0);
138 	if (m->m_pkthdr.len < IP_MINFRAGSIZE)
139 		ip->ip_id = 0;
140 	else
141 		ip->ip_id = ip_newid(NULL);
142 	ip->ip_hl = sizeof(*ip) >> 2;
143 	if (ip_ipsec_copy_tos)
144 		ip->ip_tos = tos;
145 	else
146 		ip->ip_tos = 0;
147 	ip->ip_sum = 0;
148 	ip->ip_src = src->sin_addr;
149 	ip->ip_dst = dst->sin_addr;
150 	ip->ip_p = proto;
151 	ip->ip_ttl = ip_ipsec_ttl;
152 	ip->ip_len = htons(m->m_pkthdr.len);
153 #ifndef IPSEC_TX_TOS_CLEAR
154 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
155 	if (ifp->if_flags & IFF_ECN)
156 		ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
157 	else
158 		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
159 #endif
160 
161 	return m;
162 }
163 
164 static int
165 ipsecif4_needfrag(struct mbuf *m, struct ipsecrequest *isr)
166 {
167 	struct ip ip0;
168 	struct ip *ip;
169 	int mtu;
170 	struct secasvar *sav;
171 
172 	sav = key_lookup_sa_bysaidx(&isr->saidx);
173 	if (sav == NULL)
174 		return 0;
175 
176 	if (!(sav->natt_type & UDP_ENCAP_ESPINUDP)) {
177 		mtu = 0;
178 		goto out;
179 	}
180 
181 	if (m->m_len < sizeof(struct ip)) {
182 		m_copydata(m, 0, sizeof(ip0), &ip0);
183 		ip = &ip0;
184 	} else {
185 		ip = mtod(m, struct ip *);
186 	}
187 	mtu = sav->esp_frag;
188 	if (ntohs(ip->ip_len) <= mtu)
189 		mtu = 0;
190 
191 out:
192 	KEY_SA_UNREF(&sav);
193 	return mtu;
194 }
195 
196 static struct mbuf *
197 ipsecif4_flowinfo(struct mbuf *m, int family, int *proto0, u_int8_t *tos0)
198 {
199 	const struct ip *ip;
200 	int proto;
201 	int tos;
202 
203 	KASSERT(proto0 != NULL);
204 	KASSERT(tos0 != NULL);
205 
206 	switch (family) {
207 	case AF_INET:
208 		proto = IPPROTO_IPV4;
209 		if (m->m_len < sizeof(*ip)) {
210 			m = m_pullup(m, sizeof(*ip));
211 			if (m == NULL) {
212 				*tos0 = 0;
213 				*proto0 = 0;
214 				return NULL;
215 			}
216 		}
217 		ip = mtod(m, const struct ip *);
218 		tos = ip->ip_tos;
219 		/* TODO: support ALTQ for innner packet */
220 		break;
221 #ifdef INET6
222 	case AF_INET6: {
223 		const struct ip6_hdr *ip6;
224 		proto = IPPROTO_IPV6;
225 		if (m->m_len < sizeof(*ip6)) {
226 			m = m_pullup(m, sizeof(*ip6));
227 			if (m == NULL) {
228 				*tos0 = 0;
229 				*proto0 = 0;
230 				return NULL;
231 			}
232 		}
233 		ip6 = mtod(m, const struct ip6_hdr *);
234 		tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
235 		/* TODO: support ALTQ for innner packet */
236 		break;
237 	}
238 #endif /* INET6 */
239 	default:
240 		*tos0 = 0;
241 		*proto0 = 0;
242 		return NULL;
243 	}
244 
245 	*proto0 = proto;
246 	*tos0 = tos;
247 	return m;
248 }
249 
250 static int
251 ipsecif4_fragout(struct ipsec_variant *var, int family, struct mbuf *m, int mtu)
252 {
253 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
254 	struct mbuf *next;
255 	struct m_tag *mtag;
256 	int error;
257 
258 	KASSERT(if_ipsec_heldref_variant(var));
259 
260 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
261 	if (mtag)
262 		m_tag_delete(m, mtag);
263 
264 	/* consider new IP header prepended in ipsecif4_output() */
265 	if (mtu <= sizeof(struct ip)) {
266 		m_freem(m);
267 		return ENETUNREACH;
268 	}
269 	m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
270 	error = ip_fragment(m, ifp, mtu - sizeof(struct ip));
271 	if (error)
272 		return error;
273 
274 	for (error = 0; m; m = next) {
275 		next = m->m_nextpkt;
276 		m->m_nextpkt = NULL;
277 		if (error) {
278 			m_freem(m);
279 			continue;
280 		}
281 
282 		error = ipsecif4_output(var, family, m);
283 	}
284 	if (error == 0)
285 		IP_STATINC(IP_STAT_FRAGMENTED);
286 
287 	return error;
288 }
289 
290 int
291 ipsecif4_encap_func(struct mbuf *m, struct ip *ip, struct ipsec_variant *var)
292 {
293 	struct m_tag *mtag;
294 	struct sockaddr_in *src, *dst;
295 	u_int16_t src_port = 0;
296 	u_int16_t dst_port = 0;
297 
298 	KASSERT(var != NULL);
299 
300 	src = satosin(var->iv_psrc);
301 	dst = satosin(var->iv_pdst);
302 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
303 	if (mtag) {
304 		u_int16_t *ports;
305 
306 		ports = (u_int16_t *)(mtag + 1);
307 		src_port = ports[0];
308 		dst_port = ports[1];
309 	}
310 
311 	/* address match */
312 	if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
313 	    dst->sin_addr.s_addr != ip->ip_src.s_addr)
314 		return 0;
315 
316 	/* UDP encap? */
317 	if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
318 		goto match;
319 
320 	/* port match */
321 	if (src_port != var->iv_dport ||
322 	    dst_port != var->iv_sport) {
323 #ifdef DEBUG
324 		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
325 		    __func__, ntohs(src_port), ntohs(dst_port),
326 		    ntohs(var->iv_sport), ntohs(var->iv_dport));
327 #endif
328 		return 0;
329 	}
330 
331 match:
332 	/*
333 	 * hide NAT-T information from encapsulated traffics.
334 	 * they don't know about IPsec.
335 	 */
336 	if (mtag)
337 		m_tag_delete(m, mtag);
338 	return sizeof(src->sin_addr) + sizeof(dst->sin_addr);
339 }
340 
341 static int
342 ipsecif4_output(struct ipsec_variant *var, int family, struct mbuf *m)
343 {
344 	struct secpolicy *sp = NULL;
345 	u_int8_t tos;
346 	int proto;
347 	int error;
348 	int mtu;
349 	u_long sa_mtu = 0;
350 
351 	KASSERT(if_ipsec_heldref_variant(var));
352 	KASSERT(if_ipsec_variant_is_configured(var));
353 	KASSERT(var->iv_psrc->sa_family == AF_INET);
354 	KASSERT(var->iv_pdst->sa_family == AF_INET);
355 
356 	sp = IV_SP_OUT(var);
357 	KASSERT(sp != NULL);
358 	/*
359 	 * The SPs in ipsec_variant are prevented from freed by
360 	 * ipsec_variant->iv_psref. So, KEY_SP_REF() is unnecessary here.
361 	 */
362 
363 	KASSERT(sp->policy != IPSEC_POLICY_NONE);
364 	KASSERT(sp->policy != IPSEC_POLICY_ENTRUST);
365 	KASSERT(sp->policy != IPSEC_POLICY_BYPASS);
366 	if (sp->policy != IPSEC_POLICY_IPSEC) {
367 		struct ifnet *ifp = &var->iv_softc->ipsec_if;
368 		m_freem(m);
369 		IF_DROP(&ifp->if_snd);
370 		return 0;
371 	}
372 
373 	/* get flowinfo */
374 	m = ipsecif4_flowinfo(m, family, &proto, &tos);
375 	if (m == NULL) {
376 		error = ENETUNREACH;
377 		goto done;
378 	}
379 
380 	/* prepend new IP header */
381 	m = ipsecif4_prepend_hdr(var, m, proto, tos);
382 	if (m == NULL) {
383 		error = ENETUNREACH;
384 		goto done;
385 	}
386 
387 	/*
388 	 * Normal netipsec's NAT-T fragmentation is done in ip_output().
389 	 * See "natt_frag" processing.
390 	 * However, ipsec(4) interface's one is not done in the same way,
391 	 * so we must do NAT-T fragmentation by own code.
392 	 */
393 	/* NAT-T ESP fragmentation */
394 	mtu = ipsecif4_needfrag(m, sp->req);
395 	if (mtu > 0)
396 		return ipsecif4_fragout(var, family, m, mtu);
397 
398 	/* IPsec output */
399 	IP_STATINC(IP_STAT_LOCALOUT);
400 	error = ipsec4_process_packet(m, sp->req, &sa_mtu);
401 	if (error == ENOENT)
402 		error = 0;
403 	/*
404 	 * frangmentation is already done in ipsecif4_fragout(),
405 	 * so ipsec4_process_packet() must not do fragmentation here.
406 	 */
407 	KASSERT(sa_mtu == 0);
408 
409 done:
410 	return error;
411 }
412 
413 #ifdef INET6
414 int
415 ipsecif6_encap_func(struct mbuf *m, struct ip6_hdr *ip6, struct ipsec_variant *var)
416 {
417 	struct m_tag *mtag;
418 	struct sockaddr_in6 *src, *dst;
419 	u_int16_t src_port = 0;
420 	u_int16_t dst_port = 0;
421 
422 	KASSERT(var != NULL);
423 
424 	src = satosin6(var->iv_psrc);
425 	dst = satosin6(var->iv_pdst);
426 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
427 	if (mtag) {
428 		u_int16_t *ports;
429 
430 		ports = (u_int16_t *)(mtag + 1);
431 		src_port = ports[0];
432 		dst_port = ports[1];
433 	}
434 
435 	/* address match */
436 	if (!IN6_ARE_ADDR_EQUAL(&src->sin6_addr, &ip6->ip6_dst) ||
437 	    !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_src))
438 		return 0;
439 
440 	/* UDP encap? */
441 	if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
442 		goto match;
443 
444 	/* port match */
445 	if (src_port != var->iv_dport ||
446 	    dst_port != var->iv_sport) {
447 #ifdef DEBUG
448 		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
449 		    __func__, ntohs(src_port), ntohs(dst_port),
450 		    ntohs(var->iv_sport), ntohs(var->iv_dport));
451 #endif
452 		return 0;
453 	}
454 
455 match:
456 	/*
457 	 * hide NAT-T information from encapsulated traffics.
458 	 * they don't know about IPsec.
459 	 */
460 	if (mtag)
461 		m_tag_delete(m, mtag);
462 	return sizeof(src->sin6_addr) + sizeof(dst->sin6_addr);
463 }
464 
465 static int
466 ipsecif6_output(struct ipsec_variant *var, int family, struct mbuf *m)
467 {
468 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
469 	struct ipsec_softc *sc = ifp->if_softc;
470 	struct ipsec_ro *iro;
471 	struct rtentry *rt;
472 	struct sockaddr_in6 *sin6_src;
473 	struct sockaddr_in6 *sin6_dst;
474 	struct ip6_hdr *ip6;
475 	int proto, error;
476 	u_int8_t itos, otos;
477 	union {
478 		struct sockaddr		dst;
479 		struct sockaddr_in6	dst6;
480 	} u;
481 
482 	KASSERT(if_ipsec_heldref_variant(var));
483 	KASSERT(if_ipsec_variant_is_configured(var));
484 
485 	sin6_src = satosin6(var->iv_psrc);
486 	sin6_dst = satosin6(var->iv_pdst);
487 
488 	KASSERT(sin6_src->sin6_family == AF_INET6);
489 	KASSERT(sin6_dst->sin6_family == AF_INET6);
490 
491 	switch (family) {
492 #ifdef INET
493 	case AF_INET:
494 	    {
495 		struct ip *ip;
496 
497 		proto = IPPROTO_IPV4;
498 		if (m->m_len < sizeof(*ip)) {
499 			m = m_pullup(m, sizeof(*ip));
500 			if (m == NULL)
501 				return ENOBUFS;
502 		}
503 		ip = mtod(m, struct ip *);
504 		itos = ip->ip_tos;
505 		/* TODO: support ALTQ for innner packet */
506 		break;
507 	    }
508 #endif /* INET */
509 	case AF_INET6:
510 	    {
511 		struct ip6_hdr *xip6;
512 		proto = IPPROTO_IPV6;
513 		if (m->m_len < sizeof(*xip6)) {
514 			m = m_pullup(m, sizeof(*xip6));
515 			if (m == NULL)
516 				return ENOBUFS;
517 		}
518 		xip6 = mtod(m, struct ip6_hdr *);
519 		itos = (ntohl(xip6->ip6_flow) >> 20) & 0xff;
520 		/* TODO: support ALTQ for innner packet */
521 		break;
522 	    }
523 	default:
524 		m_freem(m);
525 		return EAFNOSUPPORT;
526 	}
527 
528 	/* prepend new IP header */
529 	M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
530 	if (m && M_UNWRITABLE(m, sizeof(struct ip6_hdr)))
531 		m = m_pullup(m, sizeof(struct ip6_hdr));
532 	if (m == NULL)
533 		return ENOBUFS;
534 
535 	ip6 = mtod(m, struct ip6_hdr *);
536 	ip6->ip6_flow	= 0;
537 	ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
538 	ip6->ip6_vfc	|= IPV6_VERSION;
539 #if 0	/* ip6->ip6_plen will be filled by ip6_output */
540 	ip6->ip6_plen	= htons((u_short)m->m_pkthdr.len - sizeof(*ip6));
541 #endif
542 	ip6->ip6_nxt	= proto;
543 	ip6->ip6_hlim	= ip6_ipsec_hlim;
544 	ip6->ip6_src	= sin6_src->sin6_addr;
545 	/* bidirectional configured tunnel mode */
546 	if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr)) {
547 		ip6->ip6_dst = sin6_dst->sin6_addr;
548 	} else  {
549 		m_freem(m);
550 		return ENETUNREACH;
551 	}
552 #ifndef IPSEC_TX_TOS_CLEAR
553 	if (ifp->if_flags & IFF_ECN)
554 		ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
555 	else
556 		ip_ecn_ingress(ECN_NOCARE, &otos, &itos);
557 
558 	if (!ip6_ipsec_copy_tos)
559 		otos = 0;
560 #else
561 	if (ip6_ipsec_copy_tos)
562 		otos = itos;
563 	else
564 		otos = 0;
565 #endif
566 	ip6->ip6_flow &= ~ntohl(0xff00000);
567 	ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
568 
569 	sockaddr_in6_init(&u.dst6, &sin6_dst->sin6_addr, 0, 0, 0);
570 
571 	iro = percpu_getref(sc->ipsec_ro_percpu);
572 	mutex_enter(iro->ir_lock);
573 	if ((rt = rtcache_lookup(&iro->ir_ro, &u.dst)) == NULL) {
574 		mutex_exit(iro->ir_lock);
575 		percpu_putref(sc->ipsec_ro_percpu);
576 		m_freem(m);
577 		return ENETUNREACH;
578 	}
579 
580 	if (rt->rt_ifp == ifp) {
581 		rtcache_unref(rt, &iro->ir_ro);
582 		rtcache_free(&iro->ir_ro);
583 		mutex_exit(iro->ir_lock);
584 		percpu_putref(sc->ipsec_ro_percpu);
585 		m_freem(m);
586 		return ENETUNREACH;
587 	}
588 	rtcache_unref(rt, &iro->ir_ro);
589 
590 	/*
591 	 * force fragmentation to minimum MTU, to avoid path MTU discovery.
592 	 * it is too painful to ask for resend of inner packet, to achieve
593 	 * path MTU discovery for encapsulated packets.
594 	 */
595 	error = ip6_output(m, 0, &iro->ir_ro,
596 	    ip6_ipsec_pmtu ? 0 : IPV6_MINMTU, 0, NULL, NULL);
597 	if (error)
598 		rtcache_free(&iro->ir_ro);
599 
600 	mutex_exit(iro->ir_lock);
601 	percpu_putref(sc->ipsec_ro_percpu);
602 
603 	return error;
604 }
605 #endif /* INET6 */
606 
607 static void
608 ipsecif4_input(struct mbuf *m, int off, int proto, void *eparg)
609 {
610 	struct ifnet *ipsecp;
611 	struct ipsec_softc *sc = eparg;
612 	struct ipsec_variant *var;
613 	const struct ip *ip;
614 	int af;
615 #ifndef IPSEC_TX_TOS_CLEAR
616 	u_int8_t otos;
617 #endif
618 	struct psref psref_rcvif;
619 	struct psref psref_var;
620 	struct ifnet *rcvif;
621 
622 	KASSERT(sc != NULL);
623 
624 	ipsecp = &sc->ipsec_if;
625 	if ((ipsecp->if_flags & IFF_UP) == 0) {
626 		m_freem(m);
627 		ip_statinc(IP_STAT_NOIPSEC);
628 		return;
629 	}
630 
631 	var = if_ipsec_getref_variant(sc, &psref_var);
632 	if (if_ipsec_variant_is_unconfigured(var)) {
633 		if_ipsec_putref_variant(var, &psref_var);
634 		m_freem(m);
635 		ip_statinc(IP_STAT_NOIPSEC);
636 		return;
637 	}
638 
639 	ip = mtod(m, const struct ip *);
640 
641 	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
642 	if (rcvif == NULL || !ipsecif4_filter4(ip, var, rcvif)) {
643 		m_put_rcvif_psref(rcvif, &psref_rcvif);
644 		if_ipsec_putref_variant(var, &psref_var);
645 		m_freem(m);
646 		ip_statinc(IP_STAT_NOIPSEC);
647 		return;
648 	}
649 	m_put_rcvif_psref(rcvif, &psref_rcvif);
650 	if_ipsec_putref_variant(var, &psref_var);
651 #ifndef IPSEC_TX_TOS_CLEAR
652 	otos = ip->ip_tos;
653 #endif
654 	m_adj(m, off);
655 
656 	switch (proto) {
657 	case IPPROTO_IPV4:
658 	    {
659 		struct ip *xip;
660 		af = AF_INET;
661 		if (M_UNWRITABLE(m, sizeof(*xip))) {
662 			m = m_pullup(m, sizeof(*xip));
663 			if (m == NULL)
664 				return;
665 		}
666 		xip = mtod(m, struct ip *);
667 #ifndef IPSEC_TX_TOS_CLEAR
668 		if (ipsecp->if_flags & IFF_ECN)
669 			ip_ecn_egress(ECN_ALLOWED, &otos, &xip->ip_tos);
670 		else
671 			ip_ecn_egress(ECN_NOCARE, &otos, &xip->ip_tos);
672 #endif
673 		break;
674 	    }
675 #ifdef INET6
676 	case IPPROTO_IPV6:
677 	    {
678 		struct ip6_hdr *ip6;
679 		u_int8_t itos;
680 		af = AF_INET6;
681 		if (M_UNWRITABLE(m, sizeof(*ip6))) {
682 			m = m_pullup(m, sizeof(*ip6));
683 			if (m == NULL)
684 				return;
685 		}
686 		ip6 = mtod(m, struct ip6_hdr *);
687 		itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
688 #ifndef IPSEC_TX_TOS_CLEAR
689 		if (ipsecp->if_flags & IFF_ECN)
690 			ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
691 		else
692 			ip_ecn_egress(ECN_NOCARE, &otos, &itos);
693 #endif
694 		ip6->ip6_flow &= ~htonl(0xff << 20);
695 		ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
696 		break;
697 	    }
698 #endif /* INET6 */
699 	default:
700 		ip_statinc(IP_STAT_NOIPSEC);
701 		m_freem(m);
702 		return;
703 	}
704 	if_ipsec_input(m, af, ipsecp);
705 
706 	return;
707 }
708 
709 /*
710  * validate and filter the pakcet
711  */
712 static int
713 ipsecif4_filter4(const struct ip *ip, struct ipsec_variant *var,
714     struct ifnet *ifp)
715 {
716 	struct sockaddr_in *src, *dst;
717 
718 	src = satosin(var->iv_psrc);
719 	dst = satosin(var->iv_pdst);
720 
721 	return in_tunnel_validate(ip, src->sin_addr, dst->sin_addr);
722 }
723 
724 #ifdef INET6
725 static int
726 ipsecif6_input(struct mbuf **mp, int *offp, int proto, void *eparg)
727 {
728 	struct mbuf *m = *mp;
729 	struct ifnet *ipsecp;
730 	struct ipsec_softc *sc = eparg;
731 	struct ipsec_variant *var;
732 	struct ip6_hdr *ip6;
733 	int af = 0;
734 #ifndef IPSEC_TX_TOS_CLEAR
735 	u_int32_t otos;
736 #endif
737 	struct psref psref_rcvif;
738 	struct psref psref_var;
739 	struct ifnet *rcvif;
740 
741 	KASSERT(eparg != NULL);
742 
743 	ipsecp = &sc->ipsec_if;
744 	if ((ipsecp->if_flags & IFF_UP) == 0) {
745 		m_freem(m);
746 		IP6_STATINC(IP6_STAT_NOIPSEC);
747 		return IPPROTO_DONE;
748 	}
749 
750 	var = if_ipsec_getref_variant(sc, &psref_var);
751 	if (if_ipsec_variant_is_unconfigured(var)) {
752 		if_ipsec_putref_variant(var, &psref_var);
753 		m_freem(m);
754 		IP6_STATINC(IP6_STAT_NOIPSEC);
755 		return IPPROTO_DONE;
756 	}
757 
758 	ip6 = mtod(m, struct ip6_hdr *);
759 
760 	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
761 	if (rcvif == NULL || !ipsecif6_filter6(ip6, var, rcvif)) {
762 		m_put_rcvif_psref(rcvif, &psref_rcvif);
763 		if_ipsec_putref_variant(var, &psref_var);
764 		m_freem(m);
765 		IP6_STATINC(IP6_STAT_NOIPSEC);
766 		return IPPROTO_DONE;
767 	}
768 	m_put_rcvif_psref(rcvif, &psref_rcvif);
769 	if_ipsec_putref_variant(var, &psref_var);
770 
771 #ifndef IPSEC_TX_TOS_CLEAR
772 	otos = ip6->ip6_flow;
773 #endif
774 	m_adj(m, *offp);
775 
776 	switch (proto) {
777 #ifdef INET
778 	case IPPROTO_IPV4:
779 	    {
780 		af = AF_INET;
781 #ifndef IPSEC_TX_TOS_CLEAR
782 		struct ip *ip;
783 		u_int8_t otos8;
784 		otos8 = (ntohl(otos) >> 20) & 0xff;
785 
786 		if (M_UNWRITABLE(m, sizeof(*ip))) {
787 			m = m_pullup(m, sizeof(*ip));
788 			if (m == NULL)
789 				return IPPROTO_DONE;
790 		}
791 		ip = mtod(m, struct ip *);
792 		if (ipsecp->if_flags & IFF_ECN)
793 			ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos);
794 		else
795 			ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
796 #endif
797 		break;
798 	    }
799 #endif /* INET */
800 	case IPPROTO_IPV6:
801 	    {
802 		af = AF_INET6;
803 #ifndef IPSEC_TX_TOS_CLEAR
804 		struct ip6_hdr *xip6;
805 
806 		if (M_UNWRITABLE(m, sizeof(*xip6))) {
807 			m = m_pullup(m, sizeof(*xip6));
808 			if (m == NULL)
809 				return IPPROTO_DONE;
810 		}
811 		xip6 = mtod(m, struct ip6_hdr *);
812 		if (ipsecp->if_flags & IFF_ECN)
813 			ip6_ecn_egress(ECN_ALLOWED, &otos, &xip6->ip6_flow);
814 		else
815 			ip6_ecn_egress(ECN_NOCARE, &otos, &xip6->ip6_flow);
816 		break;
817 #endif
818 	    }
819 	default:
820 		IP6_STATINC(IP6_STAT_NOIPSEC);
821 		m_freem(m);
822 		return IPPROTO_DONE;
823 	}
824 
825 	if_ipsec_input(m, af, ipsecp);
826 	return IPPROTO_DONE;
827 }
828 
829 /*
830  * validate and filter the packet.
831  */
832 static int
833 ipsecif6_filter6(const struct ip6_hdr *ip6, struct ipsec_variant *var,
834     struct ifnet *ifp)
835 {
836 	struct sockaddr_in6 *src, *dst;
837 
838 	src = satosin6(var->iv_psrc);
839 	dst = satosin6(var->iv_pdst);
840 
841 	return in6_tunnel_validate(ip6, &src->sin6_addr, &dst->sin6_addr);
842 }
843 #endif /* INET6 */
844 
845 int
846 ipsecif4_attach(struct ipsec_variant *var)
847 {
848 	struct ipsec_softc *sc = var->iv_softc;
849 
850 	KASSERT(if_ipsec_variant_is_configured(var));
851 
852 	if (var->iv_encap_cookie4 != NULL)
853 		return EALREADY;
854 	var->iv_encap_cookie4 = encap_attach_func(AF_INET, -1, if_ipsec_encap_func,
855 	    &ipsecif4_encapsw, sc);
856 	if (var->iv_encap_cookie4 == NULL)
857 		return EEXIST;
858 
859 	var->iv_output = ipsecif4_output;
860 	return 0;
861 }
862 
863 int
864 ipsecif4_detach(struct ipsec_variant *var)
865 {
866 	int error;
867 
868 	if (var->iv_encap_cookie4 == NULL)
869 		return 0;
870 
871 	var->iv_output = NULL;
872 	error = encap_detach(var->iv_encap_cookie4);
873 	if (error == 0)
874 		var->iv_encap_cookie4 = NULL;
875 
876 	return error;
877 }
878 
879 #ifdef INET6
880 int
881 ipsecif6_attach(struct ipsec_variant *var)
882 {
883 	struct sockaddr_in6 mask6;
884 	struct ipsec_softc *sc = var->iv_softc;
885 
886 	KASSERT(if_ipsec_variant_is_configured(var));
887 	KASSERT(var->iv_encap_cookie6 == NULL);
888 
889 	memset(&mask6, 0, sizeof(mask6));
890 	mask6.sin6_len = sizeof(struct sockaddr_in6);
891 	mask6.sin6_addr.s6_addr32[0] = mask6.sin6_addr.s6_addr32[1] =
892 	mask6.sin6_addr.s6_addr32[2] = mask6.sin6_addr.s6_addr32[3] = ~0;
893 
894 	var->iv_encap_cookie6 = encap_attach_func(AF_INET6, -1, if_ipsec_encap_func,
895 	    &ipsecif6_encapsw, sc);
896 	if (var->iv_encap_cookie6 == NULL)
897 		return EEXIST;
898 
899 	var->iv_output = ipsecif6_output;
900 	return 0;
901 }
902 
903 static void
904 ipsecif6_rtcache_free_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
905 {
906 	struct ipsec_ro *iro = p;
907 
908 	mutex_enter(iro->ir_lock);
909 	rtcache_free(&iro->ir_ro);
910 	mutex_exit(iro->ir_lock);
911 }
912 
913 int
914 ipsecif6_detach(struct ipsec_variant *var)
915 {
916 	struct ipsec_softc *sc = var->iv_softc;
917 	int error;
918 
919 	KASSERT(var->iv_encap_cookie6 != NULL);
920 
921 	percpu_foreach(sc->ipsec_ro_percpu, ipsecif6_rtcache_free_pc, NULL);
922 
923 	var->iv_output = NULL;
924 	error = encap_detach(var->iv_encap_cookie6);
925 	if (error == 0)
926 		var->iv_encap_cookie6 = NULL;
927 	return error;
928 }
929 
930 void *
931 ipsecif6_ctlinput(int cmd, const struct sockaddr *sa, void *d, void *eparg)
932 {
933 	struct ipsec_softc *sc = eparg;
934 	struct ip6ctlparam *ip6cp = NULL;
935 	struct ip6_hdr *ip6;
936 	const struct sockaddr_in6 *dst6;
937 	struct ipsec_ro *iro;
938 
939 	if (sa->sa_family != AF_INET6 ||
940 	    sa->sa_len != sizeof(struct sockaddr_in6))
941 		return NULL;
942 
943 	if ((unsigned)cmd >= PRC_NCMDS)
944 		return NULL;
945 	if (cmd == PRC_HOSTDEAD)
946 		d = NULL;
947 	else if (inet6ctlerrmap[cmd] == 0)
948 		return NULL;
949 
950 	/* if the parameter is from icmp6, decode it. */
951 	if (d != NULL) {
952 		ip6cp = (struct ip6ctlparam *)d;
953 		ip6 = ip6cp->ip6c_ip6;
954 	} else {
955 		ip6 = NULL;
956 	}
957 
958 	if (!ip6)
959 		return NULL;
960 
961 	iro = percpu_getref(sc->ipsec_ro_percpu);
962 	mutex_enter(iro->ir_lock);
963 	dst6 = satocsin6(rtcache_getdst(&iro->ir_ro));
964 	/* XXX scope */
965 	if (dst6 == NULL)
966 		;
967 	else if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst6->sin6_addr))
968 		/* flush route cache */
969 		rtcache_free(&iro->ir_ro);
970 
971 	mutex_exit(iro->ir_lock);
972 	percpu_putref(sc->ipsec_ro_percpu);
973 
974 	return NULL;
975 }
976 
977 ENCAP_PR_WRAP_CTLINPUT(ipsecif6_ctlinput)
978 #define	ipsecif6_ctlinput	ipsecif6_ctlinput_wrapper
979 
980 static const struct encapsw ipsecif6_encapsw = {
981 	.encapsw6 = {
982 		.pr_input = ipsecif6_input,
983 		.pr_ctlinput = ipsecif6_ctlinput,
984 	}
985 };
986 #endif /* INET6 */
987