xref: /netbsd-src/sys/netipsec/ipsecif.c (revision fc4f42693f9b1c31f39f9cf50af1bf2010325808)
1 /*	$NetBSD: ipsecif.c,v 1.7 2018/04/06 10:38:53 knakahara Exp $  */
2 
3 /*
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: ipsecif.c,v 1.7 2018/04/06 10:38:53 knakahara Exp $");
31 
32 #ifdef _KERNEL_OPT
33 #include "opt_inet.h"
34 #include "opt_ipsec.h"
35 #endif
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/socket.h>
40 #include <sys/sockio.h>
41 #include <sys/mbuf.h>
42 #include <sys/errno.h>
43 #include <sys/ioctl.h>
44 #include <sys/syslog.h>
45 #include <sys/kernel.h>
46 
47 #include <net/if.h>
48 #include <net/route.h>
49 
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip_var.h>
54 #include <netinet/in_var.h>
55 #include <netinet/ip_encap.h>
56 #include <netinet/ip_ecn.h>
57 #include <netinet/ip_private.h>
58 #include <netinet/udp.h>
59 
60 #ifdef INET6
61 #include <netinet/ip6.h>
62 #include <netinet6/ip6_var.h>
63 #include <netinet6/ip6_private.h>
64 #include <netinet6/in6_var.h>
65 #include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
66 #include <netinet/ip_ecn.h>
67 #endif
68 
69 #include <netipsec/key.h>
70 #include <netipsec/ipsecif.h>
71 
72 #include <net/if_ipsec.h>
73 
/*
 * Forward declarations of the file-local IPv4 handlers that are referenced
 * before their definitions further down in this file.
 */
static void ipsecif4_input(struct mbuf *m, int off, int proto, void *eparg);
static int ipsecif4_output(struct ipsec_variant *var, int family,
	struct mbuf *m);
static int ipsecif4_filter4(const struct ip *ip, struct ipsec_variant *var,
	struct ifnet *ifp);

#ifdef INET6
/* IPv6 counterparts of the handlers above. */
static int ipsecif6_input(struct mbuf **mp, int *offp, int proto,
	void *eparg);
static int ipsecif6_output(struct ipsec_variant *var, int family,
	struct mbuf *m);
static int ipsecif6_filter6(const struct ip6_hdr *ip6,
	struct ipsec_variant *var, struct ifnet *ifp);
#endif
85 
86 static int ip_ipsec_ttl = IPSEC_TTL;
87 static int ip_ipsec_copy_tos = 0;
88 #ifdef INET6
89 static int ip6_ipsec_hlim = IPSEC_HLIM;
90 static int ip6_ipsec_pmtu = 0; /* XXX: per interface configuration?? */
91 static int ip6_ipsec_copy_tos = 0;
92 #endif
93 
94 struct encapsw ipsecif4_encapsw = {
95 	.encapsw4 = {
96 		.pr_input = ipsecif4_input,
97 		.pr_ctlinput = NULL,
98 	}
99 };
100 
101 #ifdef INET6
102 static const struct encapsw ipsecif6_encapsw;
103 #endif
104 
105 static struct mbuf *
106 ipsecif4_prepend_hdr(struct ipsec_variant *var, struct mbuf *m,
107     uint8_t proto, uint8_t tos)
108 {
109 	struct ip *ip;
110 	struct sockaddr_in *src, *dst;
111 
112 	src = satosin(var->iv_psrc);
113 	dst = satosin(var->iv_pdst);
114 
115 	if (in_nullhost(src->sin_addr) || in_nullhost(src->sin_addr) ||
116 	    src->sin_addr.s_addr == INADDR_BROADCAST ||
117 	    dst->sin_addr.s_addr == INADDR_BROADCAST) {
118 		m_freem(m);
119 		return NULL;
120 	}
121 	m->m_flags &= ~M_BCAST;
122 
123 	if (IN_MULTICAST(src->sin_addr.s_addr) ||
124 	    IN_MULTICAST(dst->sin_addr.s_addr)) {
125 		m_freem(m);
126 		return NULL;
127 	}
128 
129 	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
130 	if (m && M_UNWRITABLE(m, sizeof(struct ip)))
131 		m = m_pullup(m, sizeof(struct ip));
132 	if (m == NULL)
133 		return NULL;
134 
135 	ip = mtod(m, struct ip *);
136 	ip->ip_v = IPVERSION;
137 	ip->ip_off = htons(0);
138 	if (m->m_pkthdr.len < IP_MINFRAGSIZE)
139 		ip->ip_id = 0;
140 	else
141 		ip->ip_id = ip_newid(NULL);
142 	ip->ip_hl = sizeof(*ip) >> 2;
143 	if (ip_ipsec_copy_tos)
144 		ip->ip_tos = tos;
145 	else
146 		ip->ip_tos = 0;
147 	ip->ip_sum = 0;
148 	ip->ip_src = src->sin_addr;
149 	ip->ip_dst = dst->sin_addr;
150 	ip->ip_p = proto;
151 	ip->ip_ttl = ip_ipsec_ttl;
152 	ip->ip_len = htons(m->m_pkthdr.len);
153 #ifndef IPSEC_TX_TOS_CLEAR
154 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
155 	if (ifp->if_flags & IFF_ECN)
156 		ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
157 	else
158 		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
159 #endif
160 
161 	return m;
162 }
163 
164 static int
165 ipsecif4_needfrag(struct mbuf *m, struct ipsecrequest *isr)
166 {
167 	struct ip ip0;
168 	struct ip *ip;
169 	int mtu;
170 	struct secasvar *sav;
171 
172 	sav = key_lookup_sa_bysaidx(&isr->saidx);
173 	if (sav == NULL)
174 		return 0;
175 
176 	if (!(sav->natt_type & UDP_ENCAP_ESPINUDP) &&
177 	    !(sav->natt_type & UDP_ENCAP_ESPINUDP_NON_IKE)) {
178 		mtu = 0;
179 		goto out;
180 	}
181 
182 	if (m->m_len < sizeof(struct ip)) {
183 		m_copydata(m, 0, sizeof(ip0), &ip0);
184 		ip = &ip0;
185 	} else {
186 		ip = mtod(m, struct ip *);
187 	}
188 	mtu = sav->esp_frag;
189 	if (ntohs(ip->ip_len) <= mtu)
190 		mtu = 0;
191 
192 out:
193 	KEY_SA_UNREF(&sav);
194 	return mtu;
195 }
196 
197 static struct mbuf *
198 ipsecif4_flowinfo(struct mbuf *m, int family, int *proto0, u_int8_t *tos0)
199 {
200 	const struct ip *ip;
201 	int proto;
202 	int tos;
203 
204 	KASSERT(proto0 != NULL);
205 	KASSERT(tos0 != NULL);
206 
207 	switch (family) {
208 	case AF_INET:
209 		proto = IPPROTO_IPV4;
210 		if (m->m_len < sizeof(*ip)) {
211 			m = m_pullup(m, sizeof(*ip));
212 			if (m == NULL) {
213 				*tos0 = 0;
214 				*proto0 = 0;
215 				return NULL;
216 			}
217 		}
218 		ip = mtod(m, const struct ip *);
219 		tos = ip->ip_tos;
220 		/* TODO: support ALTQ for innner packet */
221 		break;
222 #ifdef INET6
223 	case AF_INET6: {
224 		const struct ip6_hdr *ip6;
225 		proto = IPPROTO_IPV6;
226 		if (m->m_len < sizeof(*ip6)) {
227 			m = m_pullup(m, sizeof(*ip6));
228 			if (m == NULL) {
229 				*tos0 = 0;
230 				*proto0 = 0;
231 				return NULL;
232 			}
233 		}
234 		ip6 = mtod(m, const struct ip6_hdr *);
235 		tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
236 		/* TODO: support ALTQ for innner packet */
237 		break;
238 	}
239 #endif /* INET6 */
240 	default:
241 		*tos0 = 0;
242 		*proto0 = 0;
243 		return NULL;
244 	}
245 
246 	*proto0 = proto;
247 	*tos0 = tos;
248 	return m;
249 }
250 
251 static int
252 ipsecif4_fragout(struct ipsec_variant *var, int family, struct mbuf *m, int mtu)
253 {
254 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
255 	struct mbuf *next;
256 	struct m_tag *mtag;
257 	int error;
258 
259 	KASSERT(if_ipsec_heldref_variant(var));
260 
261 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
262 	if (mtag)
263 		m_tag_delete(m, mtag);
264 
265 	/* consider new IP header prepended in ipsecif4_output() */
266 	if (mtu <= sizeof(struct ip)) {
267 		m_freem(m);
268 		return ENETUNREACH;
269 	}
270 	m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
271 	error = ip_fragment(m, ifp, mtu - sizeof(struct ip));
272 	if (error)
273 		return error;
274 
275 	for (error = 0; m; m = next) {
276 		next = m->m_nextpkt;
277 		m->m_nextpkt = NULL;
278 		if (error) {
279 			m_freem(m);
280 			continue;
281 		}
282 
283 		error = ipsecif4_output(var, family, m);
284 	}
285 	if (error == 0)
286 		IP_STATINC(IP_STAT_FRAGMENTED);
287 
288 	return error;
289 }
290 
291 int
292 ipsecif4_encap_func(struct mbuf *m, struct ip *ip, struct ipsec_variant *var)
293 {
294 	struct m_tag *mtag;
295 	struct sockaddr_in *src, *dst;
296 	u_int16_t src_port = 0;
297 	u_int16_t dst_port = 0;
298 
299 	KASSERT(var != NULL);
300 
301 	src = satosin(var->iv_psrc);
302 	dst = satosin(var->iv_pdst);
303 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
304 	if (mtag) {
305 		u_int16_t *ports;
306 
307 		ports = (u_int16_t *)(mtag + 1);
308 		src_port = ports[0];
309 		dst_port = ports[1];
310 	}
311 
312 	/* address match */
313 	if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
314 	    dst->sin_addr.s_addr != ip->ip_src.s_addr)
315 		return 0;
316 
317 	/* UDP encap? */
318 	if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
319 		goto match;
320 
321 	/* port match */
322 	if (src_port != var->iv_dport ||
323 	    dst_port != var->iv_sport) {
324 #ifdef DEBUG
325 		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
326 		    __func__, ntohs(src_port), ntohs(dst_port),
327 		    ntohs(var->iv_sport), ntohs(var->iv_dport));
328 #endif
329 		return 0;
330 	}
331 
332 match:
333 	/*
334 	 * hide NAT-T information from encapsulated traffics.
335 	 * they don't know about IPsec.
336 	 */
337 	if (mtag)
338 		m_tag_delete(m, mtag);
339 	return sizeof(src->sin_addr) + sizeof(dst->sin_addr);
340 }
341 
342 static int
343 ipsecif4_output(struct ipsec_variant *var, int family, struct mbuf *m)
344 {
345 	struct secpolicy *sp = NULL;
346 	u_int8_t tos;
347 	int proto;
348 	int error;
349 	int mtu;
350 	u_long sa_mtu = 0;
351 
352 	KASSERT(if_ipsec_heldref_variant(var));
353 	KASSERT(if_ipsec_variant_is_configured(var));
354 	KASSERT(var->iv_psrc->sa_family == AF_INET);
355 	KASSERT(var->iv_pdst->sa_family == AF_INET);
356 
357 	sp = IV_SP_OUT(var);
358 	KASSERT(sp != NULL);
359 	/*
360 	 * The SPs in ipsec_variant are prevented from freed by
361 	 * ipsec_variant->iv_psref. So, KEY_SP_REF() is unnecessary here.
362 	 */
363 
364 	KASSERT(sp->policy != IPSEC_POLICY_NONE);
365 	KASSERT(sp->policy != IPSEC_POLICY_ENTRUST);
366 	KASSERT(sp->policy != IPSEC_POLICY_BYPASS);
367 	if (sp->policy != IPSEC_POLICY_IPSEC) {
368 		struct ifnet *ifp = &var->iv_softc->ipsec_if;
369 		m_freem(m);
370 		IF_DROP(&ifp->if_snd);
371 		return 0;
372 	}
373 
374 	/* get flowinfo */
375 	m = ipsecif4_flowinfo(m, family, &proto, &tos);
376 	if (m == NULL) {
377 		error = ENETUNREACH;
378 		goto done;
379 	}
380 
381 	/* prepend new IP header */
382 	m = ipsecif4_prepend_hdr(var, m, proto, tos);
383 	if (m == NULL) {
384 		error = ENETUNREACH;
385 		goto done;
386 	}
387 
388 	/*
389 	 * Normal netipsec's NAT-T fragmentation is done in ip_output().
390 	 * See "natt_frag" processing.
391 	 * However, ipsec(4) interface's one is not done in the same way,
392 	 * so we must do NAT-T fragmentation by own code.
393 	 */
394 	/* NAT-T ESP fragmentation */
395 	mtu = ipsecif4_needfrag(m, sp->req);
396 	if (mtu > 0)
397 		return ipsecif4_fragout(var, family, m, mtu);
398 
399 	/* IPsec output */
400 	IP_STATINC(IP_STAT_LOCALOUT);
401 	error = ipsec4_process_packet(m, sp->req, &sa_mtu);
402 	if (error == ENOENT)
403 		error = 0;
404 	/*
405 	 * frangmentation is already done in ipsecif4_fragout(),
406 	 * so ipsec4_process_packet() must not do fragmentation here.
407 	 */
408 	KASSERT(sa_mtu == 0);
409 
410 done:
411 	return error;
412 }
413 
414 #ifdef INET6
415 int
416 ipsecif6_encap_func(struct mbuf *m, struct ip6_hdr *ip6, struct ipsec_variant *var)
417 {
418 	struct m_tag *mtag;
419 	struct sockaddr_in6 *src, *dst;
420 	u_int16_t src_port = 0;
421 	u_int16_t dst_port = 0;
422 
423 	KASSERT(var != NULL);
424 
425 	src = satosin6(var->iv_psrc);
426 	dst = satosin6(var->iv_pdst);
427 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
428 	if (mtag) {
429 		u_int16_t *ports;
430 
431 		ports = (u_int16_t *)(mtag + 1);
432 		src_port = ports[0];
433 		dst_port = ports[1];
434 	}
435 
436 	/* address match */
437 	if (!IN6_ARE_ADDR_EQUAL(&src->sin6_addr, &ip6->ip6_dst) ||
438 	    !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_src))
439 		return 0;
440 
441 	/* UDP encap? */
442 	if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
443 		goto match;
444 
445 	/* port match */
446 	if (src_port != var->iv_dport ||
447 	    dst_port != var->iv_sport) {
448 #ifdef DEBUG
449 		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
450 		    __func__, ntohs(src_port), ntohs(dst_port),
451 		    ntohs(var->iv_sport), ntohs(var->iv_dport));
452 #endif
453 		return 0;
454 	}
455 
456 match:
457 	/*
458 	 * hide NAT-T information from encapsulated traffics.
459 	 * they don't know about IPsec.
460 	 */
461 	if (mtag)
462 		m_tag_delete(m, mtag);
463 	return sizeof(src->sin6_addr) + sizeof(dst->sin6_addr);
464 }
465 
466 static int
467 ipsecif6_output(struct ipsec_variant *var, int family, struct mbuf *m)
468 {
469 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
470 	struct ipsec_softc *sc = ifp->if_softc;
471 	struct ipsec_ro *iro;
472 	struct rtentry *rt;
473 	struct sockaddr_in6 *sin6_src;
474 	struct sockaddr_in6 *sin6_dst;
475 	struct ip6_hdr *ip6;
476 	int proto, error;
477 	u_int8_t itos, otos;
478 	union {
479 		struct sockaddr		dst;
480 		struct sockaddr_in6	dst6;
481 	} u;
482 
483 	KASSERT(if_ipsec_heldref_variant(var));
484 	KASSERT(if_ipsec_variant_is_configured(var));
485 
486 	sin6_src = satosin6(var->iv_psrc);
487 	sin6_dst = satosin6(var->iv_pdst);
488 
489 	KASSERT(sin6_src->sin6_family == AF_INET6);
490 	KASSERT(sin6_dst->sin6_family == AF_INET6);
491 
492 	switch (family) {
493 #ifdef INET
494 	case AF_INET:
495 	    {
496 		struct ip *ip;
497 
498 		proto = IPPROTO_IPV4;
499 		if (m->m_len < sizeof(*ip)) {
500 			m = m_pullup(m, sizeof(*ip));
501 			if (m == NULL)
502 				return ENOBUFS;
503 		}
504 		ip = mtod(m, struct ip *);
505 		itos = ip->ip_tos;
506 		/* TODO: support ALTQ for innner packet */
507 		break;
508 	    }
509 #endif /* INET */
510 	case AF_INET6:
511 	    {
512 		struct ip6_hdr *xip6;
513 		proto = IPPROTO_IPV6;
514 		if (m->m_len < sizeof(*xip6)) {
515 			m = m_pullup(m, sizeof(*xip6));
516 			if (m == NULL)
517 				return ENOBUFS;
518 		}
519 		xip6 = mtod(m, struct ip6_hdr *);
520 		itos = (ntohl(xip6->ip6_flow) >> 20) & 0xff;
521 		/* TODO: support ALTQ for innner packet */
522 		break;
523 	    }
524 	default:
525 		m_freem(m);
526 		return EAFNOSUPPORT;
527 	}
528 
529 	/* prepend new IP header */
530 	M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
531 	if (m && M_UNWRITABLE(m, sizeof(struct ip6_hdr)))
532 		m = m_pullup(m, sizeof(struct ip6_hdr));
533 	if (m == NULL)
534 		return ENOBUFS;
535 
536 	ip6 = mtod(m, struct ip6_hdr *);
537 	ip6->ip6_flow	= 0;
538 	ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
539 	ip6->ip6_vfc	|= IPV6_VERSION;
540 #if 0	/* ip6->ip6_plen will be filled by ip6_output */
541 	ip6->ip6_plen	= htons((u_short)m->m_pkthdr.len - sizeof(*ip6));
542 #endif
543 	ip6->ip6_nxt	= proto;
544 	ip6->ip6_hlim	= ip6_ipsec_hlim;
545 	ip6->ip6_src	= sin6_src->sin6_addr;
546 	/* bidirectional configured tunnel mode */
547 	if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr)) {
548 		ip6->ip6_dst = sin6_dst->sin6_addr;
549 	} else  {
550 		m_freem(m);
551 		return ENETUNREACH;
552 	}
553 #ifndef IPSEC_TX_TOS_CLEAR
554 	if (ifp->if_flags & IFF_ECN)
555 		ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
556 	else
557 		ip_ecn_ingress(ECN_NOCARE, &otos, &itos);
558 
559 	if (!ip6_ipsec_copy_tos)
560 		otos = 0;
561 #else
562 	if (ip6_ipsec_copy_tos)
563 		otos = itos;
564 	else
565 		otos = 0;
566 #endif
567 	ip6->ip6_flow &= ~ntohl(0xff00000);
568 	ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
569 
570 	sockaddr_in6_init(&u.dst6, &sin6_dst->sin6_addr, 0, 0, 0);
571 
572 	iro = percpu_getref(sc->ipsec_ro_percpu);
573 	mutex_enter(&iro->ir_lock);
574 	if ((rt = rtcache_lookup(&iro->ir_ro, &u.dst)) == NULL) {
575 		mutex_exit(&iro->ir_lock);
576 		percpu_putref(sc->ipsec_ro_percpu);
577 		m_freem(m);
578 		return ENETUNREACH;
579 	}
580 
581 	if (rt->rt_ifp == ifp) {
582 		rtcache_unref(rt, &iro->ir_ro);
583 		rtcache_free(&iro->ir_ro);
584 		mutex_exit(&iro->ir_lock);
585 		percpu_putref(sc->ipsec_ro_percpu);
586 		m_freem(m);
587 		return ENETUNREACH;
588 	}
589 	rtcache_unref(rt, &iro->ir_ro);
590 
591 	/*
592 	 * force fragmentation to minimum MTU, to avoid path MTU discovery.
593 	 * it is too painful to ask for resend of inner packet, to achieve
594 	 * path MTU discovery for encapsulated packets.
595 	 */
596 	error = ip6_output(m, 0, &iro->ir_ro,
597 	    ip6_ipsec_pmtu ? 0 : IPV6_MINMTU, 0, NULL, NULL);
598 	if (error)
599 		rtcache_free(&iro->ir_ro);
600 
601 	mutex_exit(&iro->ir_lock);
602 	percpu_putref(sc->ipsec_ro_percpu);
603 
604 	return error;
605 }
606 #endif /* INET6 */
607 
608 static void
609 ipsecif4_input(struct mbuf *m, int off, int proto, void *eparg)
610 {
611 	struct ifnet *ipsecp;
612 	struct ipsec_softc *sc = eparg;
613 	struct ipsec_variant *var;
614 	const struct ip *ip;
615 	int af;
616 #ifndef IPSEC_TX_TOS_CLEAR
617 	u_int8_t otos;
618 #endif
619 	struct psref psref_rcvif;
620 	struct psref psref_var;
621 	struct ifnet *rcvif;
622 
623 	KASSERT(sc != NULL);
624 
625 	ipsecp = &sc->ipsec_if;
626 	if ((ipsecp->if_flags & IFF_UP) == 0) {
627 		m_freem(m);
628 		ip_statinc(IP_STAT_NOIPSEC);
629 		return;
630 	}
631 
632 	var = if_ipsec_getref_variant(sc, &psref_var);
633 	if (if_ipsec_variant_is_unconfigured(var)) {
634 		if_ipsec_putref_variant(var, &psref_var);
635 		m_freem(m);
636 		ip_statinc(IP_STAT_NOIPSEC);
637 		return;
638 	}
639 
640 	ip = mtod(m, const struct ip *);
641 
642 	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
643 	if (rcvif == NULL || !ipsecif4_filter4(ip, var, rcvif)) {
644 		m_put_rcvif_psref(rcvif, &psref_rcvif);
645 		if_ipsec_putref_variant(var, &psref_var);
646 		m_freem(m);
647 		ip_statinc(IP_STAT_NOIPSEC);
648 		return;
649 	}
650 	m_put_rcvif_psref(rcvif, &psref_rcvif);
651 	if_ipsec_putref_variant(var, &psref_var);
652 #ifndef IPSEC_TX_TOS_CLEAR
653 	otos = ip->ip_tos;
654 #endif
655 	m_adj(m, off);
656 
657 	switch (proto) {
658 	case IPPROTO_IPV4:
659 	    {
660 		struct ip *xip;
661 		af = AF_INET;
662 		if (M_UNWRITABLE(m, sizeof(*xip))) {
663 			m = m_pullup(m, sizeof(*xip));
664 			if (m == NULL)
665 				return;
666 		}
667 		xip = mtod(m, struct ip *);
668 #ifndef IPSEC_TX_TOS_CLEAR
669 		if (ipsecp->if_flags & IFF_ECN)
670 			ip_ecn_egress(ECN_ALLOWED, &otos, &xip->ip_tos);
671 		else
672 			ip_ecn_egress(ECN_NOCARE, &otos, &xip->ip_tos);
673 #endif
674 		break;
675 	    }
676 #ifdef INET6
677 	case IPPROTO_IPV6:
678 	    {
679 		struct ip6_hdr *ip6;
680 		u_int8_t itos;
681 		af = AF_INET6;
682 		if (M_UNWRITABLE(m, sizeof(*ip6))) {
683 			m = m_pullup(m, sizeof(*ip6));
684 			if (m == NULL)
685 				return;
686 		}
687 		ip6 = mtod(m, struct ip6_hdr *);
688 		itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
689 #ifndef IPSEC_TX_TOS_CLEAR
690 		if (ipsecp->if_flags & IFF_ECN)
691 			ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
692 		else
693 			ip_ecn_egress(ECN_NOCARE, &otos, &itos);
694 #endif
695 		ip6->ip6_flow &= ~htonl(0xff << 20);
696 		ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
697 		break;
698 	    }
699 #endif /* INET6 */
700 	default:
701 		ip_statinc(IP_STAT_NOIPSEC);
702 		m_freem(m);
703 		return;
704 	}
705 	if_ipsec_input(m, af, ipsecp);
706 
707 	return;
708 }
709 
710 /*
711  * validate and filter the pakcet
712  */
713 static int
714 ipsecif4_filter4(const struct ip *ip, struct ipsec_variant *var,
715     struct ifnet *ifp)
716 {
717 	struct sockaddr_in *src, *dst;
718 
719 	src = satosin(var->iv_psrc);
720 	dst = satosin(var->iv_pdst);
721 
722 	return in_tunnel_validate(ip, src->sin_addr, dst->sin_addr);
723 }
724 
725 #ifdef INET6
726 static int
727 ipsecif6_input(struct mbuf **mp, int *offp, int proto, void *eparg)
728 {
729 	struct mbuf *m = *mp;
730 	struct ifnet *ipsecp;
731 	struct ipsec_softc *sc = eparg;
732 	struct ipsec_variant *var;
733 	struct ip6_hdr *ip6;
734 	int af = 0;
735 #ifndef IPSEC_TX_TOS_CLEAR
736 	u_int32_t otos;
737 #endif
738 	struct psref psref_rcvif;
739 	struct psref psref_var;
740 	struct ifnet *rcvif;
741 
742 	KASSERT(eparg != NULL);
743 
744 	ipsecp = &sc->ipsec_if;
745 	if ((ipsecp->if_flags & IFF_UP) == 0) {
746 		m_freem(m);
747 		IP6_STATINC(IP6_STAT_NOIPSEC);
748 		return IPPROTO_DONE;
749 	}
750 
751 	var = if_ipsec_getref_variant(sc, &psref_var);
752 	if (if_ipsec_variant_is_unconfigured(var)) {
753 		if_ipsec_putref_variant(var, &psref_var);
754 		m_freem(m);
755 		IP6_STATINC(IP6_STAT_NOIPSEC);
756 		return IPPROTO_DONE;
757 	}
758 
759 	ip6 = mtod(m, struct ip6_hdr *);
760 
761 	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
762 	if (rcvif == NULL || !ipsecif6_filter6(ip6, var, rcvif)) {
763 		m_put_rcvif_psref(rcvif, &psref_rcvif);
764 		if_ipsec_putref_variant(var, &psref_var);
765 		m_freem(m);
766 		IP6_STATINC(IP6_STAT_NOIPSEC);
767 		return IPPROTO_DONE;
768 	}
769 	m_put_rcvif_psref(rcvif, &psref_rcvif);
770 	if_ipsec_putref_variant(var, &psref_var);
771 
772 #ifndef IPSEC_TX_TOS_CLEAR
773 	otos = ip6->ip6_flow;
774 #endif
775 	m_adj(m, *offp);
776 
777 	switch (proto) {
778 #ifdef INET
779 	case IPPROTO_IPV4:
780 	    {
781 		af = AF_INET;
782 #ifndef IPSEC_TX_TOS_CLEAR
783 		struct ip *ip;
784 		u_int8_t otos8;
785 		otos8 = (ntohl(otos) >> 20) & 0xff;
786 
787 		if (M_UNWRITABLE(m, sizeof(*ip))) {
788 			m = m_pullup(m, sizeof(*ip));
789 			if (m == NULL)
790 				return IPPROTO_DONE;
791 		}
792 		ip = mtod(m, struct ip *);
793 		if (ipsecp->if_flags & IFF_ECN)
794 			ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos);
795 		else
796 			ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
797 #endif
798 		break;
799 	    }
800 #endif /* INET */
801 	case IPPROTO_IPV6:
802 	    {
803 		af = AF_INET6;
804 #ifndef IPSEC_TX_TOS_CLEAR
805 		struct ip6_hdr *xip6;
806 
807 		if (M_UNWRITABLE(m, sizeof(*xip6))) {
808 			m = m_pullup(m, sizeof(*xip6));
809 			if (m == NULL)
810 				return IPPROTO_DONE;
811 		}
812 		xip6 = mtod(m, struct ip6_hdr *);
813 		if (ipsecp->if_flags & IFF_ECN)
814 			ip6_ecn_egress(ECN_ALLOWED, &otos, &xip6->ip6_flow);
815 		else
816 			ip6_ecn_egress(ECN_NOCARE, &otos, &xip6->ip6_flow);
817 		break;
818 #endif
819 	    }
820 	default:
821 		IP6_STATINC(IP6_STAT_NOIPSEC);
822 		m_freem(m);
823 		return IPPROTO_DONE;
824 	}
825 
826 	if_ipsec_input(m, af, ipsecp);
827 	return IPPROTO_DONE;
828 }
829 
830 /*
831  * validate and filter the packet.
832  */
833 static int
834 ipsecif6_filter6(const struct ip6_hdr *ip6, struct ipsec_variant *var,
835     struct ifnet *ifp)
836 {
837 	struct sockaddr_in6 *src, *dst;
838 
839 	src = satosin6(var->iv_psrc);
840 	dst = satosin6(var->iv_pdst);
841 
842 	return in6_tunnel_validate(ip6, &src->sin6_addr, &dst->sin6_addr);
843 }
844 #endif /* INET6 */
845 
846 int
847 ipsecif4_attach(struct ipsec_variant *var)
848 {
849 	struct ipsec_softc *sc = var->iv_softc;
850 
851 	KASSERT(if_ipsec_variant_is_configured(var));
852 
853 	if (var->iv_encap_cookie4 != NULL)
854 		return EALREADY;
855 	var->iv_encap_cookie4 = encap_attach_func(AF_INET, -1, if_ipsec_encap_func,
856 	    &ipsecif4_encapsw, sc);
857 	if (var->iv_encap_cookie4 == NULL)
858 		return EEXIST;
859 
860 	var->iv_output = ipsecif4_output;
861 	return 0;
862 }
863 
864 int
865 ipsecif4_detach(struct ipsec_variant *var)
866 {
867 	int error;
868 
869 	if (var->iv_encap_cookie4 == NULL)
870 		return 0;
871 
872 	var->iv_output = NULL;
873 	error = encap_detach(var->iv_encap_cookie4);
874 	if (error == 0)
875 		var->iv_encap_cookie4 = NULL;
876 
877 	return error;
878 }
879 
880 #ifdef INET6
881 int
882 ipsecif6_attach(struct ipsec_variant *var)
883 {
884 	struct sockaddr_in6 mask6;
885 	struct ipsec_softc *sc = var->iv_softc;
886 
887 	KASSERT(if_ipsec_variant_is_configured(var));
888 	KASSERT(var->iv_encap_cookie6 == NULL);
889 
890 	memset(&mask6, 0, sizeof(mask6));
891 	mask6.sin6_len = sizeof(struct sockaddr_in6);
892 	mask6.sin6_addr.s6_addr32[0] = mask6.sin6_addr.s6_addr32[1] =
893 	mask6.sin6_addr.s6_addr32[2] = mask6.sin6_addr.s6_addr32[3] = ~0;
894 
895 	var->iv_encap_cookie6 = encap_attach_func(AF_INET6, -1, if_ipsec_encap_func,
896 	    &ipsecif6_encapsw, sc);
897 	if (var->iv_encap_cookie6 == NULL)
898 		return EEXIST;
899 
900 	var->iv_output = ipsecif6_output;
901 	return 0;
902 }
903 
904 static void
905 ipsecif6_rtcache_free_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
906 {
907 	struct ipsec_ro *iro = p;
908 
909 	mutex_enter(&iro->ir_lock);
910 	rtcache_free(&iro->ir_ro);
911 	mutex_exit(&iro->ir_lock);
912 }
913 
914 int
915 ipsecif6_detach(struct ipsec_variant *var)
916 {
917 	struct ipsec_softc *sc = var->iv_softc;
918 	int error;
919 
920 	KASSERT(var->iv_encap_cookie6 != NULL);
921 
922 	percpu_foreach(sc->ipsec_ro_percpu, ipsecif6_rtcache_free_pc, NULL);
923 
924 	var->iv_output = NULL;
925 	error = encap_detach(var->iv_encap_cookie6);
926 	if (error == 0)
927 		var->iv_encap_cookie6 = NULL;
928 	return error;
929 }
930 
931 void *
932 ipsecif6_ctlinput(int cmd, const struct sockaddr *sa, void *d, void *eparg)
933 {
934 	struct ipsec_softc *sc = eparg;
935 	struct ip6ctlparam *ip6cp = NULL;
936 	struct ip6_hdr *ip6;
937 	const struct sockaddr_in6 *dst6;
938 	struct ipsec_ro *iro;
939 
940 	if (sa->sa_family != AF_INET6 ||
941 	    sa->sa_len != sizeof(struct sockaddr_in6))
942 		return NULL;
943 
944 	if ((unsigned)cmd >= PRC_NCMDS)
945 		return NULL;
946 	if (cmd == PRC_HOSTDEAD)
947 		d = NULL;
948 	else if (inet6ctlerrmap[cmd] == 0)
949 		return NULL;
950 
951 	/* if the parameter is from icmp6, decode it. */
952 	if (d != NULL) {
953 		ip6cp = (struct ip6ctlparam *)d;
954 		ip6 = ip6cp->ip6c_ip6;
955 	} else {
956 		ip6 = NULL;
957 	}
958 
959 	if (!ip6)
960 		return NULL;
961 
962 	iro = percpu_getref(sc->ipsec_ro_percpu);
963 	mutex_enter(&iro->ir_lock);
964 	dst6 = satocsin6(rtcache_getdst(&iro->ir_ro));
965 	/* XXX scope */
966 	if (dst6 == NULL)
967 		;
968 	else if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst6->sin6_addr))
969 		/* flush route cache */
970 		rtcache_free(&iro->ir_ro);
971 
972 	mutex_exit(&iro->ir_lock);
973 	percpu_putref(sc->ipsec_ro_percpu);
974 
975 	return NULL;
976 }
977 
978 ENCAP_PR_WRAP_CTLINPUT(ipsecif6_ctlinput)
979 #define	ipsecif6_ctlinput	ipsecif6_ctlinput_wrapper
980 
981 static const struct encapsw ipsecif6_encapsw = {
982 	.encapsw6 = {
983 		.pr_input = ipsecif6_input,
984 		.pr_ctlinput = ipsecif6_ctlinput,
985 	}
986 };
987 #endif /* INET6 */
988