xref: /netbsd-src/sys/netipsec/ipsecif.c (revision 181254a7b1bdde6873432bffef2d2decc4b5c22f)
1 /*	$NetBSD: ipsecif.c,v 1.19 2020/01/31 06:54:19 knakahara Exp $  */
2 
3 /*
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: ipsecif.c,v 1.19 2020/01/31 06:54:19 knakahara Exp $");
31 
32 #ifdef _KERNEL_OPT
33 #include "opt_inet.h"
34 #include "opt_ipsec.h"
35 #endif
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/socket.h>
40 #include <sys/sockio.h>
41 #include <sys/mbuf.h>
42 #include <sys/errno.h>
43 #include <sys/ioctl.h>
44 #include <sys/syslog.h>
45 #include <sys/kernel.h>
46 
47 #include <net/if.h>
48 #include <net/route.h>
49 
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip_var.h>
54 #include <netinet/in_var.h>
55 #include <netinet/ip_encap.h>
56 #include <netinet/ip_ecn.h>
57 #include <netinet/ip_private.h>
58 #include <netinet/udp.h>
59 
60 #ifdef INET6
61 #include <netinet/ip6.h>
62 #include <netinet6/ip6_var.h>
63 #include <netinet6/ip6_private.h>
64 #include <netinet6/in6_var.h>
65 #include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
66 #include <netinet/ip_ecn.h>
67 #endif
68 
69 #include <netipsec/key.h>
70 #include <netipsec/ipsecif.h>
71 
72 #include <net/if_ipsec.h>
73 
74 static int ipsecif_set_natt_ports(struct ipsec_variant *, struct mbuf *);
75 static void ipsecif4_input(struct mbuf *, int, int, void *);
76 static int ipsecif4_output(struct ipsec_variant *, int, struct mbuf *);
77 static int ipsecif4_filter4(const struct ip *, struct ipsec_variant *,
78 	struct ifnet *);
79 
80 #ifdef INET6
81 static int ipsecif6_input(struct mbuf **, int *, int, void *);
82 static int ipsecif6_output(struct ipsec_variant *, int, struct mbuf *);
83 static int ipsecif6_filter6(const struct ip6_hdr *, struct ipsec_variant *,
84 	struct ifnet *);
85 #endif
86 
87 static int ip_ipsec_ttl = IPSEC_TTL;
88 static int ip_ipsec_copy_tos = 0;
89 #ifdef INET6
90 int ip6_ipsec_hlim = IPSEC_HLIM;
91 int ip6_ipsec_pmtu = 0;
92 static int ip6_ipsec_copy_tos = 0;
93 #endif
94 
95 static const struct encapsw ipsecif4_encapsw = {
96 	.encapsw4 = {
97 		.pr_input = ipsecif4_input,
98 		.pr_ctlinput = NULL,
99 	}
100 };
101 
102 #ifdef INET6
103 static const struct encapsw ipsecif6_encapsw;
104 #endif
105 
106 static int
107 ipsecif_set_natt_ports(struct ipsec_variant *var, struct mbuf *m)
108 {
109 
110 	KASSERT(if_ipsec_heldref_variant(var));
111 
112 	if (var->iv_sport || var->iv_dport) {
113 		struct m_tag *mtag;
114 
115 		mtag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
116 		    sizeof(uint16_t) + sizeof(uint16_t), M_DONTWAIT);
117 		if (mtag) {
118 			uint16_t *natt_port;
119 
120 			natt_port = (uint16_t *)(mtag + 1);
121 			natt_port[0] = var->iv_dport;
122 			natt_port[1] = var->iv_sport;
123 			m_tag_prepend(m, mtag);
124 		} else {
125 			return ENOBUFS;
126 		}
127 	}
128 
129 	return 0;
130 }
131 
132 static struct mbuf *
133 ipsecif4_prepend_hdr(struct ipsec_variant *var, struct mbuf *m,
134     uint8_t proto, uint8_t tos)
135 {
136 	struct ip *ip;
137 	struct sockaddr_in *src, *dst;
138 
139 	src = satosin(var->iv_psrc);
140 	dst = satosin(var->iv_pdst);
141 
142 	if (in_nullhost(src->sin_addr) || in_nullhost(src->sin_addr) ||
143 	    src->sin_addr.s_addr == INADDR_BROADCAST ||
144 	    dst->sin_addr.s_addr == INADDR_BROADCAST) {
145 		m_freem(m);
146 		return NULL;
147 	}
148 	m->m_flags &= ~M_BCAST;
149 
150 	if (IN_MULTICAST(src->sin_addr.s_addr) ||
151 	    IN_MULTICAST(dst->sin_addr.s_addr)) {
152 		m_freem(m);
153 		return NULL;
154 	}
155 
156 	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
157 	if (m && M_UNWRITABLE(m, sizeof(struct ip)))
158 		m = m_pullup(m, sizeof(struct ip));
159 	if (m == NULL)
160 		return NULL;
161 
162 	ip = mtod(m, struct ip *);
163 	ip->ip_v = IPVERSION;
164 	ip->ip_off = htons(0);
165 	if (m->m_pkthdr.len < IP_MINFRAGSIZE)
166 		ip->ip_id = 0;
167 	else
168 		ip->ip_id = ip_newid(NULL);
169 	ip->ip_hl = sizeof(*ip) >> 2;
170 	if (ip_ipsec_copy_tos)
171 		ip->ip_tos = tos;
172 	else
173 		ip->ip_tos = 0;
174 	ip->ip_sum = 0;
175 	ip->ip_src = src->sin_addr;
176 	ip->ip_dst = dst->sin_addr;
177 	ip->ip_p = proto;
178 	ip->ip_ttl = ip_ipsec_ttl;
179 	ip->ip_len = htons(m->m_pkthdr.len);
180 #ifndef IPSEC_TX_TOS_CLEAR
181 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
182 	if (ifp->if_flags & IFF_ECN)
183 		ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
184 	else
185 		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
186 #endif
187 
188 	return m;
189 }
190 
191 static int
192 ipsecif4_needfrag(struct mbuf *m, struct ipsecrequest *isr)
193 {
194 	struct ip ip0;
195 	struct ip *ip;
196 	int mtu;
197 	struct secasvar *sav;
198 
199 	sav = key_lookup_sa_bysaidx(&isr->saidx);
200 	if (sav == NULL)
201 		return 0;
202 
203 	if (!(sav->natt_type & UDP_ENCAP_ESPINUDP)) {
204 		mtu = 0;
205 		goto out;
206 	}
207 
208 	if (m->m_len < sizeof(struct ip)) {
209 		m_copydata(m, 0, sizeof(ip0), &ip0);
210 		ip = &ip0;
211 	} else {
212 		ip = mtod(m, struct ip *);
213 	}
214 	mtu = sav->esp_frag;
215 	if (ntohs(ip->ip_len) <= mtu)
216 		mtu = 0;
217 
218 out:
219 	KEY_SA_UNREF(&sav);
220 	return mtu;
221 }
222 
223 static struct mbuf *
224 ipsecif4_flowinfo(struct mbuf *m, int family, int *proto0, u_int8_t *tos0)
225 {
226 	const struct ip *ip;
227 	int proto;
228 	int tos;
229 
230 	KASSERT(proto0 != NULL);
231 	KASSERT(tos0 != NULL);
232 
233 	switch (family) {
234 	case AF_INET:
235 		proto = IPPROTO_IPV4;
236 		if (m->m_len < sizeof(*ip)) {
237 			m = m_pullup(m, sizeof(*ip));
238 			if (m == NULL) {
239 				*tos0 = 0;
240 				*proto0 = 0;
241 				return NULL;
242 			}
243 		}
244 		ip = mtod(m, const struct ip *);
245 		tos = ip->ip_tos;
246 		/* TODO: support ALTQ for innner packet */
247 		break;
248 #ifdef INET6
249 	case AF_INET6: {
250 		const struct ip6_hdr *ip6;
251 		proto = IPPROTO_IPV6;
252 		if (m->m_len < sizeof(*ip6)) {
253 			m = m_pullup(m, sizeof(*ip6));
254 			if (m == NULL) {
255 				*tos0 = 0;
256 				*proto0 = 0;
257 				return NULL;
258 			}
259 		}
260 		ip6 = mtod(m, const struct ip6_hdr *);
261 		tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
262 		/* TODO: support ALTQ for innner packet */
263 		break;
264 	}
265 #endif /* INET6 */
266 	default:
267 		*tos0 = 0;
268 		*proto0 = 0;
269 		return NULL;
270 	}
271 
272 	*proto0 = proto;
273 	*tos0 = tos;
274 	return m;
275 }
276 
277 static int
278 ipsecif4_fragout(struct ipsec_variant *var, int family, struct mbuf *m, int mtu)
279 {
280 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
281 	struct mbuf *next;
282 	struct m_tag *mtag;
283 	int error;
284 
285 	KASSERT(if_ipsec_heldref_variant(var));
286 
287 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS);
288 	if (mtag)
289 		m_tag_delete(m, mtag);
290 
291 	/* consider new IP header prepended in ipsecif4_output() */
292 	if (mtu <= sizeof(struct ip)) {
293 		m_freem(m);
294 		return ENETUNREACH;
295 	}
296 	m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
297 	error = ip_fragment(m, ifp, mtu - sizeof(struct ip));
298 	if (error)
299 		return error;
300 
301 	for (error = 0; m; m = next) {
302 		next = m->m_nextpkt;
303 		m->m_nextpkt = NULL;
304 		if (error) {
305 			m_freem(m);
306 			continue;
307 		}
308 
309 		error = ipsecif4_output(var, family, m);
310 	}
311 	if (error == 0)
312 		IP_STATINC(IP_STAT_FRAGMENTED);
313 
314 	return error;
315 }
316 
317 int
318 ipsecif4_encap_func(struct mbuf *m, struct ip *ip, struct ipsec_variant *var)
319 {
320 	struct m_tag *mtag;
321 	struct sockaddr_in *src, *dst;
322 	u_int16_t src_port = 0;
323 	u_int16_t dst_port = 0;
324 
325 	KASSERT(var != NULL);
326 
327 	src = satosin(var->iv_psrc);
328 	dst = satosin(var->iv_pdst);
329 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS);
330 	if (mtag) {
331 		u_int16_t *ports;
332 
333 		ports = (u_int16_t *)(mtag + 1);
334 		src_port = ports[0];
335 		dst_port = ports[1];
336 	}
337 
338 	/* address match */
339 	if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
340 	    dst->sin_addr.s_addr != ip->ip_src.s_addr)
341 		return 0;
342 
343 	/* UDP encap? */
344 	if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
345 		goto match;
346 
347 	/* port match */
348 	if (src_port != var->iv_dport ||
349 	    dst_port != var->iv_sport) {
350 #ifdef DEBUG
351 		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
352 		    __func__, ntohs(src_port), ntohs(dst_port),
353 		    ntohs(var->iv_sport), ntohs(var->iv_dport));
354 #endif
355 		return 0;
356 	}
357 
358 match:
359 	/*
360 	 * hide NAT-T information from encapsulated traffics.
361 	 * they don't know about IPsec.
362 	 */
363 	if (mtag)
364 		m_tag_delete(m, mtag);
365 	return sizeof(src->sin_addr) + sizeof(dst->sin_addr);
366 }
367 
368 static int
369 ipsecif4_output(struct ipsec_variant *var, int family, struct mbuf *m)
370 {
371 	struct secpolicy *sp = NULL;
372 	u_int8_t tos;
373 	int proto;
374 	int error;
375 	int mtu;
376 	u_long sa_mtu = 0;
377 
378 	KASSERT(if_ipsec_heldref_variant(var));
379 	KASSERT(if_ipsec_variant_is_configured(var));
380 	KASSERT(var->iv_psrc->sa_family == AF_INET);
381 	KASSERT(var->iv_pdst->sa_family == AF_INET);
382 
383 	switch (family) {
384 	case AF_INET:
385 		sp = IV_SP_OUT(var);
386 		break;
387 	case AF_INET6:
388 		sp = IV_SP_OUT6(var);
389 		break;
390 	default:
391 		m_freem(m);
392 		return EAFNOSUPPORT;
393 	}
394 	KASSERT(sp != NULL);
395 	/*
396 	 * The SPs in ipsec_variant are prevented from freed by
397 	 * ipsec_variant->iv_psref. So, KEY_SP_REF() is unnecessary here.
398 	 */
399 
400 	KASSERT(sp->policy != IPSEC_POLICY_NONE);
401 	KASSERT(sp->policy != IPSEC_POLICY_ENTRUST);
402 	KASSERT(sp->policy != IPSEC_POLICY_BYPASS);
403 	if (sp->policy != IPSEC_POLICY_IPSEC) {
404 		m_freem(m);
405 		error = ENETUNREACH;
406 		goto done;
407 	}
408 
409 	/* get flowinfo */
410 	m = ipsecif4_flowinfo(m, family, &proto, &tos);
411 	if (m == NULL) {
412 		error = ENETUNREACH;
413 		goto done;
414 	}
415 
416 	/* prepend new IP header */
417 	m = ipsecif4_prepend_hdr(var, m, proto, tos);
418 	if (m == NULL) {
419 		error = ENETUNREACH;
420 		goto done;
421 	}
422 
423 	/*
424 	 * Normal netipsec's NAT-T fragmentation is done in ip_output().
425 	 * See "natt_frag" processing.
426 	 * However, ipsec(4) interface's one is not done in the same way,
427 	 * so we must do NAT-T fragmentation by own code.
428 	 */
429 	/* NAT-T ESP fragmentation */
430 	mtu = ipsecif4_needfrag(m, sp->req);
431 	if (mtu > 0)
432 		return ipsecif4_fragout(var, family, m, mtu);
433 
434 	/* set NAT-T ports */
435 	error = ipsecif_set_natt_ports(var, m);
436 	if (error) {
437 		m_freem(m);
438 		goto done;
439 	}
440 
441 	/* IPsec output */
442 	IP_STATINC(IP_STAT_LOCALOUT);
443 	error = ipsec4_process_packet(m, sp->req, &sa_mtu);
444 	if (error == ENOENT)
445 		error = 0;
446 	/*
447 	 * frangmentation is already done in ipsecif4_fragout(),
448 	 * so ipsec4_process_packet() must not do fragmentation here.
449 	 */
450 	KASSERT(sa_mtu == 0);
451 
452 done:
453 	return error;
454 }
455 
456 #ifdef INET6
457 int
458 ipsecif6_encap_func(struct mbuf *m, struct ip6_hdr *ip6, struct ipsec_variant *var)
459 {
460 	struct m_tag *mtag;
461 	struct sockaddr_in6 *src, *dst;
462 	u_int16_t src_port = 0;
463 	u_int16_t dst_port = 0;
464 
465 	KASSERT(var != NULL);
466 
467 	src = satosin6(var->iv_psrc);
468 	dst = satosin6(var->iv_pdst);
469 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS);
470 	if (mtag) {
471 		u_int16_t *ports;
472 
473 		ports = (u_int16_t *)(mtag + 1);
474 		src_port = ports[0];
475 		dst_port = ports[1];
476 	}
477 
478 	/* address match */
479 	if (!IN6_ARE_ADDR_EQUAL(&src->sin6_addr, &ip6->ip6_dst) ||
480 	    !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_src))
481 		return 0;
482 
483 	/* UDP encap? */
484 	if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
485 		goto match;
486 
487 	/* port match */
488 	if (src_port != var->iv_dport ||
489 	    dst_port != var->iv_sport) {
490 #ifdef DEBUG
491 		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
492 		    __func__, ntohs(src_port), ntohs(dst_port),
493 		    ntohs(var->iv_sport), ntohs(var->iv_dport));
494 #endif
495 		return 0;
496 	}
497 
498 match:
499 	/*
500 	 * hide NAT-T information from encapsulated traffics.
501 	 * they don't know about IPsec.
502 	 */
503 	if (mtag)
504 		m_tag_delete(m, mtag);
505 	return sizeof(src->sin6_addr) + sizeof(dst->sin6_addr);
506 }
507 
508 static int
509 ipsecif6_output(struct ipsec_variant *var, int family, struct mbuf *m)
510 {
511 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
512 	struct ipsec_softc *sc = ifp->if_softc;
513 	struct route *ro_pc;
514 	kmutex_t *lock_pc;
515 	struct rtentry *rt;
516 	struct sockaddr_in6 *sin6_src;
517 	struct sockaddr_in6 *sin6_dst;
518 	struct ip6_hdr *ip6;
519 	int proto, error, flags;
520 	u_int8_t itos, otos;
521 	union {
522 		struct sockaddr		dst;
523 		struct sockaddr_in6	dst6;
524 	} u;
525 
526 	KASSERT(if_ipsec_heldref_variant(var));
527 	KASSERT(if_ipsec_variant_is_configured(var));
528 
529 	sin6_src = satosin6(var->iv_psrc);
530 	sin6_dst = satosin6(var->iv_pdst);
531 
532 	KASSERT(sin6_src->sin6_family == AF_INET6);
533 	KASSERT(sin6_dst->sin6_family == AF_INET6);
534 
535 	switch (family) {
536 #ifdef INET
537 	case AF_INET:
538 	    {
539 		struct ip *ip;
540 
541 		proto = IPPROTO_IPV4;
542 		if (m->m_len < sizeof(*ip)) {
543 			m = m_pullup(m, sizeof(*ip));
544 			if (m == NULL)
545 				return ENOBUFS;
546 		}
547 		ip = mtod(m, struct ip *);
548 		itos = ip->ip_tos;
549 		/* TODO: support ALTQ for innner packet */
550 		break;
551 	    }
552 #endif /* INET */
553 	case AF_INET6:
554 	    {
555 		struct ip6_hdr *xip6;
556 		proto = IPPROTO_IPV6;
557 		if (m->m_len < sizeof(*xip6)) {
558 			m = m_pullup(m, sizeof(*xip6));
559 			if (m == NULL)
560 				return ENOBUFS;
561 		}
562 		xip6 = mtod(m, struct ip6_hdr *);
563 		itos = (ntohl(xip6->ip6_flow) >> 20) & 0xff;
564 		/* TODO: support ALTQ for innner packet */
565 		break;
566 	    }
567 	default:
568 		m_freem(m);
569 		return EAFNOSUPPORT;
570 	}
571 
572 	/* prepend new IP header */
573 	M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
574 	if (m && M_UNWRITABLE(m, sizeof(struct ip6_hdr)))
575 		m = m_pullup(m, sizeof(struct ip6_hdr));
576 	if (m == NULL)
577 		return ENOBUFS;
578 
579 	ip6 = mtod(m, struct ip6_hdr *);
580 	ip6->ip6_flow	= 0;
581 	ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
582 	ip6->ip6_vfc	|= IPV6_VERSION;
583 #if 0	/* ip6->ip6_plen will be filled by ip6_output */
584 	ip6->ip6_plen	= htons((u_short)m->m_pkthdr.len - sizeof(*ip6));
585 #endif
586 	ip6->ip6_nxt	= proto;
587 	ip6->ip6_hlim	= ip6_ipsec_hlim;
588 	ip6->ip6_src	= sin6_src->sin6_addr;
589 	/* bidirectional configured tunnel mode */
590 	if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr)) {
591 		ip6->ip6_dst = sin6_dst->sin6_addr;
592 	} else  {
593 		m_freem(m);
594 		return ENETUNREACH;
595 	}
596 #ifndef IPSEC_TX_TOS_CLEAR
597 	if (!ip6_ipsec_copy_tos)
598 		otos = 0;
599 
600 	if (ifp->if_flags & IFF_ECN)
601 		ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
602 	else
603 		ip_ecn_ingress(ECN_NOCARE, &otos, &itos);
604 #else
605 	if (ip6_ipsec_copy_tos)
606 		otos = itos;
607 	else
608 		otos = 0;
609 #endif
610 	ip6->ip6_flow &= ~ntohl(0xff00000);
611 	ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
612 
613 	sockaddr_in6_init(&u.dst6, &sin6_dst->sin6_addr, 0, 0, 0);
614 
615 	if_tunnel_get_ro(sc->ipsec_ro_percpu, &ro_pc, &lock_pc);
616 	if ((rt = rtcache_lookup(ro_pc, &u.dst)) == NULL) {
617 		if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
618 		m_freem(m);
619 		return ENETUNREACH;
620 	}
621 
622 	if (rt->rt_ifp == ifp) {
623 		rtcache_unref(rt, ro_pc);
624 		rtcache_free(ro_pc);
625 		if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
626 		m_freem(m);
627 		return ENETUNREACH;
628 	}
629 	rtcache_unref(rt, ro_pc);
630 
631 	/* set NAT-T ports */
632 	error = ipsecif_set_natt_ports(var, m);
633 	if (error) {
634 		m_freem(m);
635 		goto out;
636 	}
637 
638 	/*
639 	 * - IPSEC_PMTU_MINMTU
640 	 *   Force fragmentation to minimum MTU to avoid path MTU discovery
641 	 * - IPSEC_PMTU_OUTERMTU
642 	 *   Trust outer MTU is large enough to send all packets
643 	 *
644 	 * It is too painful to ask for resend of inner packet, to achieve
645 	 * path MTU discovery for encapsulated packets.
646 	 *
647 	 * See RFC4459.
648 	 */
649 	if (sc->ipsec_pmtu == IPSEC_PMTU_SYSDEFAULT) {
650 		switch (ip6_ipsec_pmtu) {
651 		case IPSEC_PMTU_MINMTU:
652 			flags = IPV6_MINMTU;
653 			break;
654 		case IPSEC_PMTU_OUTERMTU:
655 			flags = 0;
656 			break;
657 		default:
658 #ifdef DEBUG
659 			log(LOG_DEBUG, "%s: ignore unexpected ip6_ipsec_pmtu %d\n",
660 			    __func__, ip6_ipsec_pmtu);
661 #endif
662 			flags = IPV6_MINMTU;
663 			break;
664 		}
665 	} else {
666 		switch (sc->ipsec_pmtu) {
667 		case IPSEC_PMTU_MINMTU:
668 			flags = IPV6_MINMTU;
669 			break;
670 		case IPSEC_PMTU_OUTERMTU:
671 			flags = 0;
672 			break;
673 		default:
674 #ifdef DEBUG
675 			log(LOG_DEBUG, "%s: ignore unexpected ipsec_pmtu of %s %d\n",
676 			    __func__, ifp->if_xname, sc->ipsec_pmtu);
677 #endif
678 			flags = IPV6_MINMTU;
679 			break;
680 		}
681 	}
682 	error = ip6_output(m, 0, ro_pc, flags, 0, NULL, NULL);
683 
684 out:
685 	if (error)
686 		rtcache_free(ro_pc);
687 	if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
688 
689 	return error;
690 }
691 #endif /* INET6 */
692 
693 static void
694 ipsecif4_input(struct mbuf *m, int off, int proto, void *eparg)
695 {
696 	struct ifnet *ipsecp;
697 	struct ipsec_softc *sc = eparg;
698 	struct ipsec_variant *var;
699 	const struct ip *ip;
700 	int af;
701 #ifndef IPSEC_TX_TOS_CLEAR
702 	u_int8_t otos;
703 #endif
704 	struct psref psref_rcvif;
705 	struct psref psref_var;
706 	struct ifnet *rcvif;
707 
708 	KASSERT(sc != NULL);
709 
710 	ipsecp = &sc->ipsec_if;
711 	if ((ipsecp->if_flags & IFF_UP) == 0) {
712 		m_freem(m);
713 		ip_statinc(IP_STAT_NOIPSEC);
714 		return;
715 	}
716 
717 	var = if_ipsec_getref_variant(sc, &psref_var);
718 	if (if_ipsec_variant_is_unconfigured(var)) {
719 		if_ipsec_putref_variant(var, &psref_var);
720 		m_freem(m);
721 		ip_statinc(IP_STAT_NOIPSEC);
722 		return;
723 	}
724 
725 	ip = mtod(m, const struct ip *);
726 
727 	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
728 	if (rcvif == NULL || !ipsecif4_filter4(ip, var, rcvif)) {
729 		m_put_rcvif_psref(rcvif, &psref_rcvif);
730 		if_ipsec_putref_variant(var, &psref_var);
731 		m_freem(m);
732 		ip_statinc(IP_STAT_NOIPSEC);
733 		return;
734 	}
735 	m_put_rcvif_psref(rcvif, &psref_rcvif);
736 	if_ipsec_putref_variant(var, &psref_var);
737 #ifndef IPSEC_TX_TOS_CLEAR
738 	otos = ip->ip_tos;
739 #endif
740 	m_adj(m, off);
741 
742 	switch (proto) {
743 	case IPPROTO_IPV4:
744 	    {
745 		struct ip *xip;
746 		af = AF_INET;
747 		if (M_UNWRITABLE(m, sizeof(*xip))) {
748 			m = m_pullup(m, sizeof(*xip));
749 			if (m == NULL)
750 				return;
751 		}
752 		xip = mtod(m, struct ip *);
753 #ifndef IPSEC_TX_TOS_CLEAR
754 		if (ipsecp->if_flags & IFF_ECN)
755 			ip_ecn_egress(ECN_ALLOWED, &otos, &xip->ip_tos);
756 		else
757 			ip_ecn_egress(ECN_NOCARE, &otos, &xip->ip_tos);
758 #endif
759 		break;
760 	    }
761 #ifdef INET6
762 	case IPPROTO_IPV6:
763 	    {
764 		struct ip6_hdr *ip6;
765 		u_int8_t itos;
766 		af = AF_INET6;
767 		if (M_UNWRITABLE(m, sizeof(*ip6))) {
768 			m = m_pullup(m, sizeof(*ip6));
769 			if (m == NULL)
770 				return;
771 		}
772 		ip6 = mtod(m, struct ip6_hdr *);
773 		itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
774 #ifndef IPSEC_TX_TOS_CLEAR
775 		if (ipsecp->if_flags & IFF_ECN)
776 			ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
777 		else
778 			ip_ecn_egress(ECN_NOCARE, &otos, &itos);
779 #endif
780 		ip6->ip6_flow &= ~htonl(0xff << 20);
781 		ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
782 		break;
783 	    }
784 #endif /* INET6 */
785 	default:
786 		ip_statinc(IP_STAT_NOIPSEC);
787 		m_freem(m);
788 		return;
789 	}
790 	if_ipsec_input(m, af, ipsecp);
791 
792 	return;
793 }
794 
795 /*
796  * validate and filter the packet
797  */
798 static int
799 ipsecif4_filter4(const struct ip *ip, struct ipsec_variant *var,
800     struct ifnet *ifp)
801 {
802 	struct sockaddr_in *src, *dst;
803 
804 	src = satosin(var->iv_psrc);
805 	dst = satosin(var->iv_pdst);
806 
807 	return in_tunnel_validate(ip, src->sin_addr, dst->sin_addr);
808 }
809 
810 #ifdef INET6
811 static int
812 ipsecif6_input(struct mbuf **mp, int *offp, int proto, void *eparg)
813 {
814 	struct mbuf *m = *mp;
815 	struct ifnet *ipsecp;
816 	struct ipsec_softc *sc = eparg;
817 	struct ipsec_variant *var;
818 	struct ip6_hdr *ip6;
819 	int af = 0;
820 #ifndef IPSEC_TX_TOS_CLEAR
821 	u_int32_t otos;
822 #endif
823 	struct psref psref_rcvif;
824 	struct psref psref_var;
825 	struct ifnet *rcvif;
826 
827 	KASSERT(eparg != NULL);
828 
829 	ipsecp = &sc->ipsec_if;
830 	if ((ipsecp->if_flags & IFF_UP) == 0) {
831 		m_freem(m);
832 		IP6_STATINC(IP6_STAT_NOIPSEC);
833 		return IPPROTO_DONE;
834 	}
835 
836 	var = if_ipsec_getref_variant(sc, &psref_var);
837 	if (if_ipsec_variant_is_unconfigured(var)) {
838 		if_ipsec_putref_variant(var, &psref_var);
839 		m_freem(m);
840 		IP6_STATINC(IP6_STAT_NOIPSEC);
841 		return IPPROTO_DONE;
842 	}
843 
844 	ip6 = mtod(m, struct ip6_hdr *);
845 
846 	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
847 	if (rcvif == NULL || !ipsecif6_filter6(ip6, var, rcvif)) {
848 		m_put_rcvif_psref(rcvif, &psref_rcvif);
849 		if_ipsec_putref_variant(var, &psref_var);
850 		m_freem(m);
851 		IP6_STATINC(IP6_STAT_NOIPSEC);
852 		return IPPROTO_DONE;
853 	}
854 	m_put_rcvif_psref(rcvif, &psref_rcvif);
855 	if_ipsec_putref_variant(var, &psref_var);
856 
857 #ifndef IPSEC_TX_TOS_CLEAR
858 	otos = ip6->ip6_flow;
859 #endif
860 	m_adj(m, *offp);
861 
862 	switch (proto) {
863 #ifdef INET
864 	case IPPROTO_IPV4:
865 	    {
866 		af = AF_INET;
867 #ifndef IPSEC_TX_TOS_CLEAR
868 		struct ip *ip;
869 		u_int8_t otos8;
870 		otos8 = (ntohl(otos) >> 20) & 0xff;
871 
872 		if (M_UNWRITABLE(m, sizeof(*ip))) {
873 			m = m_pullup(m, sizeof(*ip));
874 			if (m == NULL)
875 				return IPPROTO_DONE;
876 		}
877 		ip = mtod(m, struct ip *);
878 		if (ipsecp->if_flags & IFF_ECN)
879 			ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos);
880 		else
881 			ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
882 #endif
883 		break;
884 	    }
885 #endif /* INET */
886 	case IPPROTO_IPV6:
887 	    {
888 		af = AF_INET6;
889 #ifndef IPSEC_TX_TOS_CLEAR
890 		struct ip6_hdr *xip6;
891 
892 		if (M_UNWRITABLE(m, sizeof(*xip6))) {
893 			m = m_pullup(m, sizeof(*xip6));
894 			if (m == NULL)
895 				return IPPROTO_DONE;
896 		}
897 		xip6 = mtod(m, struct ip6_hdr *);
898 		if (ipsecp->if_flags & IFF_ECN)
899 			ip6_ecn_egress(ECN_ALLOWED, &otos, &xip6->ip6_flow);
900 		else
901 			ip6_ecn_egress(ECN_NOCARE, &otos, &xip6->ip6_flow);
902 		break;
903 #endif
904 	    }
905 	default:
906 		IP6_STATINC(IP6_STAT_NOIPSEC);
907 		m_freem(m);
908 		return IPPROTO_DONE;
909 	}
910 
911 	if_ipsec_input(m, af, ipsecp);
912 	return IPPROTO_DONE;
913 }
914 
915 /*
916  * validate and filter the packet.
917  */
918 static int
919 ipsecif6_filter6(const struct ip6_hdr *ip6, struct ipsec_variant *var,
920     struct ifnet *ifp)
921 {
922 	struct sockaddr_in6 *src, *dst;
923 
924 	src = satosin6(var->iv_psrc);
925 	dst = satosin6(var->iv_pdst);
926 
927 	return in6_tunnel_validate(ip6, &src->sin6_addr, &dst->sin6_addr);
928 }
929 #endif /* INET6 */
930 
931 int
932 ipsecif4_attach(struct ipsec_variant *var)
933 {
934 	struct ipsec_softc *sc = var->iv_softc;
935 
936 	KASSERT(if_ipsec_variant_is_configured(var));
937 
938 	if (var->iv_encap_cookie4 != NULL)
939 		return EALREADY;
940 	var->iv_encap_cookie4 = encap_attach_func(AF_INET, -1, if_ipsec_encap_func,
941 	    &ipsecif4_encapsw, sc);
942 	if (var->iv_encap_cookie4 == NULL)
943 		return EEXIST;
944 
945 	var->iv_output = ipsecif4_output;
946 	return 0;
947 }
948 
949 int
950 ipsecif4_detach(struct ipsec_variant *var)
951 {
952 	int error;
953 
954 	if (var->iv_encap_cookie4 == NULL)
955 		return 0;
956 
957 	var->iv_output = NULL;
958 	error = encap_detach(var->iv_encap_cookie4);
959 	if (error == 0)
960 		var->iv_encap_cookie4 = NULL;
961 
962 	return error;
963 }
964 
965 #ifdef INET6
966 int
967 ipsecif6_attach(struct ipsec_variant *var)
968 {
969 	struct ipsec_softc *sc = var->iv_softc;
970 
971 	KASSERT(if_ipsec_variant_is_configured(var));
972 	KASSERT(var->iv_encap_cookie6 == NULL);
973 
974 	var->iv_encap_cookie6 = encap_attach_func(AF_INET6, -1, if_ipsec_encap_func,
975 	    &ipsecif6_encapsw, sc);
976 	if (var->iv_encap_cookie6 == NULL)
977 		return EEXIST;
978 
979 	var->iv_output = ipsecif6_output;
980 	return 0;
981 }
982 
983 int
984 ipsecif6_detach(struct ipsec_variant *var)
985 {
986 	struct ipsec_softc *sc = var->iv_softc;
987 	int error;
988 
989 	KASSERT(var->iv_encap_cookie6 != NULL);
990 
991 	if_tunnel_ro_percpu_rtcache_free(sc->ipsec_ro_percpu);
992 
993 	var->iv_output = NULL;
994 	error = encap_detach(var->iv_encap_cookie6);
995 	if (error == 0)
996 		var->iv_encap_cookie6 = NULL;
997 	return error;
998 }
999 
1000 void *
1001 ipsecif6_ctlinput(int cmd, const struct sockaddr *sa, void *d, void *eparg)
1002 {
1003 	struct ipsec_softc *sc = eparg;
1004 	struct ip6ctlparam *ip6cp = NULL;
1005 	struct ip6_hdr *ip6;
1006 	const struct sockaddr_in6 *dst6;
1007 	struct route *ro_pc;
1008 	kmutex_t *lock_pc;
1009 
1010 	if (sa->sa_family != AF_INET6 ||
1011 	    sa->sa_len != sizeof(struct sockaddr_in6))
1012 		return NULL;
1013 
1014 	if ((unsigned)cmd >= PRC_NCMDS)
1015 		return NULL;
1016 	if (cmd == PRC_HOSTDEAD)
1017 		d = NULL;
1018 	else if (inet6ctlerrmap[cmd] == 0)
1019 		return NULL;
1020 
1021 	/* if the parameter is from icmp6, decode it. */
1022 	if (d != NULL) {
1023 		ip6cp = (struct ip6ctlparam *)d;
1024 		ip6 = ip6cp->ip6c_ip6;
1025 	} else {
1026 		ip6 = NULL;
1027 	}
1028 
1029 	if (!ip6)
1030 		return NULL;
1031 
1032 	if_tunnel_get_ro(sc->ipsec_ro_percpu, &ro_pc, &lock_pc);
1033 	dst6 = satocsin6(rtcache_getdst(ro_pc));
1034 	/* XXX scope */
1035 	if (dst6 == NULL)
1036 		;
1037 	else if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst6->sin6_addr))
1038 		/* flush route cache */
1039 		rtcache_free(ro_pc);
1040 
1041 	if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
1042 
1043 	return NULL;
1044 }
1045 
1046 ENCAP_PR_WRAP_CTLINPUT(ipsecif6_ctlinput)
1047 #define	ipsecif6_ctlinput	ipsecif6_ctlinput_wrapper
1048 
1049 static const struct encapsw ipsecif6_encapsw = {
1050 	.encapsw6 = {
1051 		.pr_input = ipsecif6_input,
1052 		.pr_ctlinput = ipsecif6_ctlinput,
1053 	}
1054 };
1055 #endif /* INET6 */
1056