xref: /openbsd-src/sys/netinet/ipsec_input.c (revision fc405d53b73a2d73393cb97f684863d17b583e38)
1 /*	$OpenBSD: ipsec_input.c,v 1.204 2023/05/13 13:35:17 bluhm Exp $	*/
2 /*
3  * The authors of this code are John Ioannidis (ji@tla.org),
4  * Angelos D. Keromytis (kermit@csd.uch.gr) and
5  * Niels Provos (provos@physnet.uni-hamburg.de).
6  *
7  * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
8  * in November 1995.
9  *
10  * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
11  * by Angelos D. Keromytis.
12  *
13  * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
14  * and Niels Provos.
15  *
16  * Additional features in 1999 by Angelos D. Keromytis.
17  *
18  * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
19  * Angelos D. Keromytis and Niels Provos.
20  * Copyright (c) 2001, Angelos D. Keromytis.
21  *
22  * Permission to use, copy, and modify this software with or without fee
23  * is hereby granted, provided that this entire notice is included in
24  * all copies of any software which is or includes a copy or
25  * modification of this software.
26  * You may use this code under the GNU public license if you so wish. Please
27  * contribute changes back to the authors under this freer than GPL license
28  * so that we may further the use of strong encryption without limitations to
29  * all.
30  *
31  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
32  * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
33  * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
34  * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
35  * PURPOSE.
36  */
37 
38 #include "pf.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/protosw.h>
43 #include <sys/mbuf.h>
44 #include <sys/socket.h>
45 #include <sys/sysctl.h>
46 #include <sys/kernel.h>
47 #include <sys/timeout.h>
48 
49 #include <net/if.h>
50 #include <net/if_var.h>
51 #include <net/netisr.h>
52 #include <net/bpf.h>
53 #include <net/route.h>
54 
55 #include <netinet/in.h>
56 #include <netinet/ip.h>
57 #include <netinet/ip_var.h>
58 #include <netinet/ip_icmp.h>
59 #include <netinet/tcp.h>
60 #include <netinet/udp.h>
61 
62 #if NPF > 0
63 #include <net/pfvar.h>
64 #endif
65 
66 #ifdef INET6
67 #include <netinet6/in6_var.h>
68 #include <netinet/ip6.h>
69 #include <netinet6/ip6_var.h>
70 #endif /* INET6 */
71 
72 #include <netinet/ip_ipsp.h>
73 #include <netinet/ip_esp.h>
74 #include <netinet/ip_ah.h>
75 #include <netinet/ip_ipcomp.h>
76 
77 #include <net/if_enc.h>
78 
79 #include <crypto/cryptodev.h>
80 #include <crypto/xform.h>
81 
82 #include "bpfilter.h"
83 
/*
 * Forward declaration: shared control-input handler called by
 * ah4_ctlinput() and esp4_ctlinput(); defined later in this file.
 */
void ipsec_common_ctlinput(u_int, int, struct sockaddr *, void *, int);
85 
/*
 * Debug output helper.  When ENCDEBUG is compiled in, prints
 * "<function name>: <formatted message>\n" if the encdebug variable is
 * non-zero.  Without ENCDEBUG the macro expands to an empty statement and
 * its arguments are not evaluated.
 */
#ifdef ENCDEBUG
#define DPRINTF(fmt, args...)						\
	do {								\
		if (encdebug)						\
			printf("%s: " fmt "\n", __func__, ## args);	\
	} while (0)
#else
#define DPRINTF(fmt, args...)						\
	do { } while (0)
#endif
96 
/* sysctl variables */
int encdebug = 0;	/* gates DPRINTF() output when ENCDEBUG is built in */
int ipsec_keep_invalid = IPSEC_DEFAULT_EMBRYONIC_SA_TIMEOUT;
int ipsec_require_pfs = IPSEC_DEFAULT_PFS;
int ipsec_soft_allocations = IPSEC_DEFAULT_SOFT_ALLOCATIONS;
int ipsec_exp_allocations = IPSEC_DEFAULT_EXP_ALLOCATIONS;
int ipsec_soft_bytes = IPSEC_DEFAULT_SOFT_BYTES;
int ipsec_exp_bytes = IPSEC_DEFAULT_EXP_BYTES;
int ipsec_soft_timeout = IPSEC_DEFAULT_SOFT_TIMEOUT;
int ipsec_exp_timeout = IPSEC_DEFAULT_EXP_TIMEOUT;
int ipsec_soft_first_use = IPSEC_DEFAULT_SOFT_FIRST_USE;
int ipsec_exp_first_use = IPSEC_DEFAULT_EXP_FIRST_USE;
int ipsec_expire_acquire = IPSEC_DEFAULT_EXPIRE_ACQUIRE;

/*
 * Per-protocol enable switches, tested by the *46_input() wrappers below.
 * ESP and AH start enabled, IPComp starts disabled.
 */
int esp_enable = 1;
int ah_enable = 1;
int ipcomp_enable = 0;

/*
 * Tables handed to sysctl_bounded_arr() by esp_sysctl(), ah_sysctl() and
 * ipcomp_sysctl().  Each entry presumably reads { sysctl name, variable,
 * minimum, maximum } -- confirm against struct sysctl_bounded_args.
 */
const struct sysctl_bounded_args espctl_vars[] = {
	{ESPCTL_ENABLE, &esp_enable, 0, 1},
	{ESPCTL_UDPENCAP_ENABLE, &udpencap_enable, 0, 1},
	{ESPCTL_UDPENCAP_PORT, &udpencap_port, 0, 65535},
};
const struct sysctl_bounded_args ahctl_vars[] = {
	{AHCTL_ENABLE, &ah_enable, 0, 1},
};
const struct sysctl_bounded_args ipcompctl_vars[] = {
	{IPCOMPCTL_ENABLE, &ipcomp_enable, 0, 1},
};
126 
/*
 * Statistics counters for ESP, AH, IPComp and IPsec as a whole.
 * Allocated in ipsec_init() and read back by the *_sysctl_*stat() helpers.
 */
struct cpumem *espcounters;
struct cpumem *ahcounters;
struct cpumem *ipcompcounters;
struct cpumem *ipseccounters;

/*
 * Default algorithm names.  Filled in by ipsec_init() and readable/writable
 * as strings through ipsec_sysctl().
 */
char ipsec_def_enc[20];
char ipsec_def_auth[20];
char ipsec_def_comp[20];
135 
/*
 * Integer sysctls served by the default case of ipsec_sysctl() through
 * sysctl_bounded_arr().  Each entry presumably reads { sysctl name,
 * variable, minimum, maximum } -- confirm against struct
 * sysctl_bounded_args.
 */
const struct sysctl_bounded_args ipsecctl_vars[] = {
	{ IPSEC_ENCDEBUG, &encdebug, 0, 1 },
	{ IPSEC_EXPIRE_ACQUIRE, &ipsec_expire_acquire, 0, INT_MAX },
	{ IPSEC_EMBRYONIC_SA_TIMEOUT, &ipsec_keep_invalid, 0, INT_MAX },
	{ IPSEC_REQUIRE_PFS, &ipsec_require_pfs, 0, 1 },
	{ IPSEC_SOFT_ALLOCATIONS, &ipsec_soft_allocations, 0, INT_MAX },
	{ IPSEC_ALLOCATIONS, &ipsec_exp_allocations, 0, INT_MAX },
	{ IPSEC_SOFT_BYTES, &ipsec_soft_bytes, 0, INT_MAX },
	{ IPSEC_BYTES, &ipsec_exp_bytes, 0, INT_MAX },
	{ IPSEC_TIMEOUT, &ipsec_exp_timeout, 0, INT_MAX },
	{ IPSEC_SOFT_TIMEOUT, &ipsec_soft_timeout,0, INT_MAX },
	{ IPSEC_SOFT_FIRSTUSE, &ipsec_soft_first_use, 0, INT_MAX },
	{ IPSEC_FIRSTUSE, &ipsec_exp_first_use, 0, INT_MAX },
};
150 
/* Statistics export helpers used by the sysctl handlers; defined below. */
int esp_sysctl_espstat(void *, size_t *, void *);
int ah_sysctl_ahstat(void *, size_t *, void *);
int ipcomp_sysctl_ipcompstat(void *, size_t *, void *);
int ipsec_sysctl_ipsecstat(void *, size_t *, void *);
155 
156 void
157 ipsec_init(void)
158 {
159 	espcounters = counters_alloc(esps_ncounters);
160 	ahcounters = counters_alloc(ahs_ncounters);
161 	ipcompcounters = counters_alloc(ipcomps_ncounters);
162 	ipseccounters = counters_alloc(ipsec_ncounters);
163 
164 	strlcpy(ipsec_def_enc, IPSEC_DEFAULT_DEF_ENC, sizeof(ipsec_def_enc));
165 	strlcpy(ipsec_def_auth, IPSEC_DEFAULT_DEF_AUTH, sizeof(ipsec_def_auth));
166 	strlcpy(ipsec_def_comp, IPSEC_DEFAULT_DEF_COMP, sizeof(ipsec_def_comp));
167 
168 	ipsp_init();
169 }
170 
/*
 * ipsec_common_input() gets called when we receive an IPsec-protected packet
 * in IPv4 or IPv6. All it does is find the right TDB and call the appropriate
 * transform. The callback takes care of further processing (like ingress
 * filtering).
 *
 * Arguments:
 *	mp	  pointer to the packet's mbuf pointer; handed on to the
 *		  transform, or released with m_freemp() on the drop path
 *	skip	  byte offset of the IPsec header (the SPI/CPI is read
 *		  relative to it)
 *	protoff	  passed through unchanged to the transform's input routine
 *	af	  AF_INET or AF_INET6; anything else is counted and dropped
 *	sproto	  IPPROTO_ESP, IPPROTO_AH or IPPROTO_IPCOMP; other values panic
 *	udpencap  non-zero when the caller considers the packet UDP
 *		  encapsulated; must agree with the SA's TDBF_UDPENCAP flag
 *
 * Returns whatever the transform's xf_input routine returns, or
 * IPPROTO_DONE after the packet was dropped here.
 */
int
ipsec_common_input(struct mbuf **mp, int skip, int protoff, int af, int sproto,
    int udpencap)
{
/*
 * Bump the ESP (x), AH (y) or IPComp (z) counter depending on the local
 * variable sproto.  Stays defined for ipsec_common_input_cb() as well and
 * is #undef'd at the end of that function.
 */
#define IPSEC_ISTAT(x,y,z) do {			\
	if (sproto == IPPROTO_ESP)		\
		espstat_inc(x);			\
	else if (sproto == IPPROTO_AH)		\
		ahstat_inc(y);			\
	else					\
		ipcompstat_inc(z);		\
} while (0)

	struct mbuf *m = *mp;
	union sockaddr_union dst_address;
	struct tdb *tdbp = NULL;
	u_int32_t spi;
	u_int16_t cpi;
	int prot;
#ifdef ENCDEBUG
	char buf[INET6_ADDRSTRLEN];
#endif

	NET_ASSERT_LOCKED();

	ipsecstat_pkt(ipsec_ipackets, ipsec_ibytes, m->m_pkthdr.len);
	IPSEC_ISTAT(esps_input, ahs_input, ipcomps_input);

	/* A packet already flagged M_COMP must not be decompressed again. */
	if ((sproto == IPPROTO_IPCOMP) && (m->m_flags & M_COMP)) {
		DPRINTF("repeated decompression");
		ipcompstat_inc(ipcomps_pdrops);
		goto drop;
	}

	/* Need at least eight bytes after skip to read any SPI below. */
	if (m->m_pkthdr.len - skip < 2 * sizeof(u_int32_t)) {
		DPRINTF("packet too small");
		IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
		goto drop;
	}

	/* Retrieve the SPI from the relevant IPsec header */
	switch (sproto) {
	case IPPROTO_ESP:
		/* ESP: SPI is the first 32-bit word of the header. */
		m_copydata(m, skip, sizeof(u_int32_t), (caddr_t) &spi);
		break;
	case IPPROTO_AH:
		/* AH: SPI is the second 32-bit word of the header. */
		m_copydata(m, skip + sizeof(u_int32_t), sizeof(u_int32_t),
		    (caddr_t) &spi);
		break;
	case IPPROTO_IPCOMP:
		/* IPComp: 16-bit CPI at offset 2, widened to SPI form. */
		m_copydata(m, skip + sizeof(u_int16_t), sizeof(u_int16_t),
		    (caddr_t) &cpi);
		spi = ntohl(htons(cpi));
		break;
	default:
		panic("%s: unknown/unsupported security protocol %d",
		    __func__, sproto);
	}

	/*
	 * Find tunnel control block and (indirectly) call the appropriate
	 * kernel crypto routine. The resulting mbuf chain is a valid
	 * IP packet ready to go through input processing.
	 */

	/* Build the lookup address from the outer header's destination. */
	memset(&dst_address, 0, sizeof(dst_address));
	dst_address.sa.sa_family = af;

	switch (af) {
	case AF_INET:
		dst_address.sin.sin_len = sizeof(struct sockaddr_in);
		m_copydata(m, offsetof(struct ip, ip_dst),
		    sizeof(struct in_addr),
		    (caddr_t) &(dst_address.sin.sin_addr));
		break;

#ifdef INET6
	case AF_INET6:
		dst_address.sin6.sin6_len = sizeof(struct sockaddr_in6);
		m_copydata(m, offsetof(struct ip6_hdr, ip6_dst),
		    sizeof(struct in6_addr),
		    (caddr_t) &(dst_address.sin6.sin6_addr));
		in6_recoverscope(&dst_address.sin6,
		    &dst_address.sin6.sin6_addr);
		break;
#endif /* INET6 */

	default:
		DPRINTF("unsupported protocol family %d", af);
		IPSEC_ISTAT(esps_nopf, ahs_nopf, ipcomps_nopf);
		goto drop;
	}

	/*
	 * Look up the SA.  The result is released with tdb_unref() on every
	 * exit path below, so gettdb() presumably returns a referenced TDB.
	 */
	tdbp = gettdb(rtable_l2(m->m_pkthdr.ph_rtableid),
	    spi, &dst_address, sproto);
	if (tdbp == NULL) {
		DPRINTF("could not find SA for packet to %s, spi %08x",
		    ipsp_address(&dst_address, buf, sizeof(buf)), ntohl(spi));
		IPSEC_ISTAT(esps_notdb, ahs_notdb, ipcomps_notdb);
		goto drop;
	}

	if (tdbp->tdb_flags & TDBF_INVALID) {
		DPRINTF("attempted to use invalid SA %s/%08x/%u",
		    ipsp_address(&dst_address, buf, sizeof(buf)),
		    ntohl(spi), tdbp->tdb_sproto);
		IPSEC_ISTAT(esps_invalid, ahs_invalid, ipcomps_invalid);
		goto drop;
	}

	/* UDP encapsulated packet, but the SA is not a udpencap SA. */
	if (udpencap && !(tdbp->tdb_flags & TDBF_UDPENCAP)) {
		DPRINTF("attempted to use non-udpencap SA %s/%08x/%u",
		    ipsp_address(&dst_address, buf, sizeof(buf)),
		    ntohl(spi), tdbp->tdb_sproto);
		espstat_inc(esps_udpinval);
		goto drop;
	}

	/* Plain packet, but the SA requires UDP encapsulation. */
	if (!udpencap && (tdbp->tdb_flags & TDBF_UDPENCAP)) {
		DPRINTF("attempted to use udpencap SA %s/%08x/%u",
		    ipsp_address(&dst_address, buf, sizeof(buf)),
		    ntohl(spi), tdbp->tdb_sproto);
		espstat_inc(esps_udpneeded);
		goto drop;
	}

	if (tdbp->tdb_xform == NULL) {
		DPRINTF("attempted to use uninitialized SA %s/%08x/%u",
		    ipsp_address(&dst_address, buf, sizeof(buf)),
		    ntohl(spi), tdbp->tdb_sproto);
		IPSEC_ISTAT(esps_noxform, ahs_noxform, ipcomps_noxform);
		goto drop;
	}

	/* The kernel lock covers first-use bookkeeping and the transform. */
	KERNEL_LOCK();
	/* Register first use, setup expiration timer. */
	if (tdbp->tdb_first_use == 0) {
		tdbp->tdb_first_use = gettime();
		/*
		 * A non-zero timeout_add_sec() result takes an extra TDB
		 * reference -- presumably held on behalf of the newly
		 * scheduled timeout; confirm against the timeout handlers.
		 */
		if (tdbp->tdb_flags & TDBF_FIRSTUSE) {
			if (timeout_add_sec(&tdbp->tdb_first_tmo,
			    tdbp->tdb_exp_first_use))
				tdb_ref(tdbp);
		}
		if (tdbp->tdb_flags & TDBF_SOFT_FIRSTUSE) {
			if (timeout_add_sec(&tdbp->tdb_sfirst_tmo,
			    tdbp->tdb_soft_first_use))
				tdb_ref(tdbp);
		}
	}

	tdbstat_pkt(tdbp, tdb_ipackets, tdb_ibytes, m->m_pkthdr.len);

	/*
	 * Call appropriate transform and return -- callback takes care of
	 * everything else.
	 */
	prot = (*(tdbp->tdb_xform->xf_input))(mp, tdbp, skip, protoff);
	/* IPPROTO_DONE from the transform is accounted as an input drop. */
	if (prot == IPPROTO_DONE) {
		ipsecstat_inc(ipsec_idrops);
		tdbstat_inc(tdbp, tdb_idrops);
	}
	tdb_unref(tdbp);
	KERNEL_UNLOCK();
	return prot;

 drop:
	/* Reached before KERNEL_LOCK() is taken; tdbp may still be NULL. */
	m_freemp(mp);
	ipsecstat_inc(ipsec_idrops);
	if (tdbp != NULL)
		tdbstat_inc(tdbp, tdb_idrops);
	/* Called unconditionally, so tdb_unref() presumably accepts NULL. */
	tdb_unref(tdbp);
	return IPPROTO_DONE;
}
350 
/*
 * IPsec input callback, called by the transform callback. Takes care of
 * filtering and other sanity checks on the processed packet.
 *
 * Arguments:
 *	mp	 pointer to the packet's mbuf pointer; updated when m_pullup()
 *		 or pf_test() replaces the chain, freed on failure
 *	tdbp	 SA the packet was processed under
 *	skip	 offset of the payload behind the IP header(s); used as the
 *		 transport header offset for the checksum fix-ups
 *	protoff	 offset of the next-protocol byte (read for IPv6 only)
 *
 * Returns the next protocol number so the caller's deliver loop can
 * continue, or IPPROTO_DONE when the packet was dropped or consumed.
 */
int
ipsec_common_input_cb(struct mbuf **mp, struct tdb *tdbp, int skip, int protoff)
{
	struct mbuf *m = *mp;
	int af, sproto;
	u_int8_t prot;
#if NBPFILTER > 0
	struct ifnet *encif;
#endif
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif /* INET6 */
	struct m_tag *mtag;
	struct tdb_ident *tdbi;
#ifdef ENCDEBUG
	char buf[INET6_ADDRSTRLEN];
#endif

	/* sproto is also what the IPSEC_ISTAT() macro keys on. */
	af = tdbp->tdb_dst.sa.sa_family;
	sproto = tdbp->tdb_sproto;

	tdbp->tdb_last_used = gettime();

	/* Fix IPv4 header */
	if (af == AF_INET) {
		/* Make the first skip bytes contiguous before mtod(). */
		if (m->m_len < skip &&
		    (m = *mp = m_pullup(m, skip)) == NULL) {
			DPRINTF("processing failed for SA %s/%08x",
			    ipsp_address(&tdbp->tdb_dst, buf, sizeof(buf)),
			    ntohl(tdbp->tdb_spi));
			IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
			goto baddone;
		}

		/*
		 * Rewrite the total length to the current packet length and
		 * (presumably) regenerate the header checksum to match.
		 */
		ip = mtod(m, struct ip *);
		ip->ip_len = htons(m->m_pkthdr.len);
		in_hdr_cksum_out(m, NULL);
		prot = ip->ip_p;
	}

#ifdef INET6
	/* Fix IPv6 header */
	if (af == AF_INET6) {
		if (m->m_len < sizeof(struct ip6_hdr) &&
		    (m = *mp = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {

			DPRINTF("processing failed for SA %s/%08x",
			    ipsp_address(&tdbp->tdb_dst, buf, sizeof(buf)),
			    ntohl(tdbp->tdb_spi));
			IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
			goto baddone;
		}

		/* Payload length is set to the packet length minus skip. */
		ip6 = mtod(m, struct ip6_hdr *);
		ip6->ip6_plen = htons(m->m_pkthdr.len - skip);

		/* Save protocol */
		m_copydata(m, protoff, 1, (caddr_t) &prot);
	}
#endif /* INET6 */

	/*
	 * Fix TCP/UDP checksum of UDP encapsulated transport mode ESP packet.
	 * (RFC3948 3.1.2)
	 */
	if ((af == AF_INET || af == AF_INET6) &&
	    (tdbp->tdb_flags & TDBF_UDPENCAP) &&
	    (tdbp->tdb_flags & TDBF_TUNNELING) == 0) {
		u_int16_t cksum;

		switch (prot) {
		case IPPROTO_UDP:
			if (m->m_pkthdr.len < skip + sizeof(struct udphdr)) {
				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
				    ipcomps_hdrops);
				goto baddone;
			}
			/*
			 * Zero the UDP checksum; for IPv4 it is left at zero,
			 * for IPv6 it is recomputed just below.
			 */
			cksum = 0;
			m_copyback(m, skip + offsetof(struct udphdr, uh_sum),
			    sizeof(cksum), &cksum, M_NOWAIT);
#ifdef INET6
			if (af == AF_INET6) {
				cksum = in6_cksum(m, IPPROTO_UDP, skip,
				    m->m_pkthdr.len - skip);
				m_copyback(m, skip + offsetof(struct udphdr,
				    uh_sum), sizeof(cksum), &cksum, M_NOWAIT);
			}
#endif
			break;
		case IPPROTO_TCP:
			if (m->m_pkthdr.len < skip + sizeof(struct tcphdr)) {
				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
				    ipcomps_hdrops);
				goto baddone;
			}
			/* Zero the field, recompute, then store the result. */
			cksum = 0;
			m_copyback(m, skip + offsetof(struct tcphdr, th_sum),
			    sizeof(cksum), &cksum, M_NOWAIT);
			if (af == AF_INET)
				cksum = in4_cksum(m, IPPROTO_TCP, skip,
				    m->m_pkthdr.len - skip);
#ifdef INET6
			else if (af == AF_INET6)
				cksum = in6_cksum(m, IPPROTO_TCP, skip,
				    m->m_pkthdr.len - skip);
#endif
			m_copyback(m, skip + offsetof(struct tcphdr, th_sum),
			    sizeof(cksum), &cksum, M_NOWAIT);
			break;
		}
	}

	/*
	 * Record what we've done to the packet (under what SA it was
	 * processed).
	 */
	if (tdbp->tdb_sproto != IPPROTO_IPCOMP) {
		/* IPComp SAs are not recorded; ESP and AH get a tag. */
		mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE,
		    sizeof(struct tdb_ident), M_NOWAIT);
		if (mtag == NULL) {
			DPRINTF("failed to get tag");
			IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
			goto baddone;
		}

		/*
		 * The tag payload identifies the SA; ipsec_forward_check()
		 * and ipsec_local_check() use it to find the TDB again.
		 */
		tdbi = (struct tdb_ident *)(mtag + 1);
		tdbi->dst = tdbp->tdb_dst;
		tdbi->proto = tdbp->tdb_sproto;
		tdbi->spi = tdbp->tdb_spi;
		tdbi->rdomain = tdbp->tdb_rdomain;

		m_tag_prepend(m, mtag);
	}

	/* Flag the mbuf according to what the SA provided. */
	switch (sproto) {
	case IPPROTO_ESP:
		/* Packet is confidential ? */
		if (tdbp->tdb_encalgxform)
			m->m_flags |= M_CONF;

		/* Check if we had authenticated ESP. */
		if (tdbp->tdb_authalgxform)
			m->m_flags |= M_AUTH;
		break;
	case IPPROTO_AH:
		m->m_flags |= M_AUTH;
		break;
	case IPPROTO_IPCOMP:
		/* ipsec_common_input() rejects IPComp packets with M_COMP. */
		m->m_flags |= M_COMP;
		break;
	default:
		panic("%s: unknown/unsupported security protocol %d",
		    __func__, sproto);
	}

#if NPF > 0
	/* Add pf tag if requested. */
	pf_tag_packet(m, tdbp->tdb_tag, -1);
	pf_pkt_addr_changed(m);
#endif
	/* Move the packet to the SA's post-processing routing domain. */
	if (tdbp->tdb_rdomain != tdbp->tdb_rdomain_post)
		m->m_pkthdr.ph_rtableid = tdbp->tdb_rdomain_post;

	if (tdbp->tdb_flags & TDBF_TUNNELING)
		m->m_flags |= M_TUNNEL;

	ipsecstat_add(ipsec_idecompbytes, m->m_pkthdr.len);
	tdbstat_add(tdbp, tdb_idecompbytes, m->m_pkthdr.len);

#if NBPFILTER > 0
	/* Account the packet on the enc(4) interface and tap it for bpf. */
	encif = enc_getif(tdbp->tdb_rdomain_post, tdbp->tdb_tap);
	if (encif != NULL) {
		encif->if_ipackets++;
		encif->if_ibytes += m->m_pkthdr.len;

		if (sproto != IPPROTO_IPCOMP) {
			/* XXX This conflicts with the scoped nature of IPv6 */
			m->m_pkthdr.ph_ifidx = encif->if_index;
		}
		if (encif->if_bpf) {
			struct enchdr hdr;

			hdr.af = af;
			hdr.spi = tdbp->tdb_spi;
			hdr.flags = m->m_flags & (M_AUTH|M_CONF);

			bpf_mtap_hdr(encif->if_bpf, (char *)&hdr,
			    ENC_HDRLEN, m, BPF_DIRECTION_IN);
		}
	}
#endif

#if NPF > 0
	/*
	 * The ip_deliver() shortcut avoids running through ip_input() with the
	 * same IP header twice.  Packets in transport mode have to be
	 * passed to pf explicitly.  In tunnel mode the inner IP header will
	 * run through ip_input() and pf anyway.
	 */
	if ((tdbp->tdb_flags & TDBF_TUNNELING) == 0) {
		struct ifnet *ifp;

		/* This is the enc0 interface unless for ipcomp. */
		if ((ifp = if_get(m->m_pkthdr.ph_ifidx)) == NULL) {
			goto baddone;
		}
		if (pf_test(af, PF_IN, ifp, mp) != PF_PASS) {
			if_put(ifp);
			goto baddone;
		}
		/* pf_test() may have replaced or consumed the mbuf. */
		m = *mp;
		if_put(ifp);
		if (m == NULL)
			return IPPROTO_DONE;
	}
#endif
	/* Return to the appropriate protocol handler in deliver loop. */
	return prot;

 baddone:
	/* Common failure exit: free whatever *mp still references. */
	m_freemp(mp);
	return IPPROTO_DONE;
#undef IPSEC_ISTAT
}
580 
581 int
582 ipsec_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
583     size_t newlen)
584 {
585 	int error;
586 
587 	switch (name[0]) {
588 	case IPCTL_IPSEC_ENC_ALGORITHM:
589 		NET_LOCK();
590 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
591 		    ipsec_def_enc, sizeof(ipsec_def_enc));
592 		NET_UNLOCK();
593 		return (error);
594 	case IPCTL_IPSEC_AUTH_ALGORITHM:
595 		NET_LOCK();
596 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
597 		    ipsec_def_auth, sizeof(ipsec_def_auth));
598 		NET_UNLOCK();
599 		return (error);
600 	case IPCTL_IPSEC_IPCOMP_ALGORITHM:
601 		NET_LOCK();
602 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
603 		    ipsec_def_comp, sizeof(ipsec_def_comp));
604 		NET_UNLOCK();
605 		return (error);
606 	case IPCTL_IPSEC_STATS:
607 		return (ipsec_sysctl_ipsecstat(oldp, oldlenp, newp));
608 	default:
609 		NET_LOCK();
610 		error = sysctl_bounded_arr(ipsecctl_vars, nitems(ipsecctl_vars),
611 		    name, namelen, oldp, oldlenp, newp, newlen);
612 		NET_UNLOCK();
613 		return (error);
614 	}
615 }
616 
617 int
618 esp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
619     size_t newlen)
620 {
621 	int error;
622 
623 	/* All sysctl names at this level are terminal. */
624 	if (namelen != 1)
625 		return (ENOTDIR);
626 
627 	switch (name[0]) {
628 	case ESPCTL_STATS:
629 		return (esp_sysctl_espstat(oldp, oldlenp, newp));
630 	default:
631 		NET_LOCK();
632 		error = sysctl_bounded_arr(espctl_vars, nitems(espctl_vars),
633 		    name, namelen, oldp, oldlenp, newp, newlen);
634 		NET_UNLOCK();
635 		return (error);
636 	}
637 }
638 
639 int
640 esp_sysctl_espstat(void *oldp, size_t *oldlenp, void *newp)
641 {
642 	struct espstat espstat;
643 
644 	CTASSERT(sizeof(espstat) == (esps_ncounters * sizeof(uint64_t)));
645 	memset(&espstat, 0, sizeof espstat);
646 	counters_read(espcounters, (uint64_t *)&espstat, esps_ncounters);
647 	return (sysctl_rdstruct(oldp, oldlenp, newp, &espstat,
648 	    sizeof(espstat)));
649 }
650 
651 int
652 ah_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
653     size_t newlen)
654 {
655 	int error;
656 
657 	/* All sysctl names at this level are terminal. */
658 	if (namelen != 1)
659 		return (ENOTDIR);
660 
661 	switch (name[0]) {
662 	case AHCTL_STATS:
663 		return ah_sysctl_ahstat(oldp, oldlenp, newp);
664 	default:
665 		NET_LOCK();
666 		error = sysctl_bounded_arr(ahctl_vars, nitems(ahctl_vars), name,
667 		    namelen, oldp, oldlenp, newp, newlen);
668 		NET_UNLOCK();
669 		return (error);
670 	}
671 }
672 
673 int
674 ah_sysctl_ahstat(void *oldp, size_t *oldlenp, void *newp)
675 {
676 	struct ahstat ahstat;
677 
678 	CTASSERT(sizeof(ahstat) == (ahs_ncounters * sizeof(uint64_t)));
679 	memset(&ahstat, 0, sizeof ahstat);
680 	counters_read(ahcounters, (uint64_t *)&ahstat, ahs_ncounters);
681 	return (sysctl_rdstruct(oldp, oldlenp, newp, &ahstat, sizeof(ahstat)));
682 }
683 
684 int
685 ipcomp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
686     size_t newlen)
687 {
688 	int error;
689 
690 	/* All sysctl names at this level are terminal. */
691 	if (namelen != 1)
692 		return (ENOTDIR);
693 
694 	switch (name[0]) {
695 	case IPCOMPCTL_STATS:
696 		return ipcomp_sysctl_ipcompstat(oldp, oldlenp, newp);
697 	default:
698 		NET_LOCK();
699 		error = sysctl_bounded_arr(ipcompctl_vars,
700 		    nitems(ipcompctl_vars), name, namelen, oldp, oldlenp,
701 		    newp, newlen);
702 		NET_UNLOCK();
703 		return (error);
704 	}
705 }
706 
707 int
708 ipcomp_sysctl_ipcompstat(void *oldp, size_t *oldlenp, void *newp)
709 {
710 	struct ipcompstat ipcompstat;
711 
712 	CTASSERT(sizeof(ipcompstat) == (ipcomps_ncounters * sizeof(uint64_t)));
713 	memset(&ipcompstat, 0, sizeof ipcompstat);
714 	counters_read(ipcompcounters, (uint64_t *)&ipcompstat,
715 	    ipcomps_ncounters);
716 	return (sysctl_rdstruct(oldp, oldlenp, newp, &ipcompstat,
717 	    sizeof(ipcompstat)));
718 }
719 
720 int
721 ipsec_sysctl_ipsecstat(void *oldp, size_t *oldlenp, void *newp)
722 {
723 	struct ipsecstat ipsecstat;
724 
725 	CTASSERT(sizeof(ipsecstat) == (ipsec_ncounters * sizeof(uint64_t)));
726 	memset(&ipsecstat, 0, sizeof ipsecstat);
727 	counters_read(ipseccounters, (uint64_t *)&ipsecstat, ipsec_ncounters);
728 	return (sysctl_rdstruct(oldp, oldlenp, newp, &ipsecstat,
729 	    sizeof(ipsecstat)));
730 }
731 
732 int
733 ipsec_input_disabled(struct mbuf **mp, int *offp, int proto, int af)
734 {
735 	switch (af) {
736 	case AF_INET:
737 		return rip_input(mp, offp, proto, af);
738 #ifdef INET6
739 	case AF_INET6:
740 		return rip6_input(mp, offp, proto, af);
741 #endif
742 	default:
743 		unhandled_af(af);
744 	}
745 }
746 
747 int
748 ah46_input(struct mbuf **mp, int *offp, int proto, int af)
749 {
750 	int protoff;
751 
752 	if (
753 #if NPF > 0
754 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
755 #endif
756 	    !ah_enable)
757 		return ipsec_input_disabled(mp, offp, proto, af);
758 
759 	protoff = ipsec_protoff(*mp, *offp, af);
760 	if (protoff < 0) {
761 		DPRINTF("bad packet header chain");
762 		ahstat_inc(ahs_hdrops);
763 		m_freemp(mp);
764 		return IPPROTO_DONE;
765 	}
766 
767 	return ipsec_common_input(mp, *offp, protoff, af, proto, 0);
768 }
769 
770 void
771 ah4_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
772 {
773 	if (sa->sa_family != AF_INET ||
774 	    sa->sa_len != sizeof(struct sockaddr_in))
775 		return;
776 
777 	ipsec_common_ctlinput(rdomain, cmd, sa, v, IPPROTO_AH);
778 }
779 
780 int
781 esp46_input(struct mbuf **mp, int *offp, int proto, int af)
782 {
783 	int protoff;
784 
785 	if (
786 #if NPF > 0
787 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
788 #endif
789 	    !esp_enable)
790 		return ipsec_input_disabled(mp, offp, proto, af);
791 
792 	protoff = ipsec_protoff(*mp, *offp, af);
793 	if (protoff < 0) {
794 		DPRINTF("bad packet header chain");
795 		espstat_inc(esps_hdrops);
796 		m_freemp(mp);
797 		return IPPROTO_DONE;
798 	}
799 
800 	return ipsec_common_input(mp, *offp, protoff, af, proto, 0);
801 }
802 
803 /* IPv4 IPCOMP wrapper */
804 int
805 ipcomp46_input(struct mbuf **mp, int *offp, int proto, int af)
806 {
807 	int protoff;
808 
809 	if (
810 #if NPF > 0
811 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
812 #endif
813 	    !ipcomp_enable)
814 		return ipsec_input_disabled(mp, offp, proto, af);
815 
816 	protoff = ipsec_protoff(*mp, *offp, af);
817 	if (protoff < 0) {
818 		DPRINTF("bad packet header chain");
819 		ipcompstat_inc(ipcomps_hdrops);
820 		m_freemp(mp);
821 		return IPPROTO_DONE;
822 	}
823 
824 	return ipsec_common_input(mp, *offp, protoff, af, proto, 0);
825 }
826 
827 void
828 ipsec_set_mtu(struct tdb *tdbp, u_int32_t mtu)
829 {
830 	ssize_t adjust;
831 
832 	NET_ASSERT_LOCKED();
833 
834 	/* Walk the chain backwards to the first tdb */
835 	for (; tdbp != NULL; tdbp = tdbp->tdb_inext) {
836 		if (tdbp->tdb_flags & TDBF_INVALID ||
837 		    (adjust = ipsec_hdrsz(tdbp)) == -1)
838 			return;
839 
840 		mtu -= adjust;
841 
842 		/* Store adjusted MTU in tdb */
843 		tdbp->tdb_mtu = mtu;
844 		tdbp->tdb_mtutimeout = gettime() + ip_mtudisc_timeout;
845 		DPRINTF("spi %08x mtu %d adjust %ld",
846 		    ntohl(tdbp->tdb_spi), tdbp->tdb_mtu, adjust);
847 	}
848 }
849 
850 void
851 ipsec_common_ctlinput(u_int rdomain, int cmd, struct sockaddr *sa,
852     void *v, int proto)
853 {
854 	struct ip *ip = v;
855 
856 	if (cmd == PRC_MSGSIZE && ip && ip_mtudisc && ip->ip_v == 4) {
857 		struct tdb *tdbp;
858 		struct sockaddr_in dst;
859 		struct icmp *icp;
860 		int hlen = ip->ip_hl << 2;
861 		u_int32_t spi, mtu;
862 
863 		/* Find the right MTU. */
864 		icp = (struct icmp *)((caddr_t) ip -
865 		    offsetof(struct icmp, icmp_ip));
866 		mtu = ntohs(icp->icmp_nextmtu);
867 
868 		/*
869 		 * Ignore the packet, if we do not receive a MTU
870 		 * or the MTU is too small to be acceptable.
871 		 */
872 		if (mtu < 296)
873 			return;
874 
875 		memset(&dst, 0, sizeof(struct sockaddr_in));
876 		dst.sin_family = AF_INET;
877 		dst.sin_len = sizeof(struct sockaddr_in);
878 		dst.sin_addr.s_addr = ip->ip_dst.s_addr;
879 
880 		memcpy(&spi, (caddr_t)ip + hlen, sizeof(u_int32_t));
881 
882 		tdbp = gettdb_rev(rdomain, spi, (union sockaddr_union *)&dst,
883 		    proto);
884 		ipsec_set_mtu(tdbp, mtu);
885 		tdb_unref(tdbp);
886 	}
887 }
888 
889 void
890 udpencap_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
891 {
892 	struct ip *ip = v;
893 	struct tdb *tdbp, *first;
894 	struct icmp *icp;
895 	u_int32_t mtu;
896 	struct sockaddr_in dst, src;
897 	union sockaddr_union *su_dst, *su_src;
898 
899 	NET_ASSERT_LOCKED();
900 
901 	icp = (struct icmp *)((caddr_t) ip - offsetof(struct icmp, icmp_ip));
902 	mtu = ntohs(icp->icmp_nextmtu);
903 
904 	/*
905 	 * Ignore the packet, if we do not receive a MTU
906 	 * or the MTU is too small to be acceptable.
907 	 */
908 	if (mtu < 296)
909 		return;
910 
911 	memset(&dst, 0, sizeof(dst));
912 	dst.sin_family = AF_INET;
913 	dst.sin_len = sizeof(struct sockaddr_in);
914 	dst.sin_addr.s_addr = ip->ip_dst.s_addr;
915 	su_dst = (union sockaddr_union *)&dst;
916 	memset(&src, 0, sizeof(src));
917 	src.sin_family = AF_INET;
918 	src.sin_len = sizeof(struct sockaddr_in);
919 	src.sin_addr.s_addr = ip->ip_src.s_addr;
920 	su_src = (union sockaddr_union *)&src;
921 
922 	first = gettdbbysrcdst_rev(rdomain, 0, su_src, su_dst, IPPROTO_ESP);
923 
924 	mtx_enter(&tdb_sadb_mtx);
925 	for (tdbp = first; tdbp != NULL; tdbp = tdbp->tdb_snext) {
926 		if (tdbp->tdb_sproto == IPPROTO_ESP &&
927 		    ((tdbp->tdb_flags & (TDBF_INVALID|TDBF_UDPENCAP)) ==
928 		    TDBF_UDPENCAP) &&
929 		    !memcmp(&tdbp->tdb_dst, &dst, su_dst->sa.sa_len) &&
930 		    !memcmp(&tdbp->tdb_src, &src, su_src->sa.sa_len))
931 			ipsec_set_mtu(tdbp, mtu);
932 	}
933 	mtx_leave(&tdb_sadb_mtx);
934 	tdb_unref(first);
935 }
936 
937 void
938 esp4_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
939 {
940 	if (sa->sa_family != AF_INET ||
941 	    sa->sa_len != sizeof(struct sockaddr_in))
942 		return;
943 
944 	ipsec_common_ctlinput(rdomain, cmd, sa, v, IPPROTO_ESP);
945 }
946 
947 /* Find the offset of the next protocol field in the previous header. */
948 int
949 ipsec_protoff(struct mbuf *m, int off, int af)
950 {
951 #ifdef INET6
952 	struct ip6_ext ip6e;
953 	int protoff, nxt, l;
954 #endif /* INET6 */
955 
956 	switch (af) {
957 	case AF_INET:
958 		return offsetof(struct ip, ip_p);
959 #ifdef INET6
960 	case AF_INET6:
961 		break;
962 #endif /* INET6 */
963 	default:
964 		unhandled_af(af);
965 	}
966 
967 #ifdef INET6
968 	if (off < sizeof(struct ip6_hdr))
969 		return -1;
970 
971 	if (off == sizeof(struct ip6_hdr))
972 		return offsetof(struct ip6_hdr, ip6_nxt);
973 
974 	/* Chase down the header chain... */
975 	protoff = sizeof(struct ip6_hdr);
976 	nxt = (mtod(m, struct ip6_hdr *))->ip6_nxt;
977 	l = 0;
978 
979 	do {
980 		protoff += l;
981 		m_copydata(m, protoff, sizeof(ip6e),
982 		    (caddr_t) &ip6e);
983 
984 		if (nxt == IPPROTO_AH)
985 			l = (ip6e.ip6e_len + 2) << 2;
986 		else
987 			l = (ip6e.ip6e_len + 1) << 3;
988 #ifdef DIAGNOSTIC
989 		if (l <= 0)
990 			panic("%s: l went zero or negative", __func__);
991 #endif
992 
993 		nxt = ip6e.ip6e_nxt;
994 	} while (protoff + l < off);
995 
996 	/* Malformed packet check */
997 	if (protoff + l != off)
998 		return -1;
999 
1000 	protoff += offsetof(struct ip6_ext, ip6e_nxt);
1001 	return protoff;
1002 #endif /* INET6 */
1003 }
1004 
1005 int
1006 ipsec_forward_check(struct mbuf *m, int hlen, int af)
1007 {
1008 	struct tdb *tdb;
1009 	struct tdb_ident *tdbi;
1010 	struct m_tag *mtag;
1011 	int error = 0;
1012 
1013 	/*
1014 	 * IPsec policy check for forwarded packets. Look at
1015 	 * inner-most IPsec SA used.
1016 	 */
1017 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
1018 	if (mtag != NULL) {
1019 		tdbi = (struct tdb_ident *)(mtag + 1);
1020 		tdb = gettdb(tdbi->rdomain, tdbi->spi, &tdbi->dst, tdbi->proto);
1021 	} else
1022 		tdb = NULL;
1023 	error = ipsp_spd_lookup(m, af, hlen, IPSP_DIRECTION_IN,
1024 	    tdb, NULL, NULL, NULL);
1025 	tdb_unref(tdb);
1026 
1027 	return error;
1028 }
1029 
1030 int
1031 ipsec_local_check(struct mbuf *m, int hlen, int proto, int af)
1032 {
1033 	struct tdb *tdb;
1034 	struct tdb_ident *tdbi;
1035 	struct m_tag *mtag;
1036 	int error = 0;
1037 
1038 	/*
1039 	 * If it's a protected packet for us, skip the policy check.
1040 	 * That's because we really only care about the properties of
1041 	 * the protected packet, and not the intermediate versions.
1042 	 * While this is not the most paranoid setting, it allows
1043 	 * some flexibility in handling nested tunnels (in setting up
1044 	 * the policies).
1045 	 */
1046 	if ((proto == IPPROTO_ESP) || (proto == IPPROTO_AH) ||
1047 	    (proto == IPPROTO_IPCOMP))
1048 		return 0;
1049 
1050 	/*
1051 	 * If the protected packet was tunneled, then we need to
1052 	 * verify the protected packet's information, not the
1053 	 * external headers. Thus, skip the policy lookup for the
1054 	 * external packet, and keep the IPsec information linked on
1055 	 * the packet header (the encapsulation routines know how
1056 	 * to deal with that).
1057 	 */
1058 	if ((proto == IPPROTO_IPV4) || (proto == IPPROTO_IPV6))
1059 		return 0;
1060 
1061 	/*
1062 	 * When processing IPv6 header chains, do not look at the
1063 	 * outer header.  The inner protocol is relevant and will
1064 	 * be checked by the local delivery loop later.
1065 	 */
1066 	if ((af == AF_INET6) && ((proto == IPPROTO_DSTOPTS) ||
1067 	    (proto == IPPROTO_ROUTING) || (proto == IPPROTO_FRAGMENT)))
1068 		return 0;
1069 
1070 	/*
1071 	 * If the protected packet is TCP or UDP, we'll do the
1072 	 * policy check in the respective input routine, so we can
1073 	 * check for bypass sockets.
1074 	 */
1075 	if ((proto == IPPROTO_TCP) || (proto == IPPROTO_UDP))
1076 		return 0;
1077 
1078 	/*
1079 	 * IPsec policy check for local-delivery packets. Look at the
1080 	 * inner-most SA that protected the packet. This is in fact
1081 	 * a bit too restrictive (it could end up causing packets to
1082 	 * be dropped that semantically follow the policy, e.g., in
1083 	 * certain SA-bundle configurations); but the alternative is
1084 	 * very complicated (and requires keeping track of what
1085 	 * kinds of tunneling headers have been seen in-between the
1086 	 * IPsec headers), and I don't think we lose much functionality
1087 	 * that's needed in the real world (who uses bundles anyway ?).
1088 	 */
1089 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
1090 	if (mtag) {
1091 		tdbi = (struct tdb_ident *)(mtag + 1);
1092 		tdb = gettdb(tdbi->rdomain, tdbi->spi, &tdbi->dst,
1093 		    tdbi->proto);
1094 	} else
1095 		tdb = NULL;
1096 	error = ipsp_spd_lookup(m, af, hlen, IPSP_DIRECTION_IN,
1097 	    tdb, NULL, NULL, NULL);
1098 	tdb_unref(tdb);
1099 
1100 	return error;
1101 }
1102