xref: /openbsd-src/sys/netinet/ipsec_input.c (revision d0f299081ecdda98dc7a87b149b17b038a84b9fa)
1 /*	$OpenBSD: ipsec_input.c,v 1.201 2021/12/23 12:21:48 bluhm Exp $	*/
2 /*
3  * The authors of this code are John Ioannidis (ji@tla.org),
4  * Angelos D. Keromytis (kermit@csd.uch.gr) and
5  * Niels Provos (provos@physnet.uni-hamburg.de).
6  *
7  * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
8  * in November 1995.
9  *
10  * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
11  * by Angelos D. Keromytis.
12  *
13  * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
14  * and Niels Provos.
15  *
16  * Additional features in 1999 by Angelos D. Keromytis.
17  *
18  * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
19  * Angelos D. Keromytis and Niels Provos.
20  * Copyright (c) 2001, Angelos D. Keromytis.
21  *
22  * Permission to use, copy, and modify this software with or without fee
23  * is hereby granted, provided that this entire notice is included in
24  * all copies of any software which is or includes a copy or
25  * modification of this software.
26  * You may use this code under the GNU public license if you so wish. Please
27  * contribute changes back to the authors under this freer than GPL license
28  * so that we may further the use of strong encryption without limitations to
29  * all.
30  *
31  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
32  * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
33  * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
34  * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
35  * PURPOSE.
36  */
37 
38 #include "pf.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/protosw.h>
43 #include <sys/mbuf.h>
44 #include <sys/socket.h>
45 #include <sys/sysctl.h>
46 #include <sys/kernel.h>
47 #include <sys/timeout.h>
48 
49 #include <net/if.h>
50 #include <net/if_var.h>
51 #include <net/netisr.h>
52 #include <net/bpf.h>
53 #include <net/route.h>
54 
55 #include <netinet/in.h>
56 #include <netinet/ip.h>
57 #include <netinet/ip_var.h>
58 #include <netinet/ip_icmp.h>
59 #include <netinet/tcp.h>
60 #include <netinet/udp.h>
61 
62 #if NPF > 0
63 #include <net/pfvar.h>
64 #endif
65 
66 #ifdef INET6
67 #include <netinet6/in6_var.h>
68 #include <netinet/ip6.h>
69 #include <netinet6/ip6_var.h>
70 #include <netinet6/ip6protosw.h>
71 #endif /* INET6 */
72 
73 #include <netinet/ip_ipsp.h>
74 #include <netinet/ip_esp.h>
75 #include <netinet/ip_ah.h>
76 #include <netinet/ip_ipcomp.h>
77 
78 #include <net/if_enc.h>
79 
80 #include <crypto/cryptodev.h>
81 #include <crypto/xform.h>
82 
83 #include "bpfilter.h"
84 
/*
 * Shared ctlinput helper used by ah4_ctlinput() and esp4_ctlinput();
 * defined further down in this file.
 */
void ipsec_common_ctlinput(u_int, int, struct sockaddr *, void *, int);

/*
 * DPRINTF(fmt, ...) -- debug printf.
 *
 * With ENCDEBUG defined it prints "<function>: <message>\n" whenever the
 * encdebug variable is non-zero.  Without ENCDEBUG it expands to an empty
 * statement, so its arguments are never evaluated.
 */
#ifdef ENCDEBUG
#define DPRINTF(fmt, args...)						\
	do {								\
		if (encdebug)						\
			printf("%s: " fmt "\n", __func__, ## args);	\
	} while (0)
#else
#define DPRINTF(fmt, args...)						\
	do { } while (0)
#endif
97 
/*
 * sysctl variables
 *
 * Tunables initialised from the IPSEC_DEFAULT_* constants; they are made
 * adjustable through the ipsecctl_vars table below.
 */
int encdebug = 0;
int ipsec_keep_invalid = IPSEC_DEFAULT_EMBRYONIC_SA_TIMEOUT;
int ipsec_require_pfs = IPSEC_DEFAULT_PFS;
int ipsec_soft_allocations = IPSEC_DEFAULT_SOFT_ALLOCATIONS;
int ipsec_exp_allocations = IPSEC_DEFAULT_EXP_ALLOCATIONS;
int ipsec_soft_bytes = IPSEC_DEFAULT_SOFT_BYTES;
int ipsec_exp_bytes = IPSEC_DEFAULT_EXP_BYTES;
int ipsec_soft_timeout = IPSEC_DEFAULT_SOFT_TIMEOUT;
int ipsec_exp_timeout = IPSEC_DEFAULT_EXP_TIMEOUT;
int ipsec_soft_first_use = IPSEC_DEFAULT_SOFT_FIRST_USE;
int ipsec_exp_first_use = IPSEC_DEFAULT_EXP_FIRST_USE;
int ipsec_expire_acquire = IPSEC_DEFAULT_EXPIRE_ACQUIRE;

/*
 * Per-protocol enable switches, tested by the *46_input() entry points.
 * ESP and AH start enabled, IPComp starts disabled.
 */
int esp_enable = 1;
int ah_enable = 1;
int ipcomp_enable = 0;

/*
 * Tables handed to sysctl_bounded_arr() by esp_sysctl(), ah_sysctl() and
 * ipcomp_sysctl().  Each entry is presumably { mib name, variable, minimum,
 * maximum } -- confirm against struct sysctl_bounded_args.  udpencap_enable
 * and udpencap_port are not defined in this file.
 */
const struct sysctl_bounded_args espctl_vars[] = {
	{ESPCTL_ENABLE, &esp_enable, 0, 1},
	{ESPCTL_UDPENCAP_ENABLE, &udpencap_enable, 0, 1},
	{ESPCTL_UDPENCAP_PORT, &udpencap_port, 0, 65535},
};
const struct sysctl_bounded_args ahctl_vars[] = {
	{AHCTL_ENABLE, &ah_enable, 0, 1},
};
const struct sysctl_bounded_args ipcompctl_vars[] = {
	{IPCOMPCTL_ENABLE, &ipcomp_enable, 0, 1},
};

/* Statistics counter sets, allocated in ipsec_init(). */
struct cpumem *espcounters;
struct cpumem *ahcounters;
struct cpumem *ipcompcounters;
struct cpumem *ipseccounters;

/*
 * Default algorithm names; filled in by ipsec_init() and read/written
 * through ipsec_sysctl().
 */
char ipsec_def_enc[20];
char ipsec_def_auth[20];
char ipsec_def_comp[20];

/* Integer tunables served by the default case of ipsec_sysctl(). */
const struct sysctl_bounded_args ipsecctl_vars[] = {
	{ IPSEC_ENCDEBUG, &encdebug, 0, 1 },
	{ IPSEC_EXPIRE_ACQUIRE, &ipsec_expire_acquire, 0, INT_MAX },
	{ IPSEC_EMBRYONIC_SA_TIMEOUT, &ipsec_keep_invalid, 0, INT_MAX },
	{ IPSEC_REQUIRE_PFS, &ipsec_require_pfs, 0, 1 },
	{ IPSEC_SOFT_ALLOCATIONS, &ipsec_soft_allocations, 0, INT_MAX },
	{ IPSEC_ALLOCATIONS, &ipsec_exp_allocations, 0, INT_MAX },
	{ IPSEC_SOFT_BYTES, &ipsec_soft_bytes, 0, INT_MAX },
	{ IPSEC_BYTES, &ipsec_exp_bytes, 0, INT_MAX },
	{ IPSEC_TIMEOUT, &ipsec_exp_timeout, 0, INT_MAX },
	{ IPSEC_SOFT_TIMEOUT, &ipsec_soft_timeout,0, INT_MAX },
	{ IPSEC_SOFT_FIRSTUSE, &ipsec_soft_first_use, 0, INT_MAX },
	{ IPSEC_FIRSTUSE, &ipsec_exp_first_use, 0, INT_MAX },
};

/* Statistics read-out helpers, defined after the sysctl handlers below. */
int esp_sysctl_espstat(void *, size_t *, void *);
int ah_sysctl_ahstat(void *, size_t *, void *);
int ipcomp_sysctl_ipcompstat(void *, size_t *, void *);
int ipsec_sysctl_ipsecstat(void *, size_t *, void *);
156 
157 void
158 ipsec_init(void)
159 {
160 	espcounters = counters_alloc(esps_ncounters);
161 	ahcounters = counters_alloc(ahs_ncounters);
162 	ipcompcounters = counters_alloc(ipcomps_ncounters);
163 	ipseccounters = counters_alloc(ipsec_ncounters);
164 
165 	strlcpy(ipsec_def_enc, IPSEC_DEFAULT_DEF_ENC, sizeof(ipsec_def_enc));
166 	strlcpy(ipsec_def_auth, IPSEC_DEFAULT_DEF_AUTH, sizeof(ipsec_def_auth));
167 	strlcpy(ipsec_def_comp, IPSEC_DEFAULT_DEF_COMP, sizeof(ipsec_def_comp));
168 
169 	ipsp_init();
170 }
171 
/*
 * ipsec_common_input() gets called when we receive an IPsec-protected packet
 * in IPv4 or IPv6. All it does is find the right TDB and call the appropriate
 * transform. The callback takes care of further processing (like ingress
 * filtering).
 *
 * Arguments:
 *	mp	 pointer to the packet; freed through m_freemp() on the
 *		 drop path.
 *	skip	 offset of the IPsec header inside the packet.
 *	protoff	 passed through untouched to the transform's xf_input.
 *	af	 address family of the outer header; AF_INET, or AF_INET6
 *		 when INET6 is compiled in.  Anything else is dropped.
 *	sproto	 IPPROTO_ESP, IPPROTO_AH or IPPROTO_IPCOMP; any other value
 *		 panics.
 *	udpencap non-zero if the caller says the packet was UDP
 *		 encapsulated; must agree with the SA's TDBF_UDPENCAP flag.
 *
 * Returns the value of the transform's xf_input, or IPPROTO_DONE if the
 * packet was dropped here.
 */
int
ipsec_common_input(struct mbuf **mp, int skip, int protoff, int af, int sproto,
    int udpencap)
{
/*
 * Bump the ESP, AH or IPComp counter depending on sproto.  The macro stays
 * defined past the end of this function; ipsec_common_input_cb() uses it
 * and #undefs it.
 */
#define IPSEC_ISTAT(x,y,z) do {			\
	if (sproto == IPPROTO_ESP)		\
		espstat_inc(x);			\
	else if (sproto == IPPROTO_AH)		\
		ahstat_inc(y);			\
	else					\
		ipcompstat_inc(z);		\
} while (0)

	struct mbuf *m = *mp;
	union sockaddr_union dst_address;
	struct tdb *tdbp = NULL;
	u_int32_t spi;
	u_int16_t cpi;
	int prot;
#ifdef ENCDEBUG
	char buf[INET6_ADDRSTRLEN];
#endif

	NET_ASSERT_LOCKED();

	ipsecstat_pkt(ipsec_ipackets, ipsec_ibytes, m->m_pkthdr.len);
	IPSEC_ISTAT(esps_input, ahs_input, ipcomps_input);

	/* A packet already flagged M_COMP must not be decompressed again. */
	if ((sproto == IPPROTO_IPCOMP) && (m->m_flags & M_COMP)) {
		DPRINTF("repeated decompression");
		ipcompstat_inc(ipcomps_pdrops);
		goto drop;
	}

	/*
	 * Require at least eight bytes after skip.
	 * NOTE(review): the left-hand side is int and is converted to an
	 * unsigned type for the comparison, so skip > m_pkthdr.len would not
	 * be caught here -- presumably callers guarantee skip <= len; confirm.
	 */
	if (m->m_pkthdr.len - skip < 2 * sizeof(u_int32_t)) {
		DPRINTF("packet too small");
		IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
		goto drop;
	}

	/* Retrieve the SPI from the relevant IPsec header */
	switch (sproto) {
	case IPPROTO_ESP:
		/* ESP: SPI is the first 32-bit word. */
		m_copydata(m, skip, sizeof(u_int32_t), (caddr_t) &spi);
		break;
	case IPPROTO_AH:
		/* AH: SPI is the second 32-bit word. */
		m_copydata(m, skip + sizeof(u_int32_t), sizeof(u_int32_t),
		    (caddr_t) &spi);
		break;
	case IPPROTO_IPCOMP:
		/* IPComp: 16-bit CPI at byte offset 2, widened into spi. */
		m_copydata(m, skip + sizeof(u_int16_t), sizeof(u_int16_t),
		    (caddr_t) &cpi);
		spi = ntohl(htons(cpi));
		break;
	default:
		panic("%s: unknown/unsupported security protocol %d",
		    __func__, sproto);
	}

	/*
	 * Find tunnel control block and (indirectly) call the appropriate
	 * kernel crypto routine. The resulting mbuf chain is a valid
	 * IP packet ready to go through input processing.
	 */

	/* Build the lookup key from the outer header's destination address. */
	memset(&dst_address, 0, sizeof(dst_address));
	dst_address.sa.sa_family = af;

	switch (af) {
	case AF_INET:
		dst_address.sin.sin_len = sizeof(struct sockaddr_in);
		m_copydata(m, offsetof(struct ip, ip_dst),
		    sizeof(struct in_addr),
		    (caddr_t) &(dst_address.sin.sin_addr));
		break;

#ifdef INET6
	case AF_INET6:
		dst_address.sin6.sin6_len = sizeof(struct sockaddr_in6);
		m_copydata(m, offsetof(struct ip6_hdr, ip6_dst),
		    sizeof(struct in6_addr),
		    (caddr_t) &(dst_address.sin6.sin6_addr));
		in6_recoverscope(&dst_address.sin6,
		    &dst_address.sin6.sin6_addr);
		break;
#endif /* INET6 */

	default:
		DPRINTF("unsupported protocol family %d", af);
		IPSEC_ISTAT(esps_nopf, ahs_nopf, ipcomps_nopf);
		goto drop;
	}

	/* Every exit below releases the pointer returned here via tdb_unref(). */
	tdbp = gettdb(rtable_l2(m->m_pkthdr.ph_rtableid),
	    spi, &dst_address, sproto);
	if (tdbp == NULL) {
		DPRINTF("could not find SA for packet to %s, spi %08x",
		    ipsp_address(&dst_address, buf, sizeof(buf)), ntohl(spi));
		IPSEC_ISTAT(esps_notdb, ahs_notdb, ipcomps_notdb);
		goto drop;
	}

	if (tdbp->tdb_flags & TDBF_INVALID) {
		DPRINTF("attempted to use invalid SA %s/%08x/%u",
		    ipsp_address(&dst_address, buf, sizeof(buf)),
		    ntohl(spi), tdbp->tdb_sproto);
		IPSEC_ISTAT(esps_invalid, ahs_invalid, ipcomps_invalid);
		goto drop;
	}

	/* The UDP encapsulation state of packet and SA must match. */
	if (udpencap && !(tdbp->tdb_flags & TDBF_UDPENCAP)) {
		DPRINTF("attempted to use non-udpencap SA %s/%08x/%u",
		    ipsp_address(&dst_address, buf, sizeof(buf)),
		    ntohl(spi), tdbp->tdb_sproto);
		espstat_inc(esps_udpinval);
		goto drop;
	}

	if (!udpencap && (tdbp->tdb_flags & TDBF_UDPENCAP)) {
		DPRINTF("attempted to use udpencap SA %s/%08x/%u",
		    ipsp_address(&dst_address, buf, sizeof(buf)),
		    ntohl(spi), tdbp->tdb_sproto);
		espstat_inc(esps_udpneeded);
		goto drop;
	}

	if (tdbp->tdb_xform == NULL) {
		DPRINTF("attempted to use uninitialized SA %s/%08x/%u",
		    ipsp_address(&dst_address, buf, sizeof(buf)),
		    ntohl(spi), tdbp->tdb_sproto);
		IPSEC_ISTAT(esps_noxform, ahs_noxform, ipcomps_noxform);
		goto drop;
	}

	/*
	 * No "goto drop" past this point: the drop label does not release
	 * the kernel lock taken here.
	 */
	KERNEL_LOCK();
	/* Register first use, setup expiration timer. */
	if (tdbp->tdb_first_use == 0) {
		tdbp->tdb_first_use = gettime();
		/*
		 * A tdb reference is taken whenever timeout_add_sec()
		 * returns non-zero (presumably on behalf of the newly
		 * scheduled timeout -- confirm).
		 */
		if (tdbp->tdb_flags & TDBF_FIRSTUSE) {
			if (timeout_add_sec(&tdbp->tdb_first_tmo,
			    tdbp->tdb_exp_first_use))
				tdb_ref(tdbp);
		}
		if (tdbp->tdb_flags & TDBF_SOFT_FIRSTUSE) {
			if (timeout_add_sec(&tdbp->tdb_sfirst_tmo,
			    tdbp->tdb_soft_first_use))
				tdb_ref(tdbp);
		}
	}

	tdbstat_pkt(tdbp, tdb_ipackets, tdb_ibytes, m->m_pkthdr.len);

	/*
	 * Call appropriate transform and return -- callback takes care of
	 * everything else.
	 */
	prot = (*(tdbp->tdb_xform->xf_input))(mp, tdbp, skip, protoff);
	if (prot == IPPROTO_DONE) {
		/* IPPROTO_DONE from the transform is accounted as a drop. */
		ipsecstat_inc(ipsec_idrops);
		tdbstat_inc(tdbp, tdb_idrops);
	}
	tdb_unref(tdbp);
	KERNEL_UNLOCK();
	return prot;

 drop:
	m_freemp(mp);
	ipsecstat_inc(ipsec_idrops);
	if (tdbp != NULL)
		tdbstat_inc(tdbp, tdb_idrops);
	/*
	 * NOTE(review): tdbp is still NULL for drops that happen before or
	 * at the lookup, so tdb_unref() is reached with NULL -- presumably
	 * it tolerates that; confirm.
	 */
	tdb_unref(tdbp);
	return IPPROTO_DONE;
}
351 
/*
 * IPsec input callback, called by the transform callback. Takes care of
 * filtering and other sanity checks on the processed packet.
 *
 * Arguments:
 *	mp	 pointer to the processed packet; may be replaced by
 *		 m_pullup() or pf_test() and is freed on the baddone path.
 *	tdbp	 SA the packet was processed under.
 *	skip	 length of the headers in front of the payload; used for the
 *		 pullup, the IPv6 payload length and the transport checksum.
 *	protoff	 offset of the next-protocol byte (read for IPv6 only).
 *
 * Returns the protocol number of the payload so the caller can continue
 * with the matching handler, or IPPROTO_DONE if the packet is gone.
 *
 * Relies on the IPSEC_ISTAT macro defined inside ipsec_common_input() and
 * #undefs it at the end.
 */
int
ipsec_common_input_cb(struct mbuf **mp, struct tdb *tdbp, int skip, int protoff)
{
	struct mbuf *m = *mp;
	int af, sproto;
	u_int8_t prot;
#if NBPFILTER > 0
	struct ifnet *encif;
#endif
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif /* INET6 */
	struct m_tag *mtag;
	struct tdb_ident *tdbi;
#ifdef ENCDEBUG
	char buf[INET6_ADDRSTRLEN];
#endif

	af = tdbp->tdb_dst.sa.sa_family;
	sproto = tdbp->tdb_sproto;

	tdbp->tdb_last_used = gettime();

	/*
	 * NOTE(review): prot is only assigned in the AF_INET and AF_INET6
	 * branches below -- presumably the SA's family is always one of the
	 * two; confirm.
	 */

	/* Fix IPv4 header */
	if (af == AF_INET) {
		/* Make the first skip bytes contiguous before touching them. */
		if (m->m_len < skip &&
		    (m = *mp = m_pullup(m, skip)) == NULL) {
			DPRINTF("processing failed for SA %s/%08x",
			    ipsp_address(&tdbp->tdb_dst, buf, sizeof(buf)),
			    ntohl(tdbp->tdb_spi));
			IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
			goto baddone;
		}

		/* Rewrite total length and recompute the header checksum. */
		ip = mtod(m, struct ip *);
		ip->ip_len = htons(m->m_pkthdr.len);
		ip->ip_sum = 0;
		ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
		prot = ip->ip_p;
	}

#ifdef INET6
	/* Fix IPv6 header */
	if (af == AF_INET6) {
		if (m->m_len < sizeof(struct ip6_hdr) &&
		    (m = *mp = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {

			DPRINTF("processing failed for SA %s/%08x",
			    ipsp_address(&tdbp->tdb_dst, buf, sizeof(buf)),
			    ntohl(tdbp->tdb_spi));
			IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
			goto baddone;
		}

		ip6 = mtod(m, struct ip6_hdr *);
		ip6->ip6_plen = htons(m->m_pkthdr.len - skip);

		/* Save protocol */
		m_copydata(m, protoff, 1, (caddr_t) &prot);
	}
#endif /* INET6 */

	/*
	 * Fix TCP/UDP checksum of UDP encapsulated transport mode ESP packet.
	 * (RFC3948 3.1.2)
	 */
	if ((af == AF_INET || af == AF_INET6) &&
	    (tdbp->tdb_flags & TDBF_UDPENCAP) &&
	    (tdbp->tdb_flags & TDBF_TUNNELING) == 0) {
		u_int16_t cksum;

		switch (prot) {
		case IPPROTO_UDP:
			if (m->m_pkthdr.len < skip + sizeof(struct udphdr)) {
				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
				    ipcomps_hdrops);
				goto baddone;
			}
			/*
			 * The UDP checksum is zeroed; only for IPv6 is it
			 * recomputed and written back.
			 */
			cksum = 0;
			m_copyback(m, skip + offsetof(struct udphdr, uh_sum),
			    sizeof(cksum), &cksum, M_NOWAIT);
#ifdef INET6
			if (af == AF_INET6) {
				cksum = in6_cksum(m, IPPROTO_UDP, skip,
				    m->m_pkthdr.len - skip);
				m_copyback(m, skip + offsetof(struct udphdr,
				    uh_sum), sizeof(cksum), &cksum, M_NOWAIT);
			}
#endif
			break;
		case IPPROTO_TCP:
			if (m->m_pkthdr.len < skip + sizeof(struct tcphdr)) {
				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
				    ipcomps_hdrops);
				goto baddone;
			}
			/* Zero the TCP checksum, recompute, write back. */
			cksum = 0;
			m_copyback(m, skip + offsetof(struct tcphdr, th_sum),
			    sizeof(cksum), &cksum, M_NOWAIT);
			if (af == AF_INET)
				cksum = in4_cksum(m, IPPROTO_TCP, skip,
				    m->m_pkthdr.len - skip);
#ifdef INET6
			else if (af == AF_INET6)
				cksum = in6_cksum(m, IPPROTO_TCP, skip,
				    m->m_pkthdr.len - skip);
#endif
			m_copyback(m, skip + offsetof(struct tcphdr, th_sum),
			    sizeof(cksum), &cksum, M_NOWAIT);
			break;
		}
	}

	/*
	 * Record what we've done to the packet (under what SA it was
	 * processed).
	 */
	if (tdbp->tdb_sproto != IPPROTO_IPCOMP) {
		mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE,
		    sizeof(struct tdb_ident), M_NOWAIT);
		if (mtag == NULL) {
			DPRINTF("failed to get tag");
			IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
			goto baddone;
		}

		/* The tdb_ident payload sits directly behind the tag header. */
		tdbi = (struct tdb_ident *)(mtag + 1);
		tdbi->dst = tdbp->tdb_dst;
		tdbi->proto = tdbp->tdb_sproto;
		tdbi->spi = tdbp->tdb_spi;
		tdbi->rdomain = tdbp->tdb_rdomain;

		m_tag_prepend(m, mtag);
	}

	/* Flag the mbuf with the kind of protection that was removed. */
	switch (sproto) {
	case IPPROTO_ESP:
		/* Packet is confidential ? */
		if (tdbp->tdb_encalgxform)
			m->m_flags |= M_CONF;

		/* Check if we had authenticated ESP. */
		if (tdbp->tdb_authalgxform)
			m->m_flags |= M_AUTH;
		break;
	case IPPROTO_AH:
		m->m_flags |= M_AUTH;
		break;
	case IPPROTO_IPCOMP:
		/* Checked by ipsec_common_input() to refuse a second pass. */
		m->m_flags |= M_COMP;
		break;
	default:
		panic("%s: unknown/unsupported security protocol %d",
		    __func__, sproto);
	}

#if NPF > 0
	/* Add pf tag if requested. */
	pf_tag_packet(m, tdbp->tdb_tag, -1);
	pf_pkt_addr_changed(m);
#endif
	/* Move the packet to the SA's post-processing routing domain. */
	if (tdbp->tdb_rdomain != tdbp->tdb_rdomain_post)
		m->m_pkthdr.ph_rtableid = tdbp->tdb_rdomain_post;

	if (tdbp->tdb_flags & TDBF_TUNNELING)
		m->m_flags |= M_TUNNEL;

	ipsecstat_add(ipsec_idecompbytes, m->m_pkthdr.len);
	tdbstat_add(tdbp, tdb_idecompbytes, m->m_pkthdr.len);

#if NBPFILTER > 0
	/* Account the packet on the enc interface and feed any bpf listener. */
	encif = enc_getif(tdbp->tdb_rdomain_post, tdbp->tdb_tap);
	if (encif != NULL) {
		encif->if_ipackets++;
		encif->if_ibytes += m->m_pkthdr.len;

		if (sproto != IPPROTO_IPCOMP) {
			/* XXX This conflicts with the scoped nature of IPv6 */
			m->m_pkthdr.ph_ifidx = encif->if_index;
		}
		if (encif->if_bpf) {
			struct enchdr hdr;

			hdr.af = af;
			hdr.spi = tdbp->tdb_spi;
			hdr.flags = m->m_flags & (M_AUTH|M_CONF);

			bpf_mtap_hdr(encif->if_bpf, (char *)&hdr,
			    ENC_HDRLEN, m, BPF_DIRECTION_IN);
		}
	}
#endif

#if NPF > 0
	/*
	 * The ip_deliver() shortcut avoids running through ip_input() with the
	 * same IP header twice.  Packets in transport mode have to be
	 * passed to pf explicitly.  In tunnel mode the inner IP header will
	 * run through ip_input() and pf anyway.
	 */
	if ((tdbp->tdb_flags & TDBF_TUNNELING) == 0) {
		struct ifnet *ifp;

		/* This is the enc0 interface unless for ipcomp. */
		if ((ifp = if_get(m->m_pkthdr.ph_ifidx)) == NULL) {
			goto baddone;
		}
		if (pf_test(af, PF_IN, ifp, mp) != PF_PASS) {
			if_put(ifp);
			goto baddone;
		}
		/* pf_test() may have replaced or consumed the mbuf. */
		m = *mp;
		if_put(ifp);
		if (m == NULL)
			return IPPROTO_DONE;
	}
#endif
	/* Return to the appropriate protocol handler in deliver loop. */
	return prot;

 baddone:
	m_freemp(mp);
	return IPPROTO_DONE;
#undef IPSEC_ISTAT
}
582 
583 int
584 ipsec_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
585     size_t newlen)
586 {
587 	int error;
588 
589 	switch (name[0]) {
590 	case IPCTL_IPSEC_ENC_ALGORITHM:
591 		NET_LOCK();
592 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
593 		    ipsec_def_enc, sizeof(ipsec_def_enc));
594 		NET_UNLOCK();
595 		return (error);
596 	case IPCTL_IPSEC_AUTH_ALGORITHM:
597 		NET_LOCK();
598 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
599 		    ipsec_def_auth, sizeof(ipsec_def_auth));
600 		NET_UNLOCK();
601 		return (error);
602 	case IPCTL_IPSEC_IPCOMP_ALGORITHM:
603 		NET_LOCK();
604 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
605 		    ipsec_def_comp, sizeof(ipsec_def_comp));
606 		NET_UNLOCK();
607 		return (error);
608 	case IPCTL_IPSEC_STATS:
609 		return (ipsec_sysctl_ipsecstat(oldp, oldlenp, newp));
610 	default:
611 		NET_LOCK();
612 		error = sysctl_bounded_arr(ipsecctl_vars, nitems(ipsecctl_vars),
613 		    name, namelen, oldp, oldlenp, newp, newlen);
614 		NET_UNLOCK();
615 		return (error);
616 	}
617 }
618 
619 int
620 esp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
621     size_t newlen)
622 {
623 	int error;
624 
625 	/* All sysctl names at this level are terminal. */
626 	if (namelen != 1)
627 		return (ENOTDIR);
628 
629 	switch (name[0]) {
630 	case ESPCTL_STATS:
631 		return (esp_sysctl_espstat(oldp, oldlenp, newp));
632 	default:
633 		NET_LOCK();
634 		error = sysctl_bounded_arr(espctl_vars, nitems(espctl_vars),
635 		    name, namelen, oldp, oldlenp, newp, newlen);
636 		NET_UNLOCK();
637 		return (error);
638 	}
639 }
640 
641 int
642 esp_sysctl_espstat(void *oldp, size_t *oldlenp, void *newp)
643 {
644 	struct espstat espstat;
645 
646 	CTASSERT(sizeof(espstat) == (esps_ncounters * sizeof(uint64_t)));
647 	memset(&espstat, 0, sizeof espstat);
648 	counters_read(espcounters, (uint64_t *)&espstat, esps_ncounters);
649 	return (sysctl_rdstruct(oldp, oldlenp, newp, &espstat,
650 	    sizeof(espstat)));
651 }
652 
653 int
654 ah_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
655     size_t newlen)
656 {
657 	int error;
658 
659 	/* All sysctl names at this level are terminal. */
660 	if (namelen != 1)
661 		return (ENOTDIR);
662 
663 	switch (name[0]) {
664 	case AHCTL_STATS:
665 		return ah_sysctl_ahstat(oldp, oldlenp, newp);
666 	default:
667 		NET_LOCK();
668 		error = sysctl_bounded_arr(ahctl_vars, nitems(ahctl_vars), name,
669 		    namelen, oldp, oldlenp, newp, newlen);
670 		NET_UNLOCK();
671 		return (error);
672 	}
673 }
674 
675 int
676 ah_sysctl_ahstat(void *oldp, size_t *oldlenp, void *newp)
677 {
678 	struct ahstat ahstat;
679 
680 	CTASSERT(sizeof(ahstat) == (ahs_ncounters * sizeof(uint64_t)));
681 	memset(&ahstat, 0, sizeof ahstat);
682 	counters_read(ahcounters, (uint64_t *)&ahstat, ahs_ncounters);
683 	return (sysctl_rdstruct(oldp, oldlenp, newp, &ahstat, sizeof(ahstat)));
684 }
685 
686 int
687 ipcomp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
688     size_t newlen)
689 {
690 	int error;
691 
692 	/* All sysctl names at this level are terminal. */
693 	if (namelen != 1)
694 		return (ENOTDIR);
695 
696 	switch (name[0]) {
697 	case IPCOMPCTL_STATS:
698 		return ipcomp_sysctl_ipcompstat(oldp, oldlenp, newp);
699 	default:
700 		NET_LOCK();
701 		error = sysctl_bounded_arr(ipcompctl_vars,
702 		    nitems(ipcompctl_vars), name, namelen, oldp, oldlenp,
703 		    newp, newlen);
704 		NET_UNLOCK();
705 		return (error);
706 	}
707 }
708 
709 int
710 ipcomp_sysctl_ipcompstat(void *oldp, size_t *oldlenp, void *newp)
711 {
712 	struct ipcompstat ipcompstat;
713 
714 	CTASSERT(sizeof(ipcompstat) == (ipcomps_ncounters * sizeof(uint64_t)));
715 	memset(&ipcompstat, 0, sizeof ipcompstat);
716 	counters_read(ipcompcounters, (uint64_t *)&ipcompstat,
717 	    ipcomps_ncounters);
718 	return (sysctl_rdstruct(oldp, oldlenp, newp, &ipcompstat,
719 	    sizeof(ipcompstat)));
720 }
721 
722 int
723 ipsec_sysctl_ipsecstat(void *oldp, size_t *oldlenp, void *newp)
724 {
725 	struct ipsecstat ipsecstat;
726 
727 	CTASSERT(sizeof(ipsecstat) == (ipsec_ncounters * sizeof(uint64_t)));
728 	memset(&ipsecstat, 0, sizeof ipsecstat);
729 	counters_read(ipseccounters, (uint64_t *)&ipsecstat, ipsec_ncounters);
730 	return (sysctl_rdstruct(oldp, oldlenp, newp, &ipsecstat,
731 	    sizeof(ipsecstat)));
732 }
733 
734 int
735 ipsec_input_disabled(struct mbuf **mp, int *offp, int proto, int af)
736 {
737 	switch (af) {
738 	case AF_INET:
739 		return rip_input(mp, offp, proto, af);
740 #ifdef INET6
741 	case AF_INET6:
742 		return rip6_input(mp, offp, proto, af);
743 #endif
744 	default:
745 		unhandled_af(af);
746 	}
747 }
748 
749 int
750 ah46_input(struct mbuf **mp, int *offp, int proto, int af)
751 {
752 	int protoff;
753 
754 	if (
755 #if NPF > 0
756 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
757 #endif
758 	    !ah_enable)
759 		return ipsec_input_disabled(mp, offp, proto, af);
760 
761 	protoff = ipsec_protoff(*mp, *offp, af);
762 	if (protoff < 0) {
763 		DPRINTF("bad packet header chain");
764 		ahstat_inc(ahs_hdrops);
765 		m_freemp(mp);
766 		return IPPROTO_DONE;
767 	}
768 
769 	return ipsec_common_input(mp, *offp, protoff, af, proto, 0);
770 }
771 
772 void
773 ah4_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
774 {
775 	if (sa->sa_family != AF_INET ||
776 	    sa->sa_len != sizeof(struct sockaddr_in))
777 		return;
778 
779 	ipsec_common_ctlinput(rdomain, cmd, sa, v, IPPROTO_AH);
780 }
781 
782 int
783 esp46_input(struct mbuf **mp, int *offp, int proto, int af)
784 {
785 	int protoff;
786 
787 	if (
788 #if NPF > 0
789 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
790 #endif
791 	    !esp_enable)
792 		return ipsec_input_disabled(mp, offp, proto, af);
793 
794 	protoff = ipsec_protoff(*mp, *offp, af);
795 	if (protoff < 0) {
796 		DPRINTF("bad packet header chain");
797 		espstat_inc(esps_hdrops);
798 		m_freemp(mp);
799 		return IPPROTO_DONE;
800 	}
801 
802 	return ipsec_common_input(mp, *offp, protoff, af, proto, 0);
803 }
804 
805 /* IPv4 IPCOMP wrapper */
806 int
807 ipcomp46_input(struct mbuf **mp, int *offp, int proto, int af)
808 {
809 	int protoff;
810 
811 	if (
812 #if NPF > 0
813 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
814 #endif
815 	    !ipcomp_enable)
816 		return ipsec_input_disabled(mp, offp, proto, af);
817 
818 	protoff = ipsec_protoff(*mp, *offp, af);
819 	if (protoff < 0) {
820 		DPRINTF("bad packet header chain");
821 		ipcompstat_inc(ipcomps_hdrops);
822 		m_freemp(mp);
823 		return IPPROTO_DONE;
824 	}
825 
826 	return ipsec_common_input(mp, *offp, protoff, af, proto, 0);
827 }
828 
829 void
830 ipsec_set_mtu(struct tdb *tdbp, u_int32_t mtu)
831 {
832 	ssize_t adjust;
833 
834 	NET_ASSERT_LOCKED();
835 
836 	/* Walk the chain backwards to the first tdb */
837 	for (; tdbp != NULL; tdbp = tdbp->tdb_inext) {
838 		if (tdbp->tdb_flags & TDBF_INVALID ||
839 		    (adjust = ipsec_hdrsz(tdbp)) == -1)
840 			return;
841 
842 		mtu -= adjust;
843 
844 		/* Store adjusted MTU in tdb */
845 		tdbp->tdb_mtu = mtu;
846 		tdbp->tdb_mtutimeout = gettime() + ip_mtudisc_timeout;
847 		DPRINTF("spi %08x mtu %d adjust %ld",
848 		    ntohl(tdbp->tdb_spi), tdbp->tdb_mtu, adjust);
849 	}
850 }
851 
852 void
853 ipsec_common_ctlinput(u_int rdomain, int cmd, struct sockaddr *sa,
854     void *v, int proto)
855 {
856 	struct ip *ip = v;
857 
858 	if (cmd == PRC_MSGSIZE && ip && ip_mtudisc && ip->ip_v == 4) {
859 		struct tdb *tdbp;
860 		struct sockaddr_in dst;
861 		struct icmp *icp;
862 		int hlen = ip->ip_hl << 2;
863 		u_int32_t spi, mtu;
864 
865 		/* Find the right MTU. */
866 		icp = (struct icmp *)((caddr_t) ip -
867 		    offsetof(struct icmp, icmp_ip));
868 		mtu = ntohs(icp->icmp_nextmtu);
869 
870 		/*
871 		 * Ignore the packet, if we do not receive a MTU
872 		 * or the MTU is too small to be acceptable.
873 		 */
874 		if (mtu < 296)
875 			return;
876 
877 		memset(&dst, 0, sizeof(struct sockaddr_in));
878 		dst.sin_family = AF_INET;
879 		dst.sin_len = sizeof(struct sockaddr_in);
880 		dst.sin_addr.s_addr = ip->ip_dst.s_addr;
881 
882 		memcpy(&spi, (caddr_t)ip + hlen, sizeof(u_int32_t));
883 
884 		tdbp = gettdb_rev(rdomain, spi, (union sockaddr_union *)&dst,
885 		    proto);
886 		ipsec_set_mtu(tdbp, mtu);
887 		tdb_unref(tdbp);
888 	}
889 }
890 
891 void
892 udpencap_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
893 {
894 	struct ip *ip = v;
895 	struct tdb *tdbp, *first;
896 	struct icmp *icp;
897 	u_int32_t mtu;
898 	struct sockaddr_in dst, src;
899 	union sockaddr_union *su_dst, *su_src;
900 
901 	NET_ASSERT_LOCKED();
902 
903 	icp = (struct icmp *)((caddr_t) ip - offsetof(struct icmp, icmp_ip));
904 	mtu = ntohs(icp->icmp_nextmtu);
905 
906 	/*
907 	 * Ignore the packet, if we do not receive a MTU
908 	 * or the MTU is too small to be acceptable.
909 	 */
910 	if (mtu < 296)
911 		return;
912 
913 	memset(&dst, 0, sizeof(dst));
914 	dst.sin_family = AF_INET;
915 	dst.sin_len = sizeof(struct sockaddr_in);
916 	dst.sin_addr.s_addr = ip->ip_dst.s_addr;
917 	su_dst = (union sockaddr_union *)&dst;
918 	memset(&src, 0, sizeof(src));
919 	src.sin_family = AF_INET;
920 	src.sin_len = sizeof(struct sockaddr_in);
921 	src.sin_addr.s_addr = ip->ip_src.s_addr;
922 	su_src = (union sockaddr_union *)&src;
923 
924 	first = gettdbbysrcdst_rev(rdomain, 0, su_src, su_dst, IPPROTO_ESP);
925 
926 	mtx_enter(&tdb_sadb_mtx);
927 	for (tdbp = first; tdbp != NULL; tdbp = tdbp->tdb_snext) {
928 		if (tdbp->tdb_sproto == IPPROTO_ESP &&
929 		    ((tdbp->tdb_flags & (TDBF_INVALID|TDBF_UDPENCAP)) ==
930 		    TDBF_UDPENCAP) &&
931 		    !memcmp(&tdbp->tdb_dst, &dst, su_dst->sa.sa_len) &&
932 		    !memcmp(&tdbp->tdb_src, &src, su_src->sa.sa_len))
933 			ipsec_set_mtu(tdbp, mtu);
934 	}
935 	mtx_leave(&tdb_sadb_mtx);
936 	tdb_unref(first);
937 }
938 
939 void
940 esp4_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
941 {
942 	if (sa->sa_family != AF_INET ||
943 	    sa->sa_len != sizeof(struct sockaddr_in))
944 		return;
945 
946 	ipsec_common_ctlinput(rdomain, cmd, sa, v, IPPROTO_ESP);
947 }
948 
949 /* Find the offset of the next protocol field in the previous header. */
950 int
951 ipsec_protoff(struct mbuf *m, int off, int af)
952 {
953 #ifdef INET6
954 	struct ip6_ext ip6e;
955 	int protoff, nxt, l;
956 #endif /* INET6 */
957 
958 	switch (af) {
959 	case AF_INET:
960 		return offsetof(struct ip, ip_p);
961 #ifdef INET6
962 	case AF_INET6:
963 		break;
964 #endif /* INET6 */
965 	default:
966 		unhandled_af(af);
967 	}
968 
969 #ifdef INET6
970 	if (off < sizeof(struct ip6_hdr))
971 		return -1;
972 
973 	if (off == sizeof(struct ip6_hdr))
974 		return offsetof(struct ip6_hdr, ip6_nxt);
975 
976 	/* Chase down the header chain... */
977 	protoff = sizeof(struct ip6_hdr);
978 	nxt = (mtod(m, struct ip6_hdr *))->ip6_nxt;
979 	l = 0;
980 
981 	do {
982 		protoff += l;
983 		m_copydata(m, protoff, sizeof(ip6e),
984 		    (caddr_t) &ip6e);
985 
986 		if (nxt == IPPROTO_AH)
987 			l = (ip6e.ip6e_len + 2) << 2;
988 		else
989 			l = (ip6e.ip6e_len + 1) << 3;
990 #ifdef DIAGNOSTIC
991 		if (l <= 0)
992 			panic("%s: l went zero or negative", __func__);
993 #endif
994 
995 		nxt = ip6e.ip6e_nxt;
996 	} while (protoff + l < off);
997 
998 	/* Malformed packet check */
999 	if (protoff + l != off)
1000 		return -1;
1001 
1002 	protoff += offsetof(struct ip6_ext, ip6e_nxt);
1003 	return protoff;
1004 #endif /* INET6 */
1005 }
1006 
1007 int
1008 ipsec_forward_check(struct mbuf *m, int hlen, int af)
1009 {
1010 	struct tdb *tdb;
1011 	struct tdb_ident *tdbi;
1012 	struct m_tag *mtag;
1013 	int error = 0;
1014 
1015 	/*
1016 	 * IPsec policy check for forwarded packets. Look at
1017 	 * inner-most IPsec SA used.
1018 	 */
1019 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
1020 	if (mtag != NULL) {
1021 		tdbi = (struct tdb_ident *)(mtag + 1);
1022 		tdb = gettdb(tdbi->rdomain, tdbi->spi, &tdbi->dst, tdbi->proto);
1023 	} else
1024 		tdb = NULL;
1025 	error = ipsp_spd_lookup(m, af, hlen, IPSP_DIRECTION_IN,
1026 	    tdb, NULL, NULL, 0);
1027 	tdb_unref(tdb);
1028 
1029 	return error;
1030 }
1031 
1032 int
1033 ipsec_local_check(struct mbuf *m, int hlen, int proto, int af)
1034 {
1035 	struct tdb *tdb;
1036 	struct tdb_ident *tdbi;
1037 	struct m_tag *mtag;
1038 	int error = 0;
1039 
1040 	/*
1041 	 * If it's a protected packet for us, skip the policy check.
1042 	 * That's because we really only care about the properties of
1043 	 * the protected packet, and not the intermediate versions.
1044 	 * While this is not the most paranoid setting, it allows
1045 	 * some flexibility in handling nested tunnels (in setting up
1046 	 * the policies).
1047 	 */
1048 	if ((proto == IPPROTO_ESP) || (proto == IPPROTO_AH) ||
1049 	    (proto == IPPROTO_IPCOMP))
1050 		return 0;
1051 
1052 	/*
1053 	 * If the protected packet was tunneled, then we need to
1054 	 * verify the protected packet's information, not the
1055 	 * external headers. Thus, skip the policy lookup for the
1056 	 * external packet, and keep the IPsec information linked on
1057 	 * the packet header (the encapsulation routines know how
1058 	 * to deal with that).
1059 	 */
1060 	if ((proto == IPPROTO_IPV4) || (proto == IPPROTO_IPV6))
1061 		return 0;
1062 
1063 	/*
1064 	 * When processing IPv6 header chains, do not look at the
1065 	 * outer header.  The inner protocol is relevant and will
1066 	 * be checked by the local delivery loop later.
1067 	 */
1068 	if ((af == AF_INET6) && ((proto == IPPROTO_DSTOPTS) ||
1069 	    (proto == IPPROTO_ROUTING) || (proto == IPPROTO_FRAGMENT)))
1070 		return 0;
1071 
1072 	/*
1073 	 * If the protected packet is TCP or UDP, we'll do the
1074 	 * policy check in the respective input routine, so we can
1075 	 * check for bypass sockets.
1076 	 */
1077 	if ((proto == IPPROTO_TCP) || (proto == IPPROTO_UDP))
1078 		return 0;
1079 
1080 	/*
1081 	 * IPsec policy check for local-delivery packets. Look at the
1082 	 * inner-most SA that protected the packet. This is in fact
1083 	 * a bit too restrictive (it could end up causing packets to
1084 	 * be dropped that semantically follow the policy, e.g., in
1085 	 * certain SA-bundle configurations); but the alternative is
1086 	 * very complicated (and requires keeping track of what
1087 	 * kinds of tunneling headers have been seen in-between the
1088 	 * IPsec headers), and I don't think we lose much functionality
1089 	 * that's needed in the real world (who uses bundles anyway ?).
1090 	 */
1091 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
1092 	if (mtag) {
1093 		tdbi = (struct tdb_ident *)(mtag + 1);
1094 		tdb = gettdb(tdbi->rdomain, tdbi->spi, &tdbi->dst,
1095 		    tdbi->proto);
1096 	} else
1097 		tdb = NULL;
1098 	error = ipsp_spd_lookup(m, af, hlen, IPSP_DIRECTION_IN,
1099 	    tdb, NULL, NULL, 0);
1100 	tdb_unref(tdb);
1101 
1102 	return error;
1103 }
1104