xref: /openbsd-src/sys/netinet/ipsec_input.c (revision f90ef06a3045119dcc88b72d8b98ca60e3c00d5a)
1 /*	$OpenBSD: ipsec_input.c,v 1.205 2023/08/07 03:43:57 dlg Exp $	*/
2 /*
3  * The authors of this code are John Ioannidis (ji@tla.org),
4  * Angelos D. Keromytis (kermit@csd.uch.gr) and
5  * Niels Provos (provos@physnet.uni-hamburg.de).
6  *
7  * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
8  * in November 1995.
9  *
10  * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
11  * by Angelos D. Keromytis.
12  *
13  * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
14  * and Niels Provos.
15  *
16  * Additional features in 1999 by Angelos D. Keromytis.
17  *
18  * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
19  * Angelos D. Keromytis and Niels Provos.
20  * Copyright (c) 2001, Angelos D. Keromytis.
21  *
22  * Permission to use, copy, and modify this software with or without fee
23  * is hereby granted, provided that this entire notice is included in
24  * all copies of any software which is or includes a copy or
25  * modification of this software.
26  * You may use this code under the GNU public license if you so wish. Please
27  * contribute changes back to the authors under this freer than GPL license
28  * so that we may further the use of strong encryption without limitations to
29  * all.
30  *
31  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
32  * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
33  * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
34  * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
35  * PURPOSE.
36  */
37 
38 #include "pf.h"
39 #include "sec.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/protosw.h>
44 #include <sys/mbuf.h>
45 #include <sys/socket.h>
46 #include <sys/sysctl.h>
47 #include <sys/kernel.h>
48 #include <sys/timeout.h>
49 
50 #include <net/if.h>
51 #include <net/if_var.h>
52 #include <net/netisr.h>
53 #include <net/bpf.h>
54 #include <net/route.h>
55 
56 #include <netinet/in.h>
57 #include <netinet/ip.h>
58 #include <netinet/ip_var.h>
59 #include <netinet/ip_icmp.h>
60 #include <netinet/tcp.h>
61 #include <netinet/udp.h>
62 
63 #if NPF > 0
64 #include <net/pfvar.h>
65 #endif
66 
67 #if NSEC > 0
68 #include <net/if_sec.h>
69 #endif
70 
71 #ifdef INET6
72 #include <netinet6/in6_var.h>
73 #include <netinet/ip6.h>
74 #include <netinet6/ip6_var.h>
75 #endif /* INET6 */
76 
77 #include <netinet/ip_ipsp.h>
78 #include <netinet/ip_esp.h>
79 #include <netinet/ip_ah.h>
80 #include <netinet/ip_ipcomp.h>
81 
82 #include <net/if_enc.h>
83 
84 #include <crypto/cryptodev.h>
85 #include <crypto/xform.h>
86 
87 #include "bpfilter.h"
88 
/*
 * Forward declaration: shared PRC_MSGSIZE handler that ah4_ctlinput() and
 * esp4_ctlinput() below both delegate to.
 */
89 void ipsec_common_ctlinput(u_int, int, struct sockaddr *, void *, int);
90 
/*
 * DPRINTF(fmt, ...): debug trace helper.
 *
 * When the kernel is built with ENCDEBUG, the message is printed (prefixed
 * with the calling function's name and followed by a newline) only while
 * the encdebug variable is non-zero.  Without ENCDEBUG the macro expands to
 * an empty statement, so its arguments are never evaluated.
 */
91 #ifdef ENCDEBUG
92 #define DPRINTF(fmt, args...)						\
93 	do {								\
94 		if (encdebug)						\
95 			printf("%s: " fmt "\n", __func__, ## args);	\
96 	} while (0)
97 #else
98 #define DPRINTF(fmt, args...)						\
99 	do { } while (0)
100 #endif
101 
102 /* sysctl variables */
/*
 * Integer tunables; each one below is referenced from the ipsecctl_vars
 * table further down, which ipsec_sysctl() hands to sysctl_bounded_arr().
 */
103 int encdebug = 0;
104 int ipsec_keep_invalid = IPSEC_DEFAULT_EMBRYONIC_SA_TIMEOUT;
105 int ipsec_require_pfs = IPSEC_DEFAULT_PFS;
106 int ipsec_soft_allocations = IPSEC_DEFAULT_SOFT_ALLOCATIONS;
107 int ipsec_exp_allocations = IPSEC_DEFAULT_EXP_ALLOCATIONS;
108 int ipsec_soft_bytes = IPSEC_DEFAULT_SOFT_BYTES;
109 int ipsec_exp_bytes = IPSEC_DEFAULT_EXP_BYTES;
110 int ipsec_soft_timeout = IPSEC_DEFAULT_SOFT_TIMEOUT;
111 int ipsec_exp_timeout = IPSEC_DEFAULT_EXP_TIMEOUT;
112 int ipsec_soft_first_use = IPSEC_DEFAULT_SOFT_FIRST_USE;
113 int ipsec_exp_first_use = IPSEC_DEFAULT_EXP_FIRST_USE;
114 int ipsec_expire_acquire = IPSEC_DEFAULT_EXPIRE_ACQUIRE;
115 
/*
 * Per-protocol enable switches.  The esp46_input(), ah46_input() and
 * ipcomp46_input() wrappers hand the packet to ipsec_input_disabled()
 * when the matching switch is zero.
 */
116 int esp_enable = 1;
117 int ah_enable = 1;
118 int ipcomp_enable = 0;
119 
/*
 * Tables passed to sysctl_bounded_arr() by esp_sysctl(), ah_sysctl() and
 * ipcomp_sysctl().  Each entry presumably reads as
 * { sysctl name, variable, lower bound, upper bound } -- confirm against
 * the struct sysctl_bounded_args declaration.
 */
120 const struct sysctl_bounded_args espctl_vars[] = {
121 	{ESPCTL_ENABLE, &esp_enable, 0, 1},
122 	{ESPCTL_UDPENCAP_ENABLE, &udpencap_enable, 0, 1},
123 	{ESPCTL_UDPENCAP_PORT, &udpencap_port, 0, 65535},
124 };
125 const struct sysctl_bounded_args ahctl_vars[] = {
126 	{AHCTL_ENABLE, &ah_enable, 0, 1},
127 };
128 const struct sysctl_bounded_args ipcompctl_vars[] = {
129 	{IPCOMPCTL_ENABLE, &ipcomp_enable, 0, 1},
130 };
131 
/* Statistics counters; allocated once in ipsec_init(). */
132 struct cpumem *espcounters;
133 struct cpumem *ahcounters;
134 struct cpumem *ipcompcounters;
135 struct cpumem *ipseccounters;
136 
/*
 * Default algorithm names.  Filled in by ipsec_init() and read/written as
 * strings through ipsec_sysctl().
 */
137 char ipsec_def_enc[20];
138 char ipsec_def_auth[20];
139 char ipsec_def_comp[20];
140 
/* Table used by the default case of ipsec_sysctl(). */
141 const struct sysctl_bounded_args ipsecctl_vars[] = {
142 	{ IPSEC_ENCDEBUG, &encdebug, 0, 1 },
143 	{ IPSEC_EXPIRE_ACQUIRE, &ipsec_expire_acquire, 0, INT_MAX },
144 	{ IPSEC_EMBRYONIC_SA_TIMEOUT, &ipsec_keep_invalid, 0, INT_MAX },
145 	{ IPSEC_REQUIRE_PFS, &ipsec_require_pfs, 0, 1 },
146 	{ IPSEC_SOFT_ALLOCATIONS, &ipsec_soft_allocations, 0, INT_MAX },
147 	{ IPSEC_ALLOCATIONS, &ipsec_exp_allocations, 0, INT_MAX },
148 	{ IPSEC_SOFT_BYTES, &ipsec_soft_bytes, 0, INT_MAX },
149 	{ IPSEC_BYTES, &ipsec_exp_bytes, 0, INT_MAX },
150 	{ IPSEC_TIMEOUT, &ipsec_exp_timeout, 0, INT_MAX },
151 	{ IPSEC_SOFT_TIMEOUT, &ipsec_soft_timeout,0, INT_MAX },
152 	{ IPSEC_SOFT_FIRSTUSE, &ipsec_soft_first_use, 0, INT_MAX },
153 	{ IPSEC_FIRSTUSE, &ipsec_exp_first_use, 0, INT_MAX },
154 };
155 
/* Read-only statistics exporters, defined later in this file. */
156 int esp_sysctl_espstat(void *, size_t *, void *);
157 int ah_sysctl_ahstat(void *, size_t *, void *);
158 int ipcomp_sysctl_ipcompstat(void *, size_t *, void *);
159 int ipsec_sysctl_ipsecstat(void *, size_t *, void *);
160 
161 void
162 ipsec_init(void)
163 {
164 	espcounters = counters_alloc(esps_ncounters);
165 	ahcounters = counters_alloc(ahs_ncounters);
166 	ipcompcounters = counters_alloc(ipcomps_ncounters);
167 	ipseccounters = counters_alloc(ipsec_ncounters);
168 
169 	strlcpy(ipsec_def_enc, IPSEC_DEFAULT_DEF_ENC, sizeof(ipsec_def_enc));
170 	strlcpy(ipsec_def_auth, IPSEC_DEFAULT_DEF_AUTH, sizeof(ipsec_def_auth));
171 	strlcpy(ipsec_def_comp, IPSEC_DEFAULT_DEF_COMP, sizeof(ipsec_def_comp));
172 
173 	ipsp_init();
174 }
175 
176 /*
177  * ipsec_common_input() gets called when we receive an IPsec-protected packet
178  * in IPv4 or IPv6. All it does is find the right TDB and call the appropriate
179  * transform. The callback takes care of further processing (like ingress
180  * filtering).
 *
 * Arguments:
 *   mp       - pointer to the mbuf chain pointer; freed via m_freemp() on
 *              every drop path in this function.
 *   skip     - byte offset in the packet at which the IPsec header starts
 *              (the SPI/CPI is read relative to it).
 *   protoff  - passed through unchanged to the transform's xf_input.
 *   af       - AF_INET or AF_INET6; anything else is counted and dropped.
 *   sproto   - IPPROTO_ESP, IPPROTO_AH or IPPROTO_IPCOMP; any other value
 *              panics.
 *   udpencap - non-zero when the caller received the packet UDP-encapsulated;
 *              must agree with the SA's TDBF_UDPENCAP flag.
 *
 * Returns whatever the transform's xf_input returns, or IPPROTO_DONE after
 * the packet has been dropped here.
181  */
182 int
183 ipsec_common_input(struct mbuf **mp, int skip, int protoff, int af, int sproto,
184     int udpencap)
185 {
/*
 * Bump the ESP, AH or IPComp flavour of a statistic depending on sproto.
 * The macro stays defined past the end of this function; it is #undef'd
 * at the end of ipsec_common_input_cb().
 */
186 #define IPSEC_ISTAT(x,y,z) do {			\
187 	if (sproto == IPPROTO_ESP)		\
188 		espstat_inc(x);			\
189 	else if (sproto == IPPROTO_AH)		\
190 		ahstat_inc(y);			\
191 	else					\
192 		ipcompstat_inc(z);		\
193 } while (0)
194 
195 	struct mbuf *m = *mp;
196 	union sockaddr_union dst_address;
197 	struct tdb *tdbp = NULL;
198 	u_int32_t spi;
199 	u_int16_t cpi;
200 	int prot;
201 #ifdef ENCDEBUG
202 	char buf[INET6_ADDRSTRLEN];
203 #endif
204 
205 	NET_ASSERT_LOCKED();
206 
207 	ipsecstat_pkt(ipsec_ipackets, ipsec_ibytes, m->m_pkthdr.len);
208 	IPSEC_ISTAT(esps_input, ahs_input, ipcomps_input);
209 
	/* A packet already flagged M_COMP must not be decompressed again. */
210 	if ((sproto == IPPROTO_IPCOMP) && (m->m_flags & M_COMP)) {
211 		DPRINTF("repeated decompression");
212 		ipcompstat_inc(ipcomps_pdrops);
213 		goto drop;
214 	}
215 
	/*
	 * Need at least eight bytes after skip for the header reads below.
	 * NOTE(review): the right-hand side is a size_t, so a negative
	 * (len - skip) would compare as a large unsigned value and pass;
	 * presumably callers guarantee skip <= m_pkthdr.len -- confirm.
	 */
216 	if (m->m_pkthdr.len - skip < 2 * sizeof(u_int32_t)) {
217 		DPRINTF("packet too small");
218 		IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
219 		goto drop;
220 	}
221 
222 	/* Retrieve the SPI from the relevant IPsec header */
	/*
	 * ESP: 32 bits at skip.  AH: 32 bits at skip + 4.  IPComp: a 16-bit
	 * CPI at skip + 2, widened into the 32-bit spi used for the lookup.
	 */
223 	switch (sproto) {
224 	case IPPROTO_ESP:
225 		m_copydata(m, skip, sizeof(u_int32_t), (caddr_t) &spi);
226 		break;
227 	case IPPROTO_AH:
228 		m_copydata(m, skip + sizeof(u_int32_t), sizeof(u_int32_t),
229 		    (caddr_t) &spi);
230 		break;
231 	case IPPROTO_IPCOMP:
232 		m_copydata(m, skip + sizeof(u_int16_t), sizeof(u_int16_t),
233 		    (caddr_t) &cpi);
234 		spi = ntohl(htons(cpi));
235 		break;
236 	default:
237 		panic("%s: unknown/unsupported security protocol %d",
238 		    __func__, sproto);
239 	}
240 
241 	/*
242 	 * Find tunnel control block and (indirectly) call the appropriate
243 	 * kernel crypto routine. The resulting mbuf chain is a valid
244 	 * IP packet ready to go through input processing.
245 	 */
246 
	/* Build the lookup key from the outer header's destination address. */
247 	memset(&dst_address, 0, sizeof(dst_address));
248 	dst_address.sa.sa_family = af;
249 
250 	switch (af) {
251 	case AF_INET:
252 		dst_address.sin.sin_len = sizeof(struct sockaddr_in);
253 		m_copydata(m, offsetof(struct ip, ip_dst),
254 		    sizeof(struct in_addr),
255 		    (caddr_t) &(dst_address.sin.sin_addr));
256 		break;
257 
258 #ifdef INET6
259 	case AF_INET6:
260 		dst_address.sin6.sin6_len = sizeof(struct sockaddr_in6);
261 		m_copydata(m, offsetof(struct ip6_hdr, ip6_dst),
262 		    sizeof(struct in6_addr),
263 		    (caddr_t) &(dst_address.sin6.sin6_addr));
264 		in6_recoverscope(&dst_address.sin6,
265 		    &dst_address.sin6.sin6_addr);
266 		break;
267 #endif /* INET6 */
268 
269 	default:
270 		DPRINTF("unsupported protocol family %d", af);
271 		IPSEC_ISTAT(esps_nopf, ahs_nopf, ipcomps_nopf);
272 		goto drop;
273 	}
274 
	/*
	 * The pointer returned by gettdb() is released with tdb_unref() on
	 * every exit path below.
	 */
275 	tdbp = gettdb(rtable_l2(m->m_pkthdr.ph_rtableid),
276 	    spi, &dst_address, sproto);
277 	if (tdbp == NULL) {
278 		DPRINTF("could not find SA for packet to %s, spi %08x",
279 		    ipsp_address(&dst_address, buf, sizeof(buf)), ntohl(spi));
280 		IPSEC_ISTAT(esps_notdb, ahs_notdb, ipcomps_notdb);
281 		goto drop;
282 	}
283 
284 	if (tdbp->tdb_flags & TDBF_INVALID) {
285 		DPRINTF("attempted to use invalid SA %s/%08x/%u",
286 		    ipsp_address(&dst_address, buf, sizeof(buf)),
287 		    ntohl(spi), tdbp->tdb_sproto);
288 		IPSEC_ISTAT(esps_invalid, ahs_invalid, ipcomps_invalid);
289 		goto drop;
290 	}
291 
	/* The encapsulation seen on the wire must match the SA's flag. */
292 	if (udpencap && !(tdbp->tdb_flags & TDBF_UDPENCAP)) {
293 		DPRINTF("attempted to use non-udpencap SA %s/%08x/%u",
294 		    ipsp_address(&dst_address, buf, sizeof(buf)),
295 		    ntohl(spi), tdbp->tdb_sproto);
296 		espstat_inc(esps_udpinval);
297 		goto drop;
298 	}
299 
300 	if (!udpencap && (tdbp->tdb_flags & TDBF_UDPENCAP)) {
301 		DPRINTF("attempted to use udpencap SA %s/%08x/%u",
302 		    ipsp_address(&dst_address, buf, sizeof(buf)),
303 		    ntohl(spi), tdbp->tdb_sproto);
304 		espstat_inc(esps_udpneeded);
305 		goto drop;
306 	}
307 
308 	if (tdbp->tdb_xform == NULL) {
309 		DPRINTF("attempted to use uninitialized SA %s/%08x/%u",
310 		    ipsp_address(&dst_address, buf, sizeof(buf)),
311 		    ntohl(spi), tdbp->tdb_sproto);
312 		IPSEC_ISTAT(esps_noxform, ahs_noxform, ipcomps_noxform);
313 		goto drop;
314 	}
315 
	/*
	 * The kernel lock is held from here until after the transform
	 * returns; the drop path below is only reached before this point,
	 * so it never has to release it.
	 */
316 	KERNEL_LOCK();
317 	/* Register first use, setup expiration timer. */
318 	if (tdbp->tdb_first_use == 0) {
319 		tdbp->tdb_first_use = gettime();
		/*
		 * An extra SA reference is taken for each timeout for which
		 * timeout_add_sec() returns non-zero.
		 */
320 		if (tdbp->tdb_flags & TDBF_FIRSTUSE) {
321 			if (timeout_add_sec(&tdbp->tdb_first_tmo,
322 			    tdbp->tdb_exp_first_use))
323 				tdb_ref(tdbp);
324 		}
325 		if (tdbp->tdb_flags & TDBF_SOFT_FIRSTUSE) {
326 			if (timeout_add_sec(&tdbp->tdb_sfirst_tmo,
327 			    tdbp->tdb_soft_first_use))
328 				tdb_ref(tdbp);
329 		}
330 	}
331 
332 	tdbstat_pkt(tdbp, tdb_ipackets, tdb_ibytes, m->m_pkthdr.len);
333 
334 	/*
335 	 * Call appropriate transform and return -- callback takes care of
336 	 * everything else.
337 	 */
338 	prot = (*(tdbp->tdb_xform->xf_input))(mp, tdbp, skip, protoff);
	/*
	 * IPPROTO_DONE from the transform is accounted as a drop.  The mbuf
	 * is not freed here on that path -- presumably the transform has
	 * already consumed it; confirm against the xf_input implementations.
	 */
339 	if (prot == IPPROTO_DONE) {
340 		ipsecstat_inc(ipsec_idrops);
341 		tdbstat_inc(tdbp, tdb_idrops);
342 	}
343 	tdb_unref(tdbp);
344 	KERNEL_UNLOCK();
345 	return prot;
346 
	/*
	 * Common failure exit: free the packet and count the drop.  tdbp is
	 * still NULL when we get here before or because of a failed lookup,
	 * so tdb_unref() is called with NULL in that case -- presumably it
	 * tolerates that; confirm.
	 */
347  drop:
348 	m_freemp(mp);
349 	ipsecstat_inc(ipsec_idrops);
350 	if (tdbp != NULL)
351 		tdbstat_inc(tdbp, tdb_idrops);
352 	tdb_unref(tdbp);
353 	return IPPROTO_DONE;
354 }
355 
356 /*
357  * IPsec input callback, called by the transform callback. Takes care of
358  * filtering and other sanity checks on the processed packet.
 *
 * Arguments:
 *   mp      - pointer to the mbuf chain pointer; may be replaced by
 *             m_pullup() or pf_test(), and is freed on the baddone path.
 *   tdbp    - the SA the packet was processed under.
 *   skip    - offset of the payload that follows the outer IP header(s);
 *             used for the length fix-up and as the transport header offset.
 *   protoff - offset of the next-protocol byte (read for IPv6 only).
 *
 * Returns the next protocol number for the caller's delivery loop, or
 * IPPROTO_DONE when the packet was dropped or handed to sec_input()/pf.
 *
 * IPSEC_ISTAT used below is the macro defined inside ipsec_common_input();
 * it dispatches on the local variable sproto and is #undef'd at the end of
 * this function.
359  */
360 int
361 ipsec_common_input_cb(struct mbuf **mp, struct tdb *tdbp, int skip, int protoff)
362 {
363 	struct mbuf *m = *mp;
364 	int af, sproto;
365 	u_int8_t prot;
366 #if NBPFILTER > 0
367 	struct ifnet *encif;
368 #endif
369 	struct ip *ip;
370 #ifdef INET6
371 	struct ip6_hdr *ip6;
372 #endif /* INET6 */
373 	struct m_tag *mtag;
374 	struct tdb_ident *tdbi;
375 #ifdef ENCDEBUG
376 	char buf[INET6_ADDRSTRLEN];
377 #endif
378 
379 	af = tdbp->tdb_dst.sa.sa_family;
380 	sproto = tdbp->tdb_sproto;
381 
382 	tdbp->tdb_last_used = gettime();
383 
	/*
	 * NOTE(review): prot is assigned only in the AF_INET and AF_INET6
	 * branches below; presumably tdb_dst is always one of those two
	 * families -- confirm.
	 */
384 	/* Fix IPv4 header */
385 	if (af == AF_INET) {
386 		if (m->m_len < skip &&
387 		    (m = *mp = m_pullup(m, skip)) == NULL) {
388 			DPRINTF("processing failed for SA %s/%08x",
389 			    ipsp_address(&tdbp->tdb_dst, buf, sizeof(buf)),
390 			    ntohl(tdbp->tdb_spi));
391 			IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
392 			goto baddone;
393 		}
394 
		/* Total length now reflects the processed packet. */
395 		ip = mtod(m, struct ip *);
396 		ip->ip_len = htons(m->m_pkthdr.len);
397 		in_hdr_cksum_out(m, NULL);
398 		prot = ip->ip_p;
399 	}
400 
401 #ifdef INET6
402 	/* Fix IPv6 header */
403 	if (af == AF_INET6) {
404 		if (m->m_len < sizeof(struct ip6_hdr) &&
405 		    (m = *mp = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
406 
407 			DPRINTF("processing failed for SA %s/%08x",
408 			    ipsp_address(&tdbp->tdb_dst, buf, sizeof(buf)),
409 			    ntohl(tdbp->tdb_spi));
410 			IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
411 			goto baddone;
412 		}
413 
		/* Payload length is set to everything after skip. */
414 		ip6 = mtod(m, struct ip6_hdr *);
415 		ip6->ip6_plen = htons(m->m_pkthdr.len - skip);
416 
417 		/* Save protocol */
418 		m_copydata(m, protoff, 1, (caddr_t) &prot);
419 	}
420 #endif /* INET6 */
421 
422 	/*
423 	 * Fix TCP/UDP checksum of UDP encapsulated transport mode ESP packet.
424 	 * (RFC3948 3.1.2)
425 	 */
426 	if ((af == AF_INET || af == AF_INET6) &&
427 	    (tdbp->tdb_flags & TDBF_UDPENCAP) &&
428 	    (tdbp->tdb_flags & TDBF_TUNNELING) == 0) {
429 		u_int16_t cksum;
430 
		/*
		 * NOTE(review): the m_copyback() calls below use M_NOWAIT and
		 * their return values are not checked.
		 */
431 		switch (prot) {
432 		case IPPROTO_UDP:
433 			if (m->m_pkthdr.len < skip + sizeof(struct udphdr)) {
434 				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
435 				    ipcomps_hdrops);
436 				goto baddone;
437 			}
			/*
			 * The UDP checksum is zeroed; for IPv4 it stays zero,
			 * for IPv6 it is recomputed over the payload.
			 */
438 			cksum = 0;
439 			m_copyback(m, skip + offsetof(struct udphdr, uh_sum),
440 			    sizeof(cksum), &cksum, M_NOWAIT);
441 #ifdef INET6
442 			if (af == AF_INET6) {
443 				cksum = in6_cksum(m, IPPROTO_UDP, skip,
444 				    m->m_pkthdr.len - skip);
445 				m_copyback(m, skip + offsetof(struct udphdr,
446 				    uh_sum), sizeof(cksum), &cksum, M_NOWAIT);
447 			}
448 #endif
449 			break;
450 		case IPPROTO_TCP:
451 			if (m->m_pkthdr.len < skip + sizeof(struct tcphdr)) {
452 				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
453 				    ipcomps_hdrops);
454 				goto baddone;
455 			}
			/*
			 * Zero the TCP checksum field first, then recompute
			 * it for the address family and store it back.
			 */
456 			cksum = 0;
457 			m_copyback(m, skip + offsetof(struct tcphdr, th_sum),
458 			    sizeof(cksum), &cksum, M_NOWAIT);
459 			if (af == AF_INET)
460 				cksum = in4_cksum(m, IPPROTO_TCP, skip,
461 				    m->m_pkthdr.len - skip);
462 #ifdef INET6
463 			else if (af == AF_INET6)
464 				cksum = in6_cksum(m, IPPROTO_TCP, skip,
465 				    m->m_pkthdr.len - skip);
466 #endif
467 			m_copyback(m, skip + offsetof(struct tcphdr, th_sum),
468 			    sizeof(cksum), &cksum, M_NOWAIT);
469 			break;
470 		}
471 	}
472 
473 	/*
474 	 * Record what we've done to the packet (under what SA it was
475 	 * processed).
	 * IPComp SAs are not recorded.  The tag written here is what
	 * ipsec_forward_check() and ipsec_local_check() look up later.
476 	 */
477 	if (tdbp->tdb_sproto != IPPROTO_IPCOMP) {
478 		mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE,
479 		    sizeof(struct tdb_ident), M_NOWAIT);
480 		if (mtag == NULL) {
481 			DPRINTF("failed to get tag");
482 			IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
483 			goto baddone;
484 		}
485 
		/* The tdb_ident payload lives directly after the tag header. */
486 		tdbi = (struct tdb_ident *)(mtag + 1);
487 		tdbi->dst = tdbp->tdb_dst;
488 		tdbi->proto = tdbp->tdb_sproto;
489 		tdbi->spi = tdbp->tdb_spi;
490 		tdbi->rdomain = tdbp->tdb_rdomain;
491 
492 		m_tag_prepend(m, mtag);
493 	}
494 
	/* Mark on the mbuf which protections were applied to this packet. */
495 	switch (sproto) {
496 	case IPPROTO_ESP:
497 		/* Packet is confidential ? */
498 		if (tdbp->tdb_encalgxform)
499 			m->m_flags |= M_CONF;
500 
501 		/* Check if we had authenticated ESP. */
502 		if (tdbp->tdb_authalgxform)
503 			m->m_flags |= M_AUTH;
504 		break;
505 	case IPPROTO_AH:
506 		m->m_flags |= M_AUTH;
507 		break;
508 	case IPPROTO_IPCOMP:
509 		m->m_flags |= M_COMP;
510 		break;
511 	default:
512 		panic("%s: unknown/unsupported security protocol %d",
513 		    __func__, sproto);
514 	}
515 
516 #if NPF > 0
517 	/* Add pf tag if requested. */
518 	pf_tag_packet(m, tdbp->tdb_tag, -1);
519 	pf_pkt_addr_changed(m);
520 #endif
	/* Move the packet to the SA's post-processing routing domain. */
521 	if (tdbp->tdb_rdomain != tdbp->tdb_rdomain_post)
522 		m->m_pkthdr.ph_rtableid = tdbp->tdb_rdomain_post;
523 
524 	if (tdbp->tdb_flags & TDBF_TUNNELING)
525 		m->m_flags |= M_TUNNEL;
526 
527 	ipsecstat_add(ipsec_idecompbytes, m->m_pkthdr.len);
528 	tdbstat_add(tdbp, tdb_idecompbytes, m->m_pkthdr.len);
529 
530 #if NBPFILTER > 0
	/*
	 * Account the packet on the matching enc interface and, when a bpf
	 * listener is attached, tap it with an enchdr describing the SA.
	 */
531 	encif = enc_getif(tdbp->tdb_rdomain_post, tdbp->tdb_tap);
532 	if (encif != NULL) {
533 		encif->if_ipackets++;
534 		encif->if_ibytes += m->m_pkthdr.len;
535 
536 		if (sproto != IPPROTO_IPCOMP) {
537 			/* XXX This conflicts with the scoped nature of IPv6 */
538 			m->m_pkthdr.ph_ifidx = encif->if_index;
539 		}
540 		if (encif->if_bpf) {
541 			struct enchdr hdr;
542 
543 			hdr.af = af;
544 			hdr.spi = tdbp->tdb_spi;
545 			hdr.flags = m->m_flags & (M_AUTH|M_CONF);
546 
547 			bpf_mtap_hdr(encif->if_bpf, (char *)&hdr,
548 			    ENC_HDRLEN, m, BPF_DIRECTION_IN);
549 		}
550 	}
551 #endif
552 
	/*
	 * SAs flagged TDBF_IFACE are delivered through sec_input() only
	 * when they are tunnel-mode and inbound; in every other case
	 * (including kernels built without sec(4) support) the packet is
	 * dropped.
	 */
553 	if (ISSET(tdbp->tdb_flags, TDBF_IFACE)) {
554 #if NSEC > 0
555 		if (ISSET(tdbp->tdb_flags, TDBF_TUNNELING) &&
556 		    tdbp->tdb_iface_dir == IPSP_DIRECTION_IN) {
557 			struct sec_softc *sc = sec_get(tdbp->tdb_iface);
558 			if (sc == NULL)
559 				goto baddone;
560 
561 			sec_input(sc, af, prot, m);
562 			sec_put(sc);
563 			return IPPROTO_DONE;
564 		}
565 #endif /* NSEC > 0 */
566 		goto baddone;
567 	}
568 
569 #if NPF > 0
570 	/*
571 	 * The ip_deliver() shortcut avoids running through ip_input() with the
572 	 * same IP header twice.  Packets in transport mode have to be
573 	 * passed to pf explicitly.  In tunnel mode the inner IP header will
574 	 * run through ip_input() and pf anyway.
575 	 */
576 	if ((tdbp->tdb_flags & TDBF_TUNNELING) == 0) {
577 		struct ifnet *ifp;
578 
579 		/* This is the enc0 interface unless for ipcomp. */
580 		if ((ifp = if_get(m->m_pkthdr.ph_ifidx)) == NULL) {
581 			goto baddone;
582 		}
583 		if (pf_test(af, PF_IN, ifp, mp) != PF_PASS) {
584 			if_put(ifp);
585 			goto baddone;
586 		}
		/* pf may have replaced or consumed the mbuf; reload it. */
587 		m = *mp;
588 		if_put(ifp);
589 		if (m == NULL)
590 			return IPPROTO_DONE;
591 	}
592 #endif
593 	/* Return to the appropriate protocol handler in deliver loop. */
594 	return prot;
595 
	/* Failure exit: release the packet and stop the delivery loop. */
596  baddone:
597 	m_freemp(mp);
598 	return IPPROTO_DONE;
599 #undef IPSEC_ISTAT
600 }
601 
602 int
603 ipsec_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
604     size_t newlen)
605 {
606 	int error;
607 
608 	switch (name[0]) {
609 	case IPCTL_IPSEC_ENC_ALGORITHM:
610 		NET_LOCK();
611 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
612 		    ipsec_def_enc, sizeof(ipsec_def_enc));
613 		NET_UNLOCK();
614 		return (error);
615 	case IPCTL_IPSEC_AUTH_ALGORITHM:
616 		NET_LOCK();
617 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
618 		    ipsec_def_auth, sizeof(ipsec_def_auth));
619 		NET_UNLOCK();
620 		return (error);
621 	case IPCTL_IPSEC_IPCOMP_ALGORITHM:
622 		NET_LOCK();
623 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
624 		    ipsec_def_comp, sizeof(ipsec_def_comp));
625 		NET_UNLOCK();
626 		return (error);
627 	case IPCTL_IPSEC_STATS:
628 		return (ipsec_sysctl_ipsecstat(oldp, oldlenp, newp));
629 	default:
630 		NET_LOCK();
631 		error = sysctl_bounded_arr(ipsecctl_vars, nitems(ipsecctl_vars),
632 		    name, namelen, oldp, oldlenp, newp, newlen);
633 		NET_UNLOCK();
634 		return (error);
635 	}
636 }
637 
638 int
639 esp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
640     size_t newlen)
641 {
642 	int error;
643 
644 	/* All sysctl names at this level are terminal. */
645 	if (namelen != 1)
646 		return (ENOTDIR);
647 
648 	switch (name[0]) {
649 	case ESPCTL_STATS:
650 		return (esp_sysctl_espstat(oldp, oldlenp, newp));
651 	default:
652 		NET_LOCK();
653 		error = sysctl_bounded_arr(espctl_vars, nitems(espctl_vars),
654 		    name, namelen, oldp, oldlenp, newp, newlen);
655 		NET_UNLOCK();
656 		return (error);
657 	}
658 }
659 
660 int
661 esp_sysctl_espstat(void *oldp, size_t *oldlenp, void *newp)
662 {
663 	struct espstat espstat;
664 
665 	CTASSERT(sizeof(espstat) == (esps_ncounters * sizeof(uint64_t)));
666 	memset(&espstat, 0, sizeof espstat);
667 	counters_read(espcounters, (uint64_t *)&espstat, esps_ncounters);
668 	return (sysctl_rdstruct(oldp, oldlenp, newp, &espstat,
669 	    sizeof(espstat)));
670 }
671 
672 int
673 ah_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
674     size_t newlen)
675 {
676 	int error;
677 
678 	/* All sysctl names at this level are terminal. */
679 	if (namelen != 1)
680 		return (ENOTDIR);
681 
682 	switch (name[0]) {
683 	case AHCTL_STATS:
684 		return ah_sysctl_ahstat(oldp, oldlenp, newp);
685 	default:
686 		NET_LOCK();
687 		error = sysctl_bounded_arr(ahctl_vars, nitems(ahctl_vars), name,
688 		    namelen, oldp, oldlenp, newp, newlen);
689 		NET_UNLOCK();
690 		return (error);
691 	}
692 }
693 
694 int
695 ah_sysctl_ahstat(void *oldp, size_t *oldlenp, void *newp)
696 {
697 	struct ahstat ahstat;
698 
699 	CTASSERT(sizeof(ahstat) == (ahs_ncounters * sizeof(uint64_t)));
700 	memset(&ahstat, 0, sizeof ahstat);
701 	counters_read(ahcounters, (uint64_t *)&ahstat, ahs_ncounters);
702 	return (sysctl_rdstruct(oldp, oldlenp, newp, &ahstat, sizeof(ahstat)));
703 }
704 
705 int
706 ipcomp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
707     size_t newlen)
708 {
709 	int error;
710 
711 	/* All sysctl names at this level are terminal. */
712 	if (namelen != 1)
713 		return (ENOTDIR);
714 
715 	switch (name[0]) {
716 	case IPCOMPCTL_STATS:
717 		return ipcomp_sysctl_ipcompstat(oldp, oldlenp, newp);
718 	default:
719 		NET_LOCK();
720 		error = sysctl_bounded_arr(ipcompctl_vars,
721 		    nitems(ipcompctl_vars), name, namelen, oldp, oldlenp,
722 		    newp, newlen);
723 		NET_UNLOCK();
724 		return (error);
725 	}
726 }
727 
728 int
729 ipcomp_sysctl_ipcompstat(void *oldp, size_t *oldlenp, void *newp)
730 {
731 	struct ipcompstat ipcompstat;
732 
733 	CTASSERT(sizeof(ipcompstat) == (ipcomps_ncounters * sizeof(uint64_t)));
734 	memset(&ipcompstat, 0, sizeof ipcompstat);
735 	counters_read(ipcompcounters, (uint64_t *)&ipcompstat,
736 	    ipcomps_ncounters);
737 	return (sysctl_rdstruct(oldp, oldlenp, newp, &ipcompstat,
738 	    sizeof(ipcompstat)));
739 }
740 
741 int
742 ipsec_sysctl_ipsecstat(void *oldp, size_t *oldlenp, void *newp)
743 {
744 	struct ipsecstat ipsecstat;
745 
746 	CTASSERT(sizeof(ipsecstat) == (ipsec_ncounters * sizeof(uint64_t)));
747 	memset(&ipsecstat, 0, sizeof ipsecstat);
748 	counters_read(ipseccounters, (uint64_t *)&ipsecstat, ipsec_ncounters);
749 	return (sysctl_rdstruct(oldp, oldlenp, newp, &ipsecstat,
750 	    sizeof(ipsecstat)));
751 }
752 
753 int
754 ipsec_input_disabled(struct mbuf **mp, int *offp, int proto, int af)
755 {
756 	switch (af) {
757 	case AF_INET:
758 		return rip_input(mp, offp, proto, af);
759 #ifdef INET6
760 	case AF_INET6:
761 		return rip6_input(mp, offp, proto, af);
762 #endif
763 	default:
764 		unhandled_af(af);
765 	}
766 }
767 
768 int
769 ah46_input(struct mbuf **mp, int *offp, int proto, int af)
770 {
771 	int protoff;
772 
773 	if (
774 #if NPF > 0
775 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
776 #endif
777 	    !ah_enable)
778 		return ipsec_input_disabled(mp, offp, proto, af);
779 
780 	protoff = ipsec_protoff(*mp, *offp, af);
781 	if (protoff < 0) {
782 		DPRINTF("bad packet header chain");
783 		ahstat_inc(ahs_hdrops);
784 		m_freemp(mp);
785 		return IPPROTO_DONE;
786 	}
787 
788 	return ipsec_common_input(mp, *offp, protoff, af, proto, 0);
789 }
790 
791 void
792 ah4_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
793 {
794 	if (sa->sa_family != AF_INET ||
795 	    sa->sa_len != sizeof(struct sockaddr_in))
796 		return;
797 
798 	ipsec_common_ctlinput(rdomain, cmd, sa, v, IPPROTO_AH);
799 }
800 
801 int
802 esp46_input(struct mbuf **mp, int *offp, int proto, int af)
803 {
804 	int protoff;
805 
806 	if (
807 #if NPF > 0
808 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
809 #endif
810 	    !esp_enable)
811 		return ipsec_input_disabled(mp, offp, proto, af);
812 
813 	protoff = ipsec_protoff(*mp, *offp, af);
814 	if (protoff < 0) {
815 		DPRINTF("bad packet header chain");
816 		espstat_inc(esps_hdrops);
817 		m_freemp(mp);
818 		return IPPROTO_DONE;
819 	}
820 
821 	return ipsec_common_input(mp, *offp, protoff, af, proto, 0);
822 }
823 
824 /* IPv4 IPCOMP wrapper */
825 int
826 ipcomp46_input(struct mbuf **mp, int *offp, int proto, int af)
827 {
828 	int protoff;
829 
830 	if (
831 #if NPF > 0
832 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
833 #endif
834 	    !ipcomp_enable)
835 		return ipsec_input_disabled(mp, offp, proto, af);
836 
837 	protoff = ipsec_protoff(*mp, *offp, af);
838 	if (protoff < 0) {
839 		DPRINTF("bad packet header chain");
840 		ipcompstat_inc(ipcomps_hdrops);
841 		m_freemp(mp);
842 		return IPPROTO_DONE;
843 	}
844 
845 	return ipsec_common_input(mp, *offp, protoff, af, proto, 0);
846 }
847 
848 void
849 ipsec_set_mtu(struct tdb *tdbp, u_int32_t mtu)
850 {
851 	ssize_t adjust;
852 
853 	NET_ASSERT_LOCKED();
854 
855 	/* Walk the chain backwards to the first tdb */
856 	for (; tdbp != NULL; tdbp = tdbp->tdb_inext) {
857 		if (tdbp->tdb_flags & TDBF_INVALID ||
858 		    (adjust = ipsec_hdrsz(tdbp)) == -1)
859 			return;
860 
861 		mtu -= adjust;
862 
863 		/* Store adjusted MTU in tdb */
864 		tdbp->tdb_mtu = mtu;
865 		tdbp->tdb_mtutimeout = gettime() + ip_mtudisc_timeout;
866 		DPRINTF("spi %08x mtu %d adjust %ld",
867 		    ntohl(tdbp->tdb_spi), tdbp->tdb_mtu, adjust);
868 	}
869 }
870 
871 void
872 ipsec_common_ctlinput(u_int rdomain, int cmd, struct sockaddr *sa,
873     void *v, int proto)
874 {
875 	struct ip *ip = v;
876 
877 	if (cmd == PRC_MSGSIZE && ip && ip_mtudisc && ip->ip_v == 4) {
878 		struct tdb *tdbp;
879 		struct sockaddr_in dst;
880 		struct icmp *icp;
881 		int hlen = ip->ip_hl << 2;
882 		u_int32_t spi, mtu;
883 
884 		/* Find the right MTU. */
885 		icp = (struct icmp *)((caddr_t) ip -
886 		    offsetof(struct icmp, icmp_ip));
887 		mtu = ntohs(icp->icmp_nextmtu);
888 
889 		/*
890 		 * Ignore the packet, if we do not receive a MTU
891 		 * or the MTU is too small to be acceptable.
892 		 */
893 		if (mtu < 296)
894 			return;
895 
896 		memset(&dst, 0, sizeof(struct sockaddr_in));
897 		dst.sin_family = AF_INET;
898 		dst.sin_len = sizeof(struct sockaddr_in);
899 		dst.sin_addr.s_addr = ip->ip_dst.s_addr;
900 
901 		memcpy(&spi, (caddr_t)ip + hlen, sizeof(u_int32_t));
902 
903 		tdbp = gettdb_rev(rdomain, spi, (union sockaddr_union *)&dst,
904 		    proto);
905 		ipsec_set_mtu(tdbp, mtu);
906 		tdb_unref(tdbp);
907 	}
908 }
909 
910 void
911 udpencap_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
912 {
913 	struct ip *ip = v;
914 	struct tdb *tdbp, *first;
915 	struct icmp *icp;
916 	u_int32_t mtu;
917 	struct sockaddr_in dst, src;
918 	union sockaddr_union *su_dst, *su_src;
919 
920 	NET_ASSERT_LOCKED();
921 
922 	icp = (struct icmp *)((caddr_t) ip - offsetof(struct icmp, icmp_ip));
923 	mtu = ntohs(icp->icmp_nextmtu);
924 
925 	/*
926 	 * Ignore the packet, if we do not receive a MTU
927 	 * or the MTU is too small to be acceptable.
928 	 */
929 	if (mtu < 296)
930 		return;
931 
932 	memset(&dst, 0, sizeof(dst));
933 	dst.sin_family = AF_INET;
934 	dst.sin_len = sizeof(struct sockaddr_in);
935 	dst.sin_addr.s_addr = ip->ip_dst.s_addr;
936 	su_dst = (union sockaddr_union *)&dst;
937 	memset(&src, 0, sizeof(src));
938 	src.sin_family = AF_INET;
939 	src.sin_len = sizeof(struct sockaddr_in);
940 	src.sin_addr.s_addr = ip->ip_src.s_addr;
941 	su_src = (union sockaddr_union *)&src;
942 
943 	first = gettdbbysrcdst_rev(rdomain, 0, su_src, su_dst, IPPROTO_ESP);
944 
945 	mtx_enter(&tdb_sadb_mtx);
946 	for (tdbp = first; tdbp != NULL; tdbp = tdbp->tdb_snext) {
947 		if (tdbp->tdb_sproto == IPPROTO_ESP &&
948 		    ((tdbp->tdb_flags & (TDBF_INVALID|TDBF_UDPENCAP)) ==
949 		    TDBF_UDPENCAP) &&
950 		    !memcmp(&tdbp->tdb_dst, &dst, su_dst->sa.sa_len) &&
951 		    !memcmp(&tdbp->tdb_src, &src, su_src->sa.sa_len))
952 			ipsec_set_mtu(tdbp, mtu);
953 	}
954 	mtx_leave(&tdb_sadb_mtx);
955 	tdb_unref(first);
956 }
957 
958 void
959 esp4_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
960 {
961 	if (sa->sa_family != AF_INET ||
962 	    sa->sa_len != sizeof(struct sockaddr_in))
963 		return;
964 
965 	ipsec_common_ctlinput(rdomain, cmd, sa, v, IPPROTO_ESP);
966 }
967 
968 /* Find the offset of the next protocol field in the previous header. */
969 int
970 ipsec_protoff(struct mbuf *m, int off, int af)
971 {
972 #ifdef INET6
973 	struct ip6_ext ip6e;
974 	int protoff, nxt, l;
975 #endif /* INET6 */
976 
977 	switch (af) {
978 	case AF_INET:
979 		return offsetof(struct ip, ip_p);
980 #ifdef INET6
981 	case AF_INET6:
982 		break;
983 #endif /* INET6 */
984 	default:
985 		unhandled_af(af);
986 	}
987 
988 #ifdef INET6
989 	if (off < sizeof(struct ip6_hdr))
990 		return -1;
991 
992 	if (off == sizeof(struct ip6_hdr))
993 		return offsetof(struct ip6_hdr, ip6_nxt);
994 
995 	/* Chase down the header chain... */
996 	protoff = sizeof(struct ip6_hdr);
997 	nxt = (mtod(m, struct ip6_hdr *))->ip6_nxt;
998 	l = 0;
999 
1000 	do {
1001 		protoff += l;
1002 		m_copydata(m, protoff, sizeof(ip6e),
1003 		    (caddr_t) &ip6e);
1004 
1005 		if (nxt == IPPROTO_AH)
1006 			l = (ip6e.ip6e_len + 2) << 2;
1007 		else
1008 			l = (ip6e.ip6e_len + 1) << 3;
1009 #ifdef DIAGNOSTIC
1010 		if (l <= 0)
1011 			panic("%s: l went zero or negative", __func__);
1012 #endif
1013 
1014 		nxt = ip6e.ip6e_nxt;
1015 	} while (protoff + l < off);
1016 
1017 	/* Malformed packet check */
1018 	if (protoff + l != off)
1019 		return -1;
1020 
1021 	protoff += offsetof(struct ip6_ext, ip6e_nxt);
1022 	return protoff;
1023 #endif /* INET6 */
1024 }
1025 
1026 int
1027 ipsec_forward_check(struct mbuf *m, int hlen, int af)
1028 {
1029 	struct tdb *tdb;
1030 	struct tdb_ident *tdbi;
1031 	struct m_tag *mtag;
1032 	int error = 0;
1033 
1034 	/*
1035 	 * IPsec policy check for forwarded packets. Look at
1036 	 * inner-most IPsec SA used.
1037 	 */
1038 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
1039 	if (mtag != NULL) {
1040 		tdbi = (struct tdb_ident *)(mtag + 1);
1041 		tdb = gettdb(tdbi->rdomain, tdbi->spi, &tdbi->dst, tdbi->proto);
1042 	} else
1043 		tdb = NULL;
1044 	error = ipsp_spd_lookup(m, af, hlen, IPSP_DIRECTION_IN,
1045 	    tdb, NULL, NULL, NULL);
1046 	tdb_unref(tdb);
1047 
1048 	return error;
1049 }
1050 
1051 int
1052 ipsec_local_check(struct mbuf *m, int hlen, int proto, int af)
1053 {
1054 	struct tdb *tdb;
1055 	struct tdb_ident *tdbi;
1056 	struct m_tag *mtag;
1057 	int error = 0;
1058 
1059 	/*
1060 	 * If it's a protected packet for us, skip the policy check.
1061 	 * That's because we really only care about the properties of
1062 	 * the protected packet, and not the intermediate versions.
1063 	 * While this is not the most paranoid setting, it allows
1064 	 * some flexibility in handling nested tunnels (in setting up
1065 	 * the policies).
1066 	 */
1067 	if ((proto == IPPROTO_ESP) || (proto == IPPROTO_AH) ||
1068 	    (proto == IPPROTO_IPCOMP))
1069 		return 0;
1070 
1071 	/*
1072 	 * If the protected packet was tunneled, then we need to
1073 	 * verify the protected packet's information, not the
1074 	 * external headers. Thus, skip the policy lookup for the
1075 	 * external packet, and keep the IPsec information linked on
1076 	 * the packet header (the encapsulation routines know how
1077 	 * to deal with that).
1078 	 */
1079 	if ((proto == IPPROTO_IPV4) || (proto == IPPROTO_IPV6))
1080 		return 0;
1081 
1082 	/*
1083 	 * When processing IPv6 header chains, do not look at the
1084 	 * outer header.  The inner protocol is relevant and will
1085 	 * be checked by the local delivery loop later.
1086 	 */
1087 	if ((af == AF_INET6) && ((proto == IPPROTO_DSTOPTS) ||
1088 	    (proto == IPPROTO_ROUTING) || (proto == IPPROTO_FRAGMENT)))
1089 		return 0;
1090 
1091 	/*
1092 	 * If the protected packet is TCP or UDP, we'll do the
1093 	 * policy check in the respective input routine, so we can
1094 	 * check for bypass sockets.
1095 	 */
1096 	if ((proto == IPPROTO_TCP) || (proto == IPPROTO_UDP))
1097 		return 0;
1098 
1099 	/*
1100 	 * IPsec policy check for local-delivery packets. Look at the
1101 	 * inner-most SA that protected the packet. This is in fact
1102 	 * a bit too restrictive (it could end up causing packets to
1103 	 * be dropped that semantically follow the policy, e.g., in
1104 	 * certain SA-bundle configurations); but the alternative is
1105 	 * very complicated (and requires keeping track of what
1106 	 * kinds of tunneling headers have been seen in-between the
1107 	 * IPsec headers), and I don't think we lose much functionality
1108 	 * that's needed in the real world (who uses bundles anyway ?).
1109 	 */
1110 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
1111 	if (mtag) {
1112 		tdbi = (struct tdb_ident *)(mtag + 1);
1113 		tdb = gettdb(tdbi->rdomain, tdbi->spi, &tdbi->dst,
1114 		    tdbi->proto);
1115 	} else
1116 		tdb = NULL;
1117 	error = ipsp_spd_lookup(m, af, hlen, IPSP_DIRECTION_IN,
1118 	    tdb, NULL, NULL, NULL);
1119 	tdb_unref(tdb);
1120 
1121 	return error;
1122 }
1123