xref: /openbsd-src/sys/netinet/ipsec_input.c (revision ef70a379631ec7e97481e1d0e500e21496e6c4aa)
1 /*	$OpenBSD: ipsec_input.c,v 1.191 2021/11/11 18:08:18 bluhm Exp $	*/
2 /*
3  * The authors of this code are John Ioannidis (ji@tla.org),
4  * Angelos D. Keromytis (kermit@csd.uch.gr) and
5  * Niels Provos (provos@physnet.uni-hamburg.de).
6  *
7  * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
8  * in November 1995.
9  *
10  * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
11  * by Angelos D. Keromytis.
12  *
13  * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
14  * and Niels Provos.
15  *
16  * Additional features in 1999 by Angelos D. Keromytis.
17  *
18  * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
19  * Angelos D. Keromytis and Niels Provos.
20  * Copyright (c) 2001, Angelos D. Keromytis.
21  *
22  * Permission to use, copy, and modify this software with or without fee
23  * is hereby granted, provided that this entire notice is included in
24  * all copies of any software which is or includes a copy or
25  * modification of this software.
26  * You may use this code under the GNU public license if you so wish. Please
27  * contribute changes back to the authors under this freer than GPL license
28  * so that we may further the use of strong encryption without limitations to
29  * all.
30  *
31  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
32  * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
33  * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
34  * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
35  * PURPOSE.
36  */
37 
38 #include "pf.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/protosw.h>
43 #include <sys/mbuf.h>
44 #include <sys/socket.h>
45 #include <sys/sysctl.h>
46 #include <sys/kernel.h>
47 #include <sys/timeout.h>
48 
49 #include <net/if.h>
50 #include <net/if_var.h>
51 #include <net/netisr.h>
52 #include <net/bpf.h>
53 #include <net/route.h>
54 
55 #include <netinet/in.h>
56 #include <netinet/ip.h>
57 #include <netinet/ip_var.h>
58 #include <netinet/ip_icmp.h>
59 #include <netinet/tcp.h>
60 #include <netinet/udp.h>
61 
62 #if NPF > 0
63 #include <net/pfvar.h>
64 #endif
65 
66 #ifdef INET6
67 #include <netinet6/in6_var.h>
68 #include <netinet/ip6.h>
69 #include <netinet6/ip6_var.h>
70 #include <netinet6/ip6protosw.h>
71 #endif /* INET6 */
72 
73 #include <netinet/ip_ipsp.h>
74 #include <netinet/ip_esp.h>
75 #include <netinet/ip_ah.h>
76 #include <netinet/ip_ipcomp.h>
77 
78 #include <net/if_enc.h>
79 
80 #include <crypto/cryptodev.h>
81 #include <crypto/xform.h>
82 
83 #include "bpfilter.h"
84 
85 void ipsec_common_ctlinput(u_int, int, struct sockaddr *, void *, int);
86 
/*
 * Debug tracing.  When ENCDEBUG is not defined DPRINTF() expands to an
 * empty statement and its arguments are never evaluated.  Otherwise it
 * prints the calling function's name followed by the formatted message,
 * but only while the encdebug variable is non-zero.
 */
#ifndef ENCDEBUG
#define DPRINTF(fmt, args...)						\
	do { } while (0)
#else
#define DPRINTF(fmt, args...)						\
	do {								\
		if (encdebug)						\
			printf("%s: " fmt "\n", __func__, ## args);	\
	} while (0)
#endif
97 
98 /* sysctl variables */
99 int encdebug = 0;
100 int ipsec_keep_invalid = IPSEC_DEFAULT_EMBRYONIC_SA_TIMEOUT;
101 int ipsec_require_pfs = IPSEC_DEFAULT_PFS;
102 int ipsec_soft_allocations = IPSEC_DEFAULT_SOFT_ALLOCATIONS;
103 int ipsec_exp_allocations = IPSEC_DEFAULT_EXP_ALLOCATIONS;
104 int ipsec_soft_bytes = IPSEC_DEFAULT_SOFT_BYTES;
105 int ipsec_exp_bytes = IPSEC_DEFAULT_EXP_BYTES;
106 int ipsec_soft_timeout = IPSEC_DEFAULT_SOFT_TIMEOUT;
107 int ipsec_exp_timeout = IPSEC_DEFAULT_EXP_TIMEOUT;
108 int ipsec_soft_first_use = IPSEC_DEFAULT_SOFT_FIRST_USE;
109 int ipsec_exp_first_use = IPSEC_DEFAULT_EXP_FIRST_USE;
110 int ipsec_expire_acquire = IPSEC_DEFAULT_EXPIRE_ACQUIRE;
111 
112 int esp_enable = 1;
113 int ah_enable = 1;
114 int ipcomp_enable = 0;
115 
116 const struct sysctl_bounded_args espctl_vars[] = {
117 	{ESPCTL_ENABLE, &esp_enable, 0, 1},
118 	{ESPCTL_UDPENCAP_ENABLE, &udpencap_enable, 0, 1},
119 	{ESPCTL_UDPENCAP_PORT, &udpencap_port, 0, 65535},
120 };
121 const struct sysctl_bounded_args ahctl_vars[] = {
122 	{AHCTL_ENABLE, &ah_enable, 0, 1},
123 };
124 const struct sysctl_bounded_args ipcompctl_vars[] = {
125 	{IPCOMPCTL_ENABLE, &ipcomp_enable, 0, 1},
126 };
127 
/* Statistics counter sets, allocated by ipsec_init(). */
struct cpumem *ipseccounters;
struct cpumem *espcounters;
struct cpumem *ahcounters;
struct cpumem *ipcompcounters;

/*
 * Default algorithm names, filled in by ipsec_init() and exposed as
 * strings by ipsec_sysctl().
 */
char ipsec_def_enc[20];
char ipsec_def_auth[20];
char ipsec_def_comp[20];
136 
137 const struct sysctl_bounded_args ipsecctl_vars[] = {
138 	{ IPSEC_ENCDEBUG, &encdebug, 0, 1 },
139 	{ IPSEC_EXPIRE_ACQUIRE, &ipsec_expire_acquire, 0, INT_MAX },
140 	{ IPSEC_EMBRYONIC_SA_TIMEOUT, &ipsec_keep_invalid, 0, INT_MAX },
141 	{ IPSEC_REQUIRE_PFS, &ipsec_require_pfs, 0, 1 },
142 	{ IPSEC_SOFT_ALLOCATIONS, &ipsec_soft_allocations, 0, INT_MAX },
143 	{ IPSEC_ALLOCATIONS, &ipsec_exp_allocations, 0, INT_MAX },
144 	{ IPSEC_SOFT_BYTES, &ipsec_soft_bytes, 0, INT_MAX },
145 	{ IPSEC_BYTES, &ipsec_exp_bytes, 0, INT_MAX },
146 	{ IPSEC_TIMEOUT, &ipsec_exp_timeout, 0, INT_MAX },
147 	{ IPSEC_SOFT_TIMEOUT, &ipsec_soft_timeout,0, INT_MAX },
148 	{ IPSEC_SOFT_FIRSTUSE, &ipsec_soft_first_use, 0, INT_MAX },
149 	{ IPSEC_FIRSTUSE, &ipsec_exp_first_use, 0, INT_MAX },
150 };
151 
152 int esp_sysctl_espstat(void *, size_t *, void *);
153 int ah_sysctl_ahstat(void *, size_t *, void *);
154 int ipcomp_sysctl_ipcompstat(void *, size_t *, void *);
155 int ipsec_sysctl_ipsecstat(void *, size_t *, void *);
156 
157 void
158 ipsec_init(void)
159 {
160 	espcounters = counters_alloc(esps_ncounters);
161 	ahcounters = counters_alloc(ahs_ncounters);
162 	ipcompcounters = counters_alloc(ipcomps_ncounters);
163 	ipseccounters = counters_alloc(ipsec_ncounters);
164 
165 	strlcpy(ipsec_def_enc, IPSEC_DEFAULT_DEF_ENC, sizeof(ipsec_def_enc));
166 	strlcpy(ipsec_def_auth, IPSEC_DEFAULT_DEF_AUTH, sizeof(ipsec_def_auth));
167 	strlcpy(ipsec_def_comp, IPSEC_DEFAULT_DEF_COMP, sizeof(ipsec_def_comp));
168 
169 	ipsp_init();
170 }
171 
172 /*
173  * ipsec_common_input() gets called when we receive an IPsec-protected packet
174  * in IPv4 or IPv6. All it does is find the right TDB and call the appropriate
175  * transform. The callback takes care of further processing (like ingress
176  * filtering).
177  */
178 int
179 ipsec_common_input(struct mbuf **mp, int skip, int protoff, int af, int sproto,
180     int udpencap)
181 {
182 #define IPSEC_ISTAT(x,y,z) do {			\
183 	if (sproto == IPPROTO_ESP)		\
184 		espstat_inc(x);			\
185 	else if (sproto == IPPROTO_AH)		\
186 		ahstat_inc(y);			\
187 	else					\
188 		ipcompstat_inc(z);		\
189 } while (0)
190 
191 	struct mbuf *m = *mp;
192 	union sockaddr_union dst_address;
193 	struct tdb *tdbp = NULL;
194 	struct ifnet *encif;
195 	u_int32_t spi;
196 	u_int16_t cpi;
197 	int prot;
198 #ifdef ENCDEBUG
199 	char buf[INET6_ADDRSTRLEN];
200 #endif
201 
202 	NET_ASSERT_LOCKED();
203 
204 	ipsecstat_pkt(ipsec_ipackets, ipsec_ibytes, m->m_pkthdr.len);
205 	IPSEC_ISTAT(esps_input, ahs_input, ipcomps_input);
206 
207 	if ((sproto == IPPROTO_IPCOMP) && (m->m_flags & M_COMP)) {
208 		DPRINTF("repeated decompression");
209 		ipcompstat_inc(ipcomps_pdrops);
210 		goto drop;
211 	}
212 
213 	if (m->m_pkthdr.len - skip < 2 * sizeof(u_int32_t)) {
214 		DPRINTF("packet too small");
215 		IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
216 		goto drop;
217 	}
218 
219 	/* Retrieve the SPI from the relevant IPsec header */
220 	switch (sproto) {
221 	case IPPROTO_ESP:
222 		m_copydata(m, skip, sizeof(u_int32_t), (caddr_t) &spi);
223 		break;
224 	case IPPROTO_AH:
225 		m_copydata(m, skip + sizeof(u_int32_t), sizeof(u_int32_t),
226 		    (caddr_t) &spi);
227 		break;
228 	case IPPROTO_IPCOMP:
229 		m_copydata(m, skip + sizeof(u_int16_t), sizeof(u_int16_t),
230 		    (caddr_t) &cpi);
231 		spi = ntohl(htons(cpi));
232 		break;
233 	default:
234 		panic("%s: unknown/unsupported security protocol %d",
235 		    __func__, sproto);
236 	}
237 
238 	/*
239 	 * Find tunnel control block and (indirectly) call the appropriate
240 	 * kernel crypto routine. The resulting mbuf chain is a valid
241 	 * IP packet ready to go through input processing.
242 	 */
243 
244 	memset(&dst_address, 0, sizeof(dst_address));
245 	dst_address.sa.sa_family = af;
246 
247 	switch (af) {
248 	case AF_INET:
249 		dst_address.sin.sin_len = sizeof(struct sockaddr_in);
250 		m_copydata(m, offsetof(struct ip, ip_dst),
251 		    sizeof(struct in_addr),
252 		    (caddr_t) &(dst_address.sin.sin_addr));
253 		break;
254 
255 #ifdef INET6
256 	case AF_INET6:
257 		dst_address.sin6.sin6_len = sizeof(struct sockaddr_in6);
258 		m_copydata(m, offsetof(struct ip6_hdr, ip6_dst),
259 		    sizeof(struct in6_addr),
260 		    (caddr_t) &(dst_address.sin6.sin6_addr));
261 		in6_recoverscope(&dst_address.sin6,
262 		    &dst_address.sin6.sin6_addr);
263 		break;
264 #endif /* INET6 */
265 
266 	default:
267 		DPRINTF("unsupported protocol family %d", af);
268 		IPSEC_ISTAT(esps_nopf, ahs_nopf, ipcomps_nopf);
269 		goto drop;
270 	}
271 
272 	tdbp = gettdb(rtable_l2(m->m_pkthdr.ph_rtableid),
273 	    spi, &dst_address, sproto);
274 	if (tdbp == NULL) {
275 		DPRINTF("could not find SA for packet to %s, spi %08x",
276 		    ipsp_address(&dst_address, buf, sizeof(buf)), ntohl(spi));
277 		IPSEC_ISTAT(esps_notdb, ahs_notdb, ipcomps_notdb);
278 		goto drop;
279 	}
280 
281 	if (tdbp->tdb_flags & TDBF_INVALID) {
282 		DPRINTF("attempted to use invalid SA %s/%08x/%u",
283 		    ipsp_address(&dst_address, buf, sizeof(buf)),
284 		    ntohl(spi), tdbp->tdb_sproto);
285 		IPSEC_ISTAT(esps_invalid, ahs_invalid, ipcomps_invalid);
286 		goto drop;
287 	}
288 
289 	if (udpencap && !(tdbp->tdb_flags & TDBF_UDPENCAP)) {
290 		DPRINTF("attempted to use non-udpencap SA %s/%08x/%u",
291 		    ipsp_address(&dst_address, buf, sizeof(buf)),
292 		    ntohl(spi), tdbp->tdb_sproto);
293 		espstat_inc(esps_udpinval);
294 		goto drop;
295 	}
296 
297 	if (!udpencap && (tdbp->tdb_flags & TDBF_UDPENCAP)) {
298 		DPRINTF("attempted to use udpencap SA %s/%08x/%u",
299 		    ipsp_address(&dst_address, buf, sizeof(buf)),
300 		    ntohl(spi), tdbp->tdb_sproto);
301 		espstat_inc(esps_udpneeded);
302 		goto drop;
303 	}
304 
305 	if (tdbp->tdb_xform == NULL) {
306 		DPRINTF("attempted to use uninitialized SA %s/%08x/%u",
307 		    ipsp_address(&dst_address, buf, sizeof(buf)),
308 		    ntohl(spi), tdbp->tdb_sproto);
309 		IPSEC_ISTAT(esps_noxform, ahs_noxform, ipcomps_noxform);
310 		goto drop;
311 	}
312 
313 	if (sproto != IPPROTO_IPCOMP) {
314 		if ((encif = enc_getif(tdbp->tdb_rdomain_post,
315 		    tdbp->tdb_tap)) == NULL) {
316 			DPRINTF("no enc%u interface for SA %s/%08x/%u",
317 			    tdbp->tdb_tap,
318 			    ipsp_address(&dst_address, buf, sizeof(buf)),
319 			    ntohl(spi), tdbp->tdb_sproto);
320 			IPSEC_ISTAT(esps_pdrops, ahs_pdrops, ipcomps_pdrops);
321 			goto drop;
322 		}
323 
324 		/* XXX This conflicts with the scoped nature of IPv6 */
325 		m->m_pkthdr.ph_ifidx = encif->if_index;
326 	}
327 
328 	/* Register first use, setup expiration timer. */
329 	if (tdbp->tdb_first_use == 0) {
330 		tdbp->tdb_first_use = gettime();
331 		if (tdbp->tdb_flags & TDBF_FIRSTUSE)
332 			timeout_add_sec(&tdbp->tdb_first_tmo,
333 			    tdbp->tdb_exp_first_use);
334 		if (tdbp->tdb_flags & TDBF_SOFT_FIRSTUSE)
335 			timeout_add_sec(&tdbp->tdb_sfirst_tmo,
336 			    tdbp->tdb_soft_first_use);
337 	}
338 
339 	tdbp->tdb_ipackets++;
340 	tdbp->tdb_ibytes += m->m_pkthdr.len;
341 
342 	/*
343 	 * Call appropriate transform and return -- callback takes care of
344 	 * everything else.
345 	 */
346 	prot = (*(tdbp->tdb_xform->xf_input))(mp, tdbp, skip, protoff);
347 	if (prot == IPPROTO_DONE) {
348 		ipsecstat_inc(ipsec_idrops);
349 		tdbp->tdb_idrops++;
350 	}
351 	return prot;
352 
353  drop:
354 	m_freemp(mp);
355 	ipsecstat_inc(ipsec_idrops);
356 	if (tdbp != NULL)
357 		tdbp->tdb_idrops++;
358 	return IPPROTO_DONE;
359 }
360 
361 /*
362  * IPsec input callback, called by the transform callback. Takes care of
363  * filtering and other sanity checks on the processed packet.
364  */
365 int
366 ipsec_common_input_cb(struct mbuf **mp, struct tdb *tdbp, int skip, int protoff)
367 {
368 	struct mbuf *m = *mp;
369 	int af, sproto;
370 	u_int8_t prot;
371 #if NBPFILTER > 0
372 	struct ifnet *encif;
373 #endif
374 	struct ip *ip, ipn;
375 #ifdef INET6
376 	struct ip6_hdr *ip6, ip6n;
377 #endif /* INET6 */
378 	struct m_tag *mtag;
379 	struct tdb_ident *tdbi;
380 #ifdef ENCDEBUG
381 	char buf[INET6_ADDRSTRLEN];
382 #endif
383 
384 	af = tdbp->tdb_dst.sa.sa_family;
385 	sproto = tdbp->tdb_sproto;
386 
387 	tdbp->tdb_last_used = gettime();
388 
389 	/* Fix IPv4 header */
390 	if (af == AF_INET) {
391 		if (m->m_len < skip &&
392 		    (m = *mp = m_pullup(m, skip)) == NULL) {
393 			DPRINTF("processing failed for SA %s/%08x",
394 			    ipsp_address(&tdbp->tdb_dst, buf, sizeof(buf)),
395 			    ntohl(tdbp->tdb_spi));
396 			IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
397 			goto baddone;
398 		}
399 
400 		ip = mtod(m, struct ip *);
401 		ip->ip_len = htons(m->m_pkthdr.len);
402 		ip->ip_sum = 0;
403 		ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
404 		prot = ip->ip_p;
405 
406 		/* IP-in-IP encapsulation */
407 		if (prot == IPPROTO_IPIP) {
408 			if (m->m_pkthdr.len - skip < sizeof(struct ip)) {
409 				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
410 				    ipcomps_hdrops);
411 				goto baddone;
412 			}
413 			/* ipn will now contain the inner IPv4 header */
414 			m_copydata(m, skip, sizeof(struct ip),
415 			    (caddr_t) &ipn);
416 		}
417 
418 #ifdef INET6
419 		/* IPv6-in-IP encapsulation. */
420 		if (prot == IPPROTO_IPV6) {
421 			if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) {
422 				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
423 				    ipcomps_hdrops);
424 				goto baddone;
425 			}
426 			/* ip6n will now contain the inner IPv6 header. */
427 			m_copydata(m, skip, sizeof(struct ip6_hdr),
428 			    (caddr_t) &ip6n);
429 		}
430 #endif /* INET6 */
431 	}
432 
433 #ifdef INET6
434 	/* Fix IPv6 header */
435 	if (af == AF_INET6) {
436 		if (m->m_len < sizeof(struct ip6_hdr) &&
437 		    (m = *mp = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
438 
439 			DPRINTF("processing failed for SA %s/%08x",
440 			    ipsp_address(&tdbp->tdb_dst, buf, sizeof(buf)),
441 			    ntohl(tdbp->tdb_spi));
442 			IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
443 			goto baddone;
444 		}
445 
446 		ip6 = mtod(m, struct ip6_hdr *);
447 		ip6->ip6_plen = htons(m->m_pkthdr.len - skip);
448 
449 		/* Save protocol */
450 		m_copydata(m, protoff, 1, (caddr_t) &prot);
451 
452 		/* IP-in-IP encapsulation */
453 		if (prot == IPPROTO_IPIP) {
454 			if (m->m_pkthdr.len - skip < sizeof(struct ip)) {
455 				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
456 				    ipcomps_hdrops);
457 				goto baddone;
458 			}
459 			/* ipn will now contain the inner IPv4 header */
460 			m_copydata(m, skip, sizeof(struct ip), (caddr_t) &ipn);
461 		}
462 
463 		/* IPv6-in-IP encapsulation */
464 		if (prot == IPPROTO_IPV6) {
465 			if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) {
466 				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
467 				    ipcomps_hdrops);
468 				goto baddone;
469 			}
470 			/* ip6n will now contain the inner IPv6 header. */
471 			m_copydata(m, skip, sizeof(struct ip6_hdr),
472 			    (caddr_t) &ip6n);
473 		}
474 	}
475 #endif /* INET6 */
476 
477 	/*
478 	 * Fix TCP/UDP checksum of UDP encapsulated transport mode ESP packet.
479 	 * (RFC3948 3.1.2)
480 	 */
481 	if ((af == AF_INET || af == AF_INET6) &&
482 	    (tdbp->tdb_flags & TDBF_UDPENCAP) &&
483 	    (tdbp->tdb_flags & TDBF_TUNNELING) == 0) {
484 		u_int16_t cksum;
485 
486 		switch (prot) {
487 		case IPPROTO_UDP:
488 			if (m->m_pkthdr.len < skip + sizeof(struct udphdr)) {
489 				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
490 				    ipcomps_hdrops);
491 				goto baddone;
492 			}
493 			cksum = 0;
494 			m_copyback(m, skip + offsetof(struct udphdr, uh_sum),
495 			    sizeof(cksum), &cksum, M_NOWAIT);
496 #ifdef INET6
497 			if (af == AF_INET6) {
498 				cksum = in6_cksum(m, IPPROTO_UDP, skip,
499 				    m->m_pkthdr.len - skip);
500 				m_copyback(m, skip + offsetof(struct udphdr,
501 				    uh_sum), sizeof(cksum), &cksum, M_NOWAIT);
502 			}
503 #endif
504 			break;
505 		case IPPROTO_TCP:
506 			if (m->m_pkthdr.len < skip + sizeof(struct tcphdr)) {
507 				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
508 				    ipcomps_hdrops);
509 				goto baddone;
510 			}
511 			cksum = 0;
512 			m_copyback(m, skip + offsetof(struct tcphdr, th_sum),
513 			    sizeof(cksum), &cksum, M_NOWAIT);
514 			if (af == AF_INET)
515 				cksum = in4_cksum(m, IPPROTO_TCP, skip,
516 				    m->m_pkthdr.len - skip);
517 #ifdef INET6
518 			else if (af == AF_INET6)
519 				cksum = in6_cksum(m, IPPROTO_TCP, skip,
520 				    m->m_pkthdr.len - skip);
521 #endif
522 			m_copyback(m, skip + offsetof(struct tcphdr, th_sum),
523 			    sizeof(cksum), &cksum, M_NOWAIT);
524 			break;
525 		}
526 	}
527 
528 	/*
529 	 * Record what we've done to the packet (under what SA it was
530 	 * processed).
531 	 */
532 	if (tdbp->tdb_sproto != IPPROTO_IPCOMP) {
533 		mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE,
534 		    sizeof(struct tdb_ident), M_NOWAIT);
535 		if (mtag == NULL) {
536 			DPRINTF("failed to get tag");
537 			IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
538 			goto baddone;
539 		}
540 
541 		tdbi = (struct tdb_ident *)(mtag + 1);
542 		tdbi->dst = tdbp->tdb_dst;
543 		tdbi->proto = tdbp->tdb_sproto;
544 		tdbi->spi = tdbp->tdb_spi;
545 		tdbi->rdomain = tdbp->tdb_rdomain;
546 
547 		m_tag_prepend(m, mtag);
548 	}
549 
550 	switch (sproto) {
551 	case IPPROTO_ESP:
552 		/* Packet is confidential ? */
553 		if (tdbp->tdb_encalgxform)
554 			m->m_flags |= M_CONF;
555 
556 		/* Check if we had authenticated ESP. */
557 		if (tdbp->tdb_authalgxform)
558 			m->m_flags |= M_AUTH;
559 		break;
560 	case IPPROTO_AH:
561 		m->m_flags |= M_AUTH;
562 		break;
563 	case IPPROTO_IPCOMP:
564 		m->m_flags |= M_COMP;
565 		break;
566 	default:
567 		panic("%s: unknown/unsupported security protocol %d",
568 		    __func__, sproto);
569 	}
570 
571 #if NPF > 0
572 	/* Add pf tag if requested. */
573 	pf_tag_packet(m, tdbp->tdb_tag, -1);
574 	pf_pkt_addr_changed(m);
575 #endif
576 	if (tdbp->tdb_rdomain != tdbp->tdb_rdomain_post)
577 		m->m_pkthdr.ph_rtableid = tdbp->tdb_rdomain_post;
578 
579 	if (tdbp->tdb_flags & TDBF_TUNNELING)
580 		m->m_flags |= M_TUNNEL;
581 
582 	ipsecstat_add(ipsec_idecompbytes, m->m_pkthdr.len);
583 	tdbp->tdb_idecompbytes += m->m_pkthdr.len;
584 
585 #if NBPFILTER > 0
586 	if ((encif = enc_getif(tdbp->tdb_rdomain_post, tdbp->tdb_tap)) != NULL) {
587 		encif->if_ipackets++;
588 		encif->if_ibytes += m->m_pkthdr.len;
589 
590 		if (encif->if_bpf) {
591 			struct enchdr hdr;
592 
593 			hdr.af = af;
594 			hdr.spi = tdbp->tdb_spi;
595 			hdr.flags = m->m_flags & (M_AUTH|M_CONF);
596 
597 			bpf_mtap_hdr(encif->if_bpf, (char *)&hdr,
598 			    ENC_HDRLEN, m, BPF_DIRECTION_IN);
599 		}
600 	}
601 #endif
602 
603 #if NPF > 0
604 	/*
605 	 * The ip_deliver() shortcut avoids running through ip_input() with the
606 	 * same IP header twice.  Packets in transport mode have to be be
607 	 * passed to pf explicitly.  In tunnel mode the inner IP header will
608 	 * run through ip_input() and pf anyway.
609 	 */
610 	if ((tdbp->tdb_flags & TDBF_TUNNELING) == 0) {
611 		struct ifnet *ifp;
612 
613 		/* This is the enc0 interface unless for ipcomp. */
614 		if ((ifp = if_get(m->m_pkthdr.ph_ifidx)) == NULL) {
615 			goto baddone;
616 		}
617 		if (pf_test(af, PF_IN, ifp, mp) != PF_PASS) {
618 			if_put(ifp);
619 			goto baddone;
620 		}
621 		m = *mp;
622 		if_put(ifp);
623 		if (m == NULL)
624 			return IPPROTO_DONE;
625 	}
626 #endif
627 	/* Return to the appropriate protocol handler in deliver loop. */
628 	return prot;
629 
630  baddone:
631 	m_freemp(mp);
632 	return IPPROTO_DONE;
633 #undef IPSEC_ISTAT
634 }
635 
636 int
637 ipsec_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
638     size_t newlen)
639 {
640 	int error;
641 
642 	switch (name[0]) {
643 	case IPCTL_IPSEC_ENC_ALGORITHM:
644 		NET_LOCK();
645 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
646 		    ipsec_def_enc, sizeof(ipsec_def_enc));
647 		NET_UNLOCK();
648 		return (error);
649 	case IPCTL_IPSEC_AUTH_ALGORITHM:
650 		NET_LOCK();
651 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
652 		    ipsec_def_auth, sizeof(ipsec_def_auth));
653 		NET_UNLOCK();
654 		return (error);
655 	case IPCTL_IPSEC_IPCOMP_ALGORITHM:
656 		NET_LOCK();
657 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
658 		    ipsec_def_comp, sizeof(ipsec_def_comp));
659 		NET_UNLOCK();
660 		return (error);
661 	case IPCTL_IPSEC_STATS:
662 		return (ipsec_sysctl_ipsecstat(oldp, oldlenp, newp));
663 	default:
664 		NET_LOCK();
665 		error = sysctl_bounded_arr(ipsecctl_vars, nitems(ipsecctl_vars),
666 		    name, namelen, oldp, oldlenp, newp, newlen);
667 		NET_UNLOCK();
668 		return (error);
669 	}
670 }
671 
672 int
673 esp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
674     size_t newlen)
675 {
676 	int error;
677 
678 	/* All sysctl names at this level are terminal. */
679 	if (namelen != 1)
680 		return (ENOTDIR);
681 
682 	switch (name[0]) {
683 	case ESPCTL_STATS:
684 		return (esp_sysctl_espstat(oldp, oldlenp, newp));
685 	default:
686 		NET_LOCK();
687 		error = sysctl_bounded_arr(espctl_vars, nitems(espctl_vars),
688 		    name, namelen, oldp, oldlenp, newp, newlen);
689 		NET_UNLOCK();
690 		return (error);
691 	}
692 }
693 
694 int
695 esp_sysctl_espstat(void *oldp, size_t *oldlenp, void *newp)
696 {
697 	struct espstat espstat;
698 
699 	CTASSERT(sizeof(espstat) == (esps_ncounters * sizeof(uint64_t)));
700 	memset(&espstat, 0, sizeof espstat);
701 	counters_read(espcounters, (uint64_t *)&espstat, esps_ncounters);
702 	return (sysctl_rdstruct(oldp, oldlenp, newp, &espstat,
703 	    sizeof(espstat)));
704 }
705 
706 int
707 ah_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
708     size_t newlen)
709 {
710 	int error;
711 
712 	/* All sysctl names at this level are terminal. */
713 	if (namelen != 1)
714 		return (ENOTDIR);
715 
716 	switch (name[0]) {
717 	case AHCTL_STATS:
718 		return ah_sysctl_ahstat(oldp, oldlenp, newp);
719 	default:
720 		NET_LOCK();
721 		error = sysctl_bounded_arr(ahctl_vars, nitems(ahctl_vars), name,
722 		    namelen, oldp, oldlenp, newp, newlen);
723 		NET_UNLOCK();
724 		return (error);
725 	}
726 }
727 
728 int
729 ah_sysctl_ahstat(void *oldp, size_t *oldlenp, void *newp)
730 {
731 	struct ahstat ahstat;
732 
733 	CTASSERT(sizeof(ahstat) == (ahs_ncounters * sizeof(uint64_t)));
734 	memset(&ahstat, 0, sizeof ahstat);
735 	counters_read(ahcounters, (uint64_t *)&ahstat, ahs_ncounters);
736 	return (sysctl_rdstruct(oldp, oldlenp, newp, &ahstat, sizeof(ahstat)));
737 }
738 
739 int
740 ipcomp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
741     size_t newlen)
742 {
743 	int error;
744 
745 	/* All sysctl names at this level are terminal. */
746 	if (namelen != 1)
747 		return (ENOTDIR);
748 
749 	switch (name[0]) {
750 	case IPCOMPCTL_STATS:
751 		return ipcomp_sysctl_ipcompstat(oldp, oldlenp, newp);
752 	default:
753 		NET_LOCK();
754 		error = sysctl_bounded_arr(ipcompctl_vars,
755 		    nitems(ipcompctl_vars), name, namelen, oldp, oldlenp,
756 		    newp, newlen);
757 		NET_UNLOCK();
758 		return (error);
759 	}
760 }
761 
762 int
763 ipcomp_sysctl_ipcompstat(void *oldp, size_t *oldlenp, void *newp)
764 {
765 	struct ipcompstat ipcompstat;
766 
767 	CTASSERT(sizeof(ipcompstat) == (ipcomps_ncounters * sizeof(uint64_t)));
768 	memset(&ipcompstat, 0, sizeof ipcompstat);
769 	counters_read(ipcompcounters, (uint64_t *)&ipcompstat,
770 	    ipcomps_ncounters);
771 	return (sysctl_rdstruct(oldp, oldlenp, newp, &ipcompstat,
772 	    sizeof(ipcompstat)));
773 }
774 
775 int
776 ipsec_sysctl_ipsecstat(void *oldp, size_t *oldlenp, void *newp)
777 {
778 	struct ipsecstat ipsecstat;
779 
780 	CTASSERT(sizeof(ipsecstat) == (ipsec_ncounters * sizeof(uint64_t)));
781 	memset(&ipsecstat, 0, sizeof ipsecstat);
782 	counters_read(ipseccounters, (uint64_t *)&ipsecstat, ipsec_ncounters);
783 	return (sysctl_rdstruct(oldp, oldlenp, newp, &ipsecstat,
784 	    sizeof(ipsecstat)));
785 }
786 
787 int
788 ipsec_input_disabled(struct mbuf **mp, int *offp, int proto, int af)
789 {
790 	switch (af) {
791 	case AF_INET:
792 		return rip_input(mp, offp, proto, af);
793 #ifdef INET6
794 	case AF_INET6:
795 		return rip6_input(mp, offp, proto, af);
796 #endif
797 	default:
798 		unhandled_af(af);
799 	}
800 }
801 
802 int
803 ah46_input(struct mbuf **mp, int *offp, int proto, int af)
804 {
805 	int protoff;
806 
807 	if (
808 #if NPF > 0
809 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
810 #endif
811 	    !ah_enable)
812 		return ipsec_input_disabled(mp, offp, proto, af);
813 
814 	protoff = ipsec_protoff(*mp, *offp, af);
815 	if (protoff < 0) {
816 		DPRINTF("bad packet header chain");
817 		ahstat_inc(ahs_hdrops);
818 		m_freemp(mp);
819 		return IPPROTO_DONE;
820 	}
821 
822 	return ipsec_common_input(mp, *offp, protoff, af, proto, 0);
823 }
824 
825 void
826 ah4_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
827 {
828 	if (sa->sa_family != AF_INET ||
829 	    sa->sa_len != sizeof(struct sockaddr_in))
830 		return;
831 
832 	ipsec_common_ctlinput(rdomain, cmd, sa, v, IPPROTO_AH);
833 }
834 
835 int
836 esp46_input(struct mbuf **mp, int *offp, int proto, int af)
837 {
838 	int protoff;
839 
840 	if (
841 #if NPF > 0
842 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
843 #endif
844 	    !esp_enable)
845 		return ipsec_input_disabled(mp, offp, proto, af);
846 
847 	protoff = ipsec_protoff(*mp, *offp, af);
848 	if (protoff < 0) {
849 		DPRINTF("bad packet header chain");
850 		espstat_inc(esps_hdrops);
851 		m_freemp(mp);
852 		return IPPROTO_DONE;
853 	}
854 
855 	return ipsec_common_input(mp, *offp, protoff, af, proto, 0);
856 }
857 
858 /* IPv4 IPCOMP wrapper */
859 int
860 ipcomp46_input(struct mbuf **mp, int *offp, int proto, int af)
861 {
862 	int protoff;
863 
864 	if (
865 #if NPF > 0
866 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
867 #endif
868 	    !ipcomp_enable)
869 		return ipsec_input_disabled(mp, offp, proto, af);
870 
871 	protoff = ipsec_protoff(*mp, *offp, af);
872 	if (protoff < 0) {
873 		DPRINTF("bad packet header chain");
874 		ipcompstat_inc(ipcomps_hdrops);
875 		m_freemp(mp);
876 		return IPPROTO_DONE;
877 	}
878 
879 	return ipsec_common_input(mp, *offp, protoff, af, proto, 0);
880 }
881 
882 void
883 ipsec_set_mtu(struct tdb *tdbp, u_int32_t mtu)
884 {
885 	ssize_t adjust;
886 
887 	NET_ASSERT_LOCKED();
888 
889 	/* Walk the chain backwards to the first tdb */
890 	for (; tdbp != NULL; tdbp = tdbp->tdb_inext) {
891 		if (tdbp->tdb_flags & TDBF_INVALID ||
892 		    (adjust = ipsec_hdrsz(tdbp)) == -1)
893 			return;
894 
895 		mtu -= adjust;
896 
897 		/* Store adjusted MTU in tdb */
898 		tdbp->tdb_mtu = mtu;
899 		tdbp->tdb_mtutimeout = gettime() + ip_mtudisc_timeout;
900 		DPRINTF("spi %08x mtu %d adjust %ld",
901 		    ntohl(tdbp->tdb_spi), tdbp->tdb_mtu, adjust);
902 	}
903 }
904 
905 void
906 ipsec_common_ctlinput(u_int rdomain, int cmd, struct sockaddr *sa,
907     void *v, int proto)
908 {
909 	struct ip *ip = v;
910 
911 	if (cmd == PRC_MSGSIZE && ip && ip_mtudisc && ip->ip_v == 4) {
912 		struct tdb *tdbp;
913 		struct sockaddr_in dst;
914 		struct icmp *icp;
915 		int hlen = ip->ip_hl << 2;
916 		u_int32_t spi, mtu;
917 
918 		/* Find the right MTU. */
919 		icp = (struct icmp *)((caddr_t) ip -
920 		    offsetof(struct icmp, icmp_ip));
921 		mtu = ntohs(icp->icmp_nextmtu);
922 
923 		/*
924 		 * Ignore the packet, if we do not receive a MTU
925 		 * or the MTU is too small to be acceptable.
926 		 */
927 		if (mtu < 296)
928 			return;
929 
930 		memset(&dst, 0, sizeof(struct sockaddr_in));
931 		dst.sin_family = AF_INET;
932 		dst.sin_len = sizeof(struct sockaddr_in);
933 		dst.sin_addr.s_addr = ip->ip_dst.s_addr;
934 
935 		memcpy(&spi, (caddr_t)ip + hlen, sizeof(u_int32_t));
936 
937 		tdbp = gettdb_rev(rdomain, spi, (union sockaddr_union *)&dst,
938 		    proto);
939 		ipsec_set_mtu(tdbp, mtu);
940 	}
941 }
942 
943 void
944 udpencap_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
945 {
946 	struct ip *ip = v;
947 	struct tdb *tdbp;
948 	struct icmp *icp;
949 	u_int32_t mtu;
950 	struct sockaddr_in dst, src;
951 	union sockaddr_union *su_dst, *su_src;
952 
953 	NET_ASSERT_LOCKED();
954 
955 	icp = (struct icmp *)((caddr_t) ip - offsetof(struct icmp, icmp_ip));
956 	mtu = ntohs(icp->icmp_nextmtu);
957 
958 	/*
959 	 * Ignore the packet, if we do not receive a MTU
960 	 * or the MTU is too small to be acceptable.
961 	 */
962 	if (mtu < 296)
963 		return;
964 
965 	memset(&dst, 0, sizeof(dst));
966 	dst.sin_family = AF_INET;
967 	dst.sin_len = sizeof(struct sockaddr_in);
968 	dst.sin_addr.s_addr = ip->ip_dst.s_addr;
969 	su_dst = (union sockaddr_union *)&dst;
970 	memset(&src, 0, sizeof(src));
971 	src.sin_family = AF_INET;
972 	src.sin_len = sizeof(struct sockaddr_in);
973 	src.sin_addr.s_addr = ip->ip_src.s_addr;
974 	su_src = (union sockaddr_union *)&src;
975 
976 	tdbp = gettdbbysrcdst_rev(rdomain, 0, su_src, su_dst,
977 	    IPPROTO_ESP);
978 
979 	for (; tdbp != NULL; tdbp = tdbp->tdb_snext) {
980 		if (tdbp->tdb_sproto == IPPROTO_ESP &&
981 		    ((tdbp->tdb_flags & (TDBF_INVALID|TDBF_UDPENCAP)) ==
982 		    TDBF_UDPENCAP) &&
983 		    !memcmp(&tdbp->tdb_dst, &dst, su_dst->sa.sa_len) &&
984 		    !memcmp(&tdbp->tdb_src, &src, su_src->sa.sa_len)) {
985 			ipsec_set_mtu(tdbp, mtu);
986 		}
987 	}
988 }
989 
990 void
991 esp4_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
992 {
993 	if (sa->sa_family != AF_INET ||
994 	    sa->sa_len != sizeof(struct sockaddr_in))
995 		return;
996 
997 	ipsec_common_ctlinput(rdomain, cmd, sa, v, IPPROTO_ESP);
998 }
999 
1000 /* Find the offset of the next protocol field in the previous header. */
1001 int
1002 ipsec_protoff(struct mbuf *m, int off, int af)
1003 {
1004 	struct ip6_ext ip6e;
1005 	int protoff, nxt, l;
1006 
1007 	switch (af) {
1008 	case AF_INET:
1009 		return offsetof(struct ip, ip_p);
1010 #ifdef INET6
1011 	case AF_INET6:
1012 		break;
1013 #endif
1014 	default:
1015 		unhandled_af(af);
1016 	}
1017 
1018 	if (off < sizeof(struct ip6_hdr))
1019 		return -1;
1020 
1021 	if (off == sizeof(struct ip6_hdr))
1022 		return offsetof(struct ip6_hdr, ip6_nxt);
1023 
1024 	/* Chase down the header chain... */
1025 	protoff = sizeof(struct ip6_hdr);
1026 	nxt = (mtod(m, struct ip6_hdr *))->ip6_nxt;
1027 	l = 0;
1028 
1029 	do {
1030 		protoff += l;
1031 		m_copydata(m, protoff, sizeof(ip6e),
1032 		    (caddr_t) &ip6e);
1033 
1034 		if (nxt == IPPROTO_AH)
1035 			l = (ip6e.ip6e_len + 2) << 2;
1036 		else
1037 			l = (ip6e.ip6e_len + 1) << 3;
1038 #ifdef DIAGNOSTIC
1039 		if (l <= 0)
1040 			panic("ah6_input: l went zero or negative");
1041 #endif
1042 
1043 		nxt = ip6e.ip6e_nxt;
1044 	} while (protoff + l < off);
1045 
1046 	/* Malformed packet check */
1047 	if (protoff + l != off)
1048 		return -1;
1049 
1050 	protoff += offsetof(struct ip6_ext, ip6e_nxt);
1051 	return protoff;
1052 }
1053 
1054 int
1055 ipsec_forward_check(struct mbuf *m, int hlen, int af)
1056 {
1057 	struct tdb *tdb;
1058 	struct tdb_ident *tdbi;
1059 	struct m_tag *mtag;
1060 	int error = 0;
1061 
1062 	/*
1063 	 * IPsec policy check for forwarded packets. Look at
1064 	 * inner-most IPsec SA used.
1065 	 */
1066 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
1067 	if (mtag != NULL) {
1068 		tdbi = (struct tdb_ident *)(mtag + 1);
1069 		tdb = gettdb(tdbi->rdomain, tdbi->spi, &tdbi->dst, tdbi->proto);
1070 	} else
1071 		tdb = NULL;
1072 	ipsp_spd_lookup(m, af, hlen, &error, IPSP_DIRECTION_IN, tdb, NULL, 0);
1073 
1074 	return error;
1075 }
1076 
1077 int
1078 ipsec_local_check(struct mbuf *m, int hlen, int proto, int af)
1079 {
1080 	struct tdb *tdb;
1081 	struct tdb_ident *tdbi;
1082 	struct m_tag *mtag;
1083 	int error = 0;
1084 
1085 	/*
1086 	 * If it's a protected packet for us, skip the policy check.
1087 	 * That's because we really only care about the properties of
1088 	 * the protected packet, and not the intermediate versions.
1089 	 * While this is not the most paranoid setting, it allows
1090 	 * some flexibility in handling nested tunnels (in setting up
1091 	 * the policies).
1092 	 */
1093 	if ((proto == IPPROTO_ESP) || (proto == IPPROTO_AH) ||
1094 	    (proto == IPPROTO_IPCOMP))
1095 		return 0;
1096 
1097 	/*
1098 	 * If the protected packet was tunneled, then we need to
1099 	 * verify the protected packet's information, not the
1100 	 * external headers. Thus, skip the policy lookup for the
1101 	 * external packet, and keep the IPsec information linked on
1102 	 * the packet header (the encapsulation routines know how
1103 	 * to deal with that).
1104 	 */
1105 	if ((proto == IPPROTO_IPV4) || (proto == IPPROTO_IPV6))
1106 		return 0;
1107 
1108 	/*
1109 	 * When processing IPv6 header chains, do not look at the
1110 	 * outer header.  The inner protocol is relevant and will
1111 	 * be checked by the local delivery loop later.
1112 	 */
1113 	if ((af == AF_INET6) && ((proto == IPPROTO_DSTOPTS) ||
1114 	    (proto == IPPROTO_ROUTING) || (proto == IPPROTO_FRAGMENT)))
1115 		return 0;
1116 
1117 	/*
1118 	 * If the protected packet is TCP or UDP, we'll do the
1119 	 * policy check in the respective input routine, so we can
1120 	 * check for bypass sockets.
1121 	 */
1122 	if ((proto == IPPROTO_TCP) || (proto == IPPROTO_UDP))
1123 		return 0;
1124 
1125 	/*
1126 	 * IPsec policy check for local-delivery packets. Look at the
1127 	 * inner-most SA that protected the packet. This is in fact
1128 	 * a bit too restrictive (it could end up causing packets to
1129 	 * be dropped that semantically follow the policy, e.g., in
1130 	 * certain SA-bundle configurations); but the alternative is
1131 	 * very complicated (and requires keeping track of what
1132 	 * kinds of tunneling headers have been seen in-between the
1133 	 * IPsec headers), and I don't think we lose much functionality
1134 	 * that's needed in the real world (who uses bundles anyway ?).
1135 	 */
1136 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
1137 	if (mtag) {
1138 		tdbi = (struct tdb_ident *)(mtag + 1);
1139 		tdb = gettdb(tdbi->rdomain, tdbi->spi, &tdbi->dst,
1140 		    tdbi->proto);
1141 	} else
1142 		tdb = NULL;
1143 	ipsp_spd_lookup(m, af, hlen, &error, IPSP_DIRECTION_IN,
1144 	    tdb, NULL, 0);
1145 
1146 	return error;
1147 }
1148