xref: /openbsd-src/sys/netinet/ipsec_input.c (revision 8ead0783a05eee83ab02af2c7b14b10fbcdce47d)
1 /*	$OpenBSD: ipsec_input.c,v 1.157 2017/10/09 08:35:38 mpi Exp $	*/
2 /*
3  * The authors of this code are John Ioannidis (ji@tla.org),
4  * Angelos D. Keromytis (kermit@csd.uch.gr) and
5  * Niels Provos (provos@physnet.uni-hamburg.de).
6  *
7  * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
8  * in November 1995.
9  *
10  * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
11  * by Angelos D. Keromytis.
12  *
13  * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
14  * and Niels Provos.
15  *
16  * Additional features in 1999 by Angelos D. Keromytis.
17  *
18  * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
19  * Angelos D. Keromytis and Niels Provos.
20  * Copyright (c) 2001, Angelos D. Keromytis.
21  *
22  * Permission to use, copy, and modify this software with or without fee
23  * is hereby granted, provided that this entire notice is included in
24  * all copies of any software which is or includes a copy or
25  * modification of this software.
26  * You may use this code under the GNU public license if you so wish. Please
27  * contribute changes back to the authors under this freer than GPL license
28  * so that we may further the use of strong encryption without limitations to
29  * all.
30  *
31  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
32  * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
33  * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
34  * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
35  * PURPOSE.
36  */
37 
38 #include "pf.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/protosw.h>
43 #include <sys/mbuf.h>
44 #include <sys/socket.h>
45 #include <sys/sysctl.h>
46 #include <sys/kernel.h>
47 #include <sys/timeout.h>
48 
49 #include <net/if.h>
50 #include <net/if_var.h>
51 #include <net/netisr.h>
52 #include <net/bpf.h>
53 #include <net/route.h>
54 
55 #include <netinet/in.h>
56 #include <netinet/ip.h>
57 #include <netinet/ip_var.h>
58 #include <netinet/ip_icmp.h>
59 #include <netinet/tcp.h>
60 #include <netinet/udp.h>
61 
62 #if NPF > 0
63 #include <net/pfvar.h>
64 #endif
65 
66 #ifdef INET6
67 #include <netinet6/in6_var.h>
68 #include <netinet/ip6.h>
69 #include <netinet6/ip6_var.h>
70 #include <netinet6/ip6protosw.h>
71 #endif /* INET6 */
72 
73 #include <netinet/ip_ipsp.h>
74 #include <netinet/ip_esp.h>
75 #include <netinet/ip_ah.h>
76 #include <netinet/ip_ipcomp.h>
77 
78 #include <net/if_enc.h>
79 
80 #include "bpfilter.h"
81 
/*
 * Shared ICMP control-input handler; the per-protocol IPv4 wrappers
 * ah4_ctlinput() and esp4_ctlinput() in this file forward to it.
 */
void ipsec_common_ctlinput(u_int, int, struct sockaddr *, void *, int);

/*
 * Debug output helper.  The argument is a complete, parenthesized printf()
 * argument list, e.g. DPRINTF(("value %d\n", v)).  Without ENCDEBUG the
 * macro expands to nothing.
 * NOTE(review): the ENCDEBUG form expands to a bare "if" statement, so it
 * should not be used as the unbraced body of an if/else.
 */
#ifdef ENCDEBUG
#define DPRINTF(x)	if (encdebug) printf x
#else
#define DPRINTF(x)
#endif

/*
 * sysctl variables: per-protocol enable switches tested by
 * ipsec_common_input().  ESP and AH start enabled, IPComp starts disabled.
 */
int esp_enable = 1;
int ah_enable = 1;
int ipcomp_enable = 0;

/*
 * Tables passed to sysctl_int_arr() by esp_sysctl(), ah_sysctl() and
 * ipcomp_sysctl().  Their contents come from the *CTL_VARS initializer
 * macros, which are defined outside this file.
 */
int *espctl_vars[ESPCTL_MAXID] = ESPCTL_VARS;
int *ahctl_vars[AHCTL_MAXID] = AHCTL_VARS;
int *ipcompctl_vars[IPCOMPCTL_MAXID] = IPCOMPCTL_VARS;
98 
99 /*
100  * ipsec_common_input() gets called when we receive an IPsec-protected packet
101  * in IPv4 or IPv6. All it does is find the right TDB and call the appropriate
102  * transform. The callback takes care of further processing (like ingress
103  * filtering).
104  */
105 int
106 ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto,
107     int udpencap)
108 {
109 #define IPSEC_ISTAT(x,y,z) (sproto == IPPROTO_ESP ? (x)++ : \
110 			    sproto == IPPROTO_AH ? (y)++ : (z)++)
111 
112 	union sockaddr_union dst_address;
113 	struct tdb *tdbp;
114 	struct ifnet *encif;
115 	u_int32_t spi;
116 	u_int16_t cpi;
117 	int error;
118 #ifdef ENCDEBUG
119 	char buf[INET6_ADDRSTRLEN];
120 #endif
121 
122 	NET_ASSERT_LOCKED();
123 
124 	IPSEC_ISTAT(espstat.esps_input, ahstat.ahs_input,
125 	    ipcompstat.ipcomps_input);
126 
127 	if (m == NULL) {
128 		DPRINTF(("ipsec_common_input(): NULL packet received\n"));
129 		IPSEC_ISTAT(espstat.esps_hdrops, ahstat.ahs_hdrops,
130 		    ipcompstat.ipcomps_hdrops);
131 		return EINVAL;
132 	}
133 
134 	if ((sproto == IPPROTO_ESP && !esp_enable) ||
135 	    (sproto == IPPROTO_AH && !ah_enable) ||
136 #if NPF > 0
137 	    (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
138 #endif
139 	    (sproto == IPPROTO_IPCOMP && !ipcomp_enable)) {
140 		switch (af) {
141 		case AF_INET:
142 			rip_input(&m, &skip, sproto, af);
143 			break;
144 #ifdef INET6
145 		case AF_INET6:
146 			rip6_input(&m, &skip, sproto, af);
147 			break;
148 #endif /* INET6 */
149 		default:
150 			DPRINTF(("ipsec_common_input(): unsupported protocol "
151 			    "family %d\n", af));
152 			m_freem(m);
153 			IPSEC_ISTAT(espstat.esps_nopf, ahstat.ahs_nopf,
154 			    ipcompstat.ipcomps_nopf);
155 			return EPFNOSUPPORT;
156 		}
157 		return 0;
158 	}
159 	if ((sproto == IPPROTO_IPCOMP) && (m->m_flags & M_COMP)) {
160 		m_freem(m);
161 		ipcompstat.ipcomps_pdrops++;
162 		DPRINTF(("ipsec_common_input(): repeated decompression\n"));
163 		return EINVAL;
164 	}
165 
166 	if (m->m_pkthdr.len - skip < 2 * sizeof(u_int32_t)) {
167 		m_freem(m);
168 		IPSEC_ISTAT(espstat.esps_hdrops, ahstat.ahs_hdrops,
169 		    ipcompstat.ipcomps_hdrops);
170 		DPRINTF(("ipsec_common_input(): packet too small\n"));
171 		return EINVAL;
172 	}
173 
174 	/* Retrieve the SPI from the relevant IPsec header */
175 	switch (sproto) {
176 	case IPPROTO_ESP:
177 		m_copydata(m, skip, sizeof(u_int32_t), (caddr_t) &spi);
178 		break;
179 	case IPPROTO_AH:
180 		m_copydata(m, skip + sizeof(u_int32_t), sizeof(u_int32_t),
181 		    (caddr_t) &spi);
182 		break;
183 	case IPPROTO_IPCOMP:
184 		m_copydata(m, skip + sizeof(u_int16_t), sizeof(u_int16_t),
185 		    (caddr_t) &cpi);
186 		spi = ntohl(htons(cpi));
187 		break;
188 	default:
189 		panic("%s: unknown/unsupported security protocol %d",
190 		    __func__, sproto);
191 	}
192 
193 	/*
194 	 * Find tunnel control block and (indirectly) call the appropriate
195 	 * kernel crypto routine. The resulting mbuf chain is a valid
196 	 * IP packet ready to go through input processing.
197 	 */
198 
199 	memset(&dst_address, 0, sizeof(dst_address));
200 	dst_address.sa.sa_family = af;
201 
202 	switch (af) {
203 	case AF_INET:
204 		dst_address.sin.sin_len = sizeof(struct sockaddr_in);
205 		m_copydata(m, offsetof(struct ip, ip_dst),
206 		    sizeof(struct in_addr),
207 		    (caddr_t) &(dst_address.sin.sin_addr));
208 		break;
209 
210 #ifdef INET6
211 	case AF_INET6:
212 		dst_address.sin6.sin6_len = sizeof(struct sockaddr_in6);
213 		m_copydata(m, offsetof(struct ip6_hdr, ip6_dst),
214 		    sizeof(struct in6_addr),
215 		    (caddr_t) &(dst_address.sin6.sin6_addr));
216 		in6_recoverscope(&dst_address.sin6,
217 		    &dst_address.sin6.sin6_addr);
218 		break;
219 #endif /* INET6 */
220 
221 	default:
222 		DPRINTF(("ipsec_common_input(): unsupported protocol "
223 		    "family %d\n", af));
224 		m_freem(m);
225 		IPSEC_ISTAT(espstat.esps_nopf, ahstat.ahs_nopf,
226 		    ipcompstat.ipcomps_nopf);
227 		return EPFNOSUPPORT;
228 	}
229 
230 	tdbp = gettdb(rtable_l2(m->m_pkthdr.ph_rtableid),
231 	    spi, &dst_address, sproto);
232 	if (tdbp == NULL) {
233 		DPRINTF(("ipsec_common_input(): could not find SA for "
234 		    "packet to %s, spi %08x\n",
235 		    ipsp_address(&dst_address, buf, sizeof(buf)), ntohl(spi)));
236 		m_freem(m);
237 		IPSEC_ISTAT(espstat.esps_notdb, ahstat.ahs_notdb,
238 		    ipcompstat.ipcomps_notdb);
239 		return ENOENT;
240 	}
241 
242 	if (tdbp->tdb_flags & TDBF_INVALID) {
243 		DPRINTF(("ipsec_common_input(): attempted to use invalid "
244 		    "SA %s/%08x/%u\n", ipsp_address(&dst_address, buf,
245 		    sizeof(buf)), ntohl(spi), tdbp->tdb_sproto));
246 		m_freem(m);
247 		IPSEC_ISTAT(espstat.esps_invalid, ahstat.ahs_invalid,
248 		    ipcompstat.ipcomps_invalid);
249 		return EINVAL;
250 	}
251 
252 	if (udpencap && !(tdbp->tdb_flags & TDBF_UDPENCAP)) {
253 		DPRINTF(("ipsec_common_input(): attempted to use non-udpencap "
254 		    "SA %s/%08x/%u\n", ipsp_address(&dst_address, buf,
255 		    sizeof(buf)), ntohl(spi), tdbp->tdb_sproto));
256 		m_freem(m);
257 		espstat.esps_udpinval++;
258 		return EINVAL;
259 	}
260 
261 	if (!udpencap && (tdbp->tdb_flags & TDBF_UDPENCAP)) {
262 		DPRINTF(("ipsec_common_input(): attempted to use udpencap "
263 		    "SA %s/%08x/%u\n", ipsp_address(&dst_address, buf,
264 		    sizeof(buf)), ntohl(spi), tdbp->tdb_sproto));
265 		m_freem(m);
266 		espstat.esps_udpneeded++;
267 		return EINVAL;
268 	}
269 
270 	if (tdbp->tdb_xform == NULL) {
271 		DPRINTF(("ipsec_common_input(): attempted to use uninitialized "
272 		    "SA %s/%08x/%u\n", ipsp_address(&dst_address, buf,
273 		    sizeof(buf)), ntohl(spi), tdbp->tdb_sproto));
274 		m_freem(m);
275 		IPSEC_ISTAT(espstat.esps_noxform, ahstat.ahs_noxform,
276 		    ipcompstat.ipcomps_noxform);
277 		return ENXIO;
278 	}
279 
280 	if (sproto != IPPROTO_IPCOMP) {
281 		if ((encif = enc_getif(tdbp->tdb_rdomain,
282 		    tdbp->tdb_tap)) == NULL) {
283 			DPRINTF(("ipsec_common_input(): "
284 			    "no enc%u interface for SA %s/%08x/%u\n",
285 			    tdbp->tdb_tap, ipsp_address(&dst_address, buf,
286 			    sizeof(buf)), ntohl(spi), tdbp->tdb_sproto));
287 			m_freem(m);
288 
289 			IPSEC_ISTAT(espstat.esps_pdrops,
290 			    ahstat.ahs_pdrops,
291 			    ipcompstat.ipcomps_pdrops);
292 			return EACCES;
293 		}
294 
295 		/* XXX This conflicts with the scoped nature of IPv6 */
296 		m->m_pkthdr.ph_ifidx = encif->if_index;
297 	}
298 
299 	/* Register first use, setup expiration timer. */
300 	if (tdbp->tdb_first_use == 0) {
301 		tdbp->tdb_first_use = time_second;
302 		if (tdbp->tdb_flags & TDBF_FIRSTUSE)
303 			timeout_add_sec(&tdbp->tdb_first_tmo,
304 			    tdbp->tdb_exp_first_use);
305 		if (tdbp->tdb_flags & TDBF_SOFT_FIRSTUSE)
306 			timeout_add_sec(&tdbp->tdb_sfirst_tmo,
307 			    tdbp->tdb_soft_first_use);
308 	}
309 
310 	/*
311 	 * Call appropriate transform and return -- callback takes care of
312 	 * everything else.
313 	 */
314 	error = (*(tdbp->tdb_xform->xf_input))(m, tdbp, skip, protoff);
315 	return error;
316 }
317 
318 /*
319  * IPsec input callback, called by the transform callback. Takes care of
320  * filtering and other sanity checks on the processed packet.
321  */
322 void
323 ipsec_common_input_cb(struct mbuf *m, struct tdb *tdbp, int skip, int protoff)
324 {
325 	int af, sproto;
326 	u_int8_t prot;
327 
328 #if NBPFILTER > 0
329 	struct ifnet *encif;
330 #endif
331 
332 	struct ip *ip, ipn;
333 
334 #ifdef INET6
335 	struct ip6_hdr *ip6, ip6n;
336 #endif /* INET6 */
337 	struct m_tag *mtag;
338 	struct tdb_ident *tdbi;
339 
340 #ifdef ENCDEBUG
341 	char buf[INET6_ADDRSTRLEN];
342 #endif
343 
344 	af = tdbp->tdb_dst.sa.sa_family;
345 	sproto = tdbp->tdb_sproto;
346 
347 	tdbp->tdb_last_used = time_second;
348 
349 	/* Sanity check */
350 	if (m == NULL) {
351 		/* The called routine will print a message if necessary */
352 		IPSEC_ISTAT(espstat.esps_badkcr, ahstat.ahs_badkcr,
353 		    ipcompstat.ipcomps_badkcr);
354 		return;
355 	}
356 
357 	/* Fix IPv4 header */
358 	if (af == AF_INET) {
359 		if ((m->m_len < skip) && ((m = m_pullup(m, skip)) == NULL)) {
360 			DPRINTF(("ipsec_common_input_cb(): processing failed "
361 			    "for SA %s/%08x\n", ipsp_address(&tdbp->tdb_dst,
362 			    buf, sizeof(buf)), ntohl(tdbp->tdb_spi)));
363 			IPSEC_ISTAT(espstat.esps_hdrops, ahstat.ahs_hdrops,
364 			    ipcompstat.ipcomps_hdrops);
365 			return;
366 		}
367 
368 		ip = mtod(m, struct ip *);
369 		ip->ip_len = htons(m->m_pkthdr.len);
370 		ip->ip_sum = 0;
371 		ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
372 		prot = ip->ip_p;
373 
374 		/* IP-in-IP encapsulation */
375 		if (prot == IPPROTO_IPIP) {
376 			if (m->m_pkthdr.len - skip < sizeof(struct ip)) {
377 				m_freem(m);
378 				IPSEC_ISTAT(espstat.esps_hdrops,
379 				    ahstat.ahs_hdrops,
380 				    ipcompstat.ipcomps_hdrops);
381 				return;
382 			}
383 			/* ipn will now contain the inner IPv4 header */
384 			m_copydata(m, skip, sizeof(struct ip),
385 			    (caddr_t) &ipn);
386 		}
387 
388 #ifdef INET6
389 		/* IPv6-in-IP encapsulation. */
390 		if (prot == IPPROTO_IPV6) {
391 			if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) {
392 				m_freem(m);
393 				IPSEC_ISTAT(espstat.esps_hdrops,
394 				    ahstat.ahs_hdrops,
395 				    ipcompstat.ipcomps_hdrops);
396 				return;
397 			}
398 			/* ip6n will now contain the inner IPv6 header. */
399 			m_copydata(m, skip, sizeof(struct ip6_hdr),
400 			    (caddr_t) &ip6n);
401 		}
402 #endif /* INET6 */
403 	}
404 
405 #ifdef INET6
406 	/* Fix IPv6 header */
407 	if (af == AF_INET6)
408 	{
409 		if (m->m_len < sizeof(struct ip6_hdr) &&
410 		    (m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
411 
412 			DPRINTF(("ipsec_common_input_cb(): processing failed "
413 			    "for SA %s/%08x\n", ipsp_address(&tdbp->tdb_dst,
414 			    buf, sizeof(buf)), ntohl(tdbp->tdb_spi)));
415 
416 			IPSEC_ISTAT(espstat.esps_hdrops, ahstat.ahs_hdrops,
417 			    ipcompstat.ipcomps_hdrops);
418 			return;
419 		}
420 
421 		ip6 = mtod(m, struct ip6_hdr *);
422 		ip6->ip6_plen = htons(m->m_pkthdr.len - skip);
423 
424 		/* Save protocol */
425 		m_copydata(m, protoff, 1, (caddr_t) &prot);
426 
427 		/* IP-in-IP encapsulation */
428 		if (prot == IPPROTO_IPIP) {
429 			if (m->m_pkthdr.len - skip < sizeof(struct ip)) {
430 				m_freem(m);
431 				IPSEC_ISTAT(espstat.esps_hdrops,
432 				    ahstat.ahs_hdrops,
433 				    ipcompstat.ipcomps_hdrops);
434 				return;
435 			}
436 			/* ipn will now contain the inner IPv4 header */
437 			m_copydata(m, skip, sizeof(struct ip), (caddr_t) &ipn);
438 		}
439 
440 		/* IPv6-in-IP encapsulation */
441 		if (prot == IPPROTO_IPV6) {
442 			if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) {
443 				m_freem(m);
444 				IPSEC_ISTAT(espstat.esps_hdrops,
445 				    ahstat.ahs_hdrops,
446 				    ipcompstat.ipcomps_hdrops);
447 				return;
448 			}
449 			/* ip6n will now contain the inner IPv6 header. */
450 			m_copydata(m, skip, sizeof(struct ip6_hdr),
451 			    (caddr_t) &ip6n);
452 		}
453 	}
454 #endif /* INET6 */
455 
456 	/*
457 	 * Fix TCP/UDP checksum of UDP encapsulated transport mode ESP packet.
458 	 * (RFC3948 3.1.2)
459 	 */
460 	if ((af == AF_INET || af == AF_INET6) &&
461 	    (tdbp->tdb_flags & TDBF_UDPENCAP) &&
462 	    (tdbp->tdb_flags & TDBF_TUNNELING) == 0) {
463 		u_int16_t cksum;
464 
465 		switch (prot) {
466 		case IPPROTO_UDP:
467 			if (m->m_pkthdr.len < skip + sizeof(struct udphdr)) {
468 				m_freem(m);
469 				IPSEC_ISTAT(espstat.esps_hdrops,
470 				    ahstat.ahs_hdrops,
471 				    ipcompstat.ipcomps_hdrops);
472 				return;
473 			}
474 			cksum = 0;
475 			m_copyback(m, skip + offsetof(struct udphdr, uh_sum),
476 			    sizeof(cksum), &cksum, M_NOWAIT);
477 #ifdef INET6
478 			if (af == AF_INET6) {
479 				cksum = in6_cksum(m, IPPROTO_UDP, skip,
480 				    m->m_pkthdr.len - skip);
481 				m_copyback(m, skip + offsetof(struct udphdr,
482 				    uh_sum), sizeof(cksum), &cksum, M_NOWAIT);
483 			}
484 #endif
485 			break;
486 		case IPPROTO_TCP:
487 			if (m->m_pkthdr.len < skip + sizeof(struct tcphdr)) {
488 				m_freem(m);
489 				IPSEC_ISTAT(espstat.esps_hdrops,
490 				    ahstat.ahs_hdrops,
491 				    ipcompstat.ipcomps_hdrops);
492 				return;
493 			}
494 			cksum = 0;
495 			m_copyback(m, skip + offsetof(struct tcphdr, th_sum),
496 			    sizeof(cksum), &cksum, M_NOWAIT);
497 			if (af == AF_INET)
498 				cksum = in4_cksum(m, IPPROTO_TCP, skip,
499 				    m->m_pkthdr.len - skip);
500 #ifdef INET6
501 			else if (af == AF_INET6)
502 				cksum = in6_cksum(m, IPPROTO_TCP, skip,
503 				    m->m_pkthdr.len - skip);
504 #endif
505 			m_copyback(m, skip + offsetof(struct tcphdr, th_sum),
506 			    sizeof(cksum), &cksum, M_NOWAIT);
507 			break;
508 		}
509 	}
510 
511 	/*
512 	 * Record what we've done to the packet (under what SA it was
513 	 * processed).
514 	 */
515 	if (tdbp->tdb_sproto != IPPROTO_IPCOMP) {
516 		mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE,
517 		    sizeof(struct tdb_ident), M_NOWAIT);
518 		if (mtag == NULL) {
519 			m_freem(m);
520 			DPRINTF(("ipsec_common_input_cb(): failed to "
521 			    "get tag\n"));
522 			IPSEC_ISTAT(espstat.esps_hdrops, ahstat.ahs_hdrops,
523 			    ipcompstat.ipcomps_hdrops);
524 			return;
525 		}
526 
527 		tdbi = (struct tdb_ident *)(mtag + 1);
528 		tdbi->dst = tdbp->tdb_dst;
529 		tdbi->proto = tdbp->tdb_sproto;
530 		tdbi->spi = tdbp->tdb_spi;
531 		tdbi->rdomain = tdbp->tdb_rdomain;
532 
533 		m_tag_prepend(m, mtag);
534 	}
535 
536 	switch (sproto) {
537 	case IPPROTO_ESP:
538 		/* Packet is confidential ? */
539 		if (tdbp->tdb_encalgxform)
540 			m->m_flags |= M_CONF;
541 
542 		/* Check if we had authenticated ESP. */
543 		if (tdbp->tdb_authalgxform)
544 			m->m_flags |= M_AUTH;
545 		break;
546 	case IPPROTO_AH:
547 		m->m_flags |= M_AUTH;
548 		break;
549 	case IPPROTO_IPCOMP:
550 		m->m_flags |= M_COMP;
551 		break;
552 	default:
553 		panic("%s: unknown/unsupported security protocol %d",
554 		    __func__, sproto);
555 	}
556 
557 #if NPF > 0
558 	/* Add pf tag if requested. */
559 	pf_tag_packet(m, tdbp->tdb_tag, -1);
560 	pf_pkt_addr_changed(m);
561 #endif
562 
563 	if (tdbp->tdb_flags & TDBF_TUNNELING)
564 		m->m_flags |= M_TUNNEL;
565 
566 #if NBPFILTER > 0
567 	if ((encif = enc_getif(tdbp->tdb_rdomain, tdbp->tdb_tap)) != NULL) {
568 		encif->if_ipackets++;
569 		encif->if_ibytes += m->m_pkthdr.len;
570 
571 		if (encif->if_bpf) {
572 			struct enchdr hdr;
573 
574 			hdr.af = af;
575 			hdr.spi = tdbp->tdb_spi;
576 			hdr.flags = m->m_flags & (M_AUTH|M_CONF);
577 
578 			bpf_mtap_hdr(encif->if_bpf, (char *)&hdr,
579 			    ENC_HDRLEN, m, BPF_DIRECTION_IN, NULL);
580 		}
581 	}
582 #endif
583 
584 #if NPF > 0
585 	/*
586 	 * The ip_deliver() shortcut avoids running through ip_input() with the
587 	 * same IP header twice.  Packets in transport mode have to be be
588 	 * passed to pf explicitly.  In tunnel mode the inner IP header will
589 	 * run through ip_input() and pf anyway.
590 	 */
591 	if ((tdbp->tdb_flags & TDBF_TUNNELING) == 0) {
592 		struct ifnet *ifp;
593 
594 		/* This is the enc0 interface unless for ipcomp. */
595 		if ((ifp = if_get(m->m_pkthdr.ph_ifidx)) == NULL) {
596 			m_freem(m);
597 			return;
598 		}
599 		if (pf_test(af, PF_IN, ifp, &m) != PF_PASS) {
600 			if_put(ifp);
601 			m_freem(m);
602 			return;
603 		}
604 		if_put(ifp);
605 		if (m == NULL)
606 			return;
607 	}
608 #endif
609 	/* Call the appropriate IPsec transform callback. */
610 	ip_deliver(&m, &skip, prot, af);
611 #undef IPSEC_ISTAT
612 }
613 
614 int
615 esp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
616     size_t newlen)
617 {
618 	int error;
619 
620 	/* All sysctl names at this level are terminal. */
621 	if (namelen != 1)
622 		return (ENOTDIR);
623 
624 	switch (name[0]) {
625 	case ESPCTL_STATS:
626 		if (newp != NULL)
627 			return (EPERM);
628 		NET_LOCK();
629 		error = sysctl_struct(oldp, oldlenp, newp, newlen,
630 		    &espstat, sizeof(espstat));
631 		NET_UNLOCK();
632 		return (error);
633 	default:
634 		if (name[0] < ESPCTL_MAXID) {
635 			NET_LOCK();
636 			error = sysctl_int_arr(espctl_vars, name, namelen,
637 			    oldp, oldlenp, newp, newlen);
638 			NET_UNLOCK();
639 			return (error);
640 		}
641 		return (ENOPROTOOPT);
642 	}
643 }
644 
645 int
646 ah_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
647     size_t newlen)
648 {
649 	int error;
650 
651 	/* All sysctl names at this level are terminal. */
652 	if (namelen != 1)
653 		return (ENOTDIR);
654 
655 	switch (name[0]) {
656 	case AHCTL_STATS:
657 		if (newp != NULL)
658 			return (EPERM);
659 		NET_LOCK();
660 		error = sysctl_struct(oldp, oldlenp, newp, newlen,
661 		    &ahstat, sizeof(ahstat));
662 		NET_UNLOCK();
663 		return (error);
664 	default:
665 		if (name[0] < AHCTL_MAXID) {
666 			NET_LOCK();
667 			error = sysctl_int_arr(ahctl_vars, name, namelen,
668 			    oldp, oldlenp, newp, newlen);
669 			NET_UNLOCK();
670 			return (error);
671 		}
672 		return (ENOPROTOOPT);
673 	}
674 }
675 
676 int
677 ipcomp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
678     size_t newlen)
679 {
680 	int error;
681 
682 	/* All sysctl names at this level are terminal. */
683 	if (namelen != 1)
684 		return (ENOTDIR);
685 
686 	switch (name[0]) {
687 	case IPCOMPCTL_STATS:
688 		if (newp != NULL)
689 			return (EPERM);
690 		NET_LOCK();
691 		error = sysctl_struct(oldp, oldlenp, newp, newlen,
692 		    &ipcompstat, sizeof(ipcompstat));
693 		NET_UNLOCK();
694 		return (error);
695 	default:
696 		if (name[0] < IPCOMPCTL_MAXID) {
697 			NET_LOCK();
698 			error = sysctl_int_arr(ipcompctl_vars, name, namelen,
699 			    oldp, oldlenp, newp, newlen);
700 			NET_UNLOCK();
701 			return (error);
702 		}
703 		return (ENOPROTOOPT);
704 	}
705 }
706 
707 /* IPv4 AH wrapper. */
708 int
709 ah4_input(struct mbuf **mp, int *offp, int proto, int af)
710 {
711 	ipsec_common_input(*mp, *offp, offsetof(struct ip, ip_p), AF_INET,
712 	    proto, 0);
713 	return IPPROTO_DONE;
714 }
715 
716 /* XXX rdomain */
717 void
718 ah4_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
719 {
720 	if (sa->sa_family != AF_INET ||
721 	    sa->sa_len != sizeof(struct sockaddr_in))
722 		return;
723 
724 	ipsec_common_ctlinput(rdomain, cmd, sa, v, IPPROTO_AH);
725 }
726 
727 /* IPv4 ESP wrapper. */
728 int
729 esp4_input(struct mbuf **mp, int *offp, int proto, int af)
730 {
731 	ipsec_common_input(*mp, *offp, offsetof(struct ip, ip_p), AF_INET,
732 	    proto, 0);
733 	return IPPROTO_DONE;
734 }
735 
736 /* IPv4 IPCOMP wrapper */
737 int
738 ipcomp4_input(struct mbuf **mp, int *offp, int proto, int af)
739 {
740 	ipsec_common_input(*mp, *offp, offsetof(struct ip, ip_p), AF_INET,
741 	    proto, 0);
742 	return IPPROTO_DONE;
743 }
744 
745 void
746 ipsec_common_ctlinput(u_int rdomain, int cmd, struct sockaddr *sa,
747     void *v, int proto)
748 {
749 	struct ip *ip = v;
750 
751 	if (cmd == PRC_MSGSIZE && ip && ip_mtudisc && ip->ip_v == 4) {
752 		struct tdb *tdbp;
753 		struct sockaddr_in dst;
754 		struct icmp *icp;
755 		int hlen = ip->ip_hl << 2;
756 		u_int32_t spi, mtu;
757 		ssize_t adjust;
758 
759 		/* Find the right MTU. */
760 		icp = (struct icmp *)((caddr_t) ip -
761 		    offsetof(struct icmp, icmp_ip));
762 		mtu = ntohs(icp->icmp_nextmtu);
763 
764 		/*
765 		 * Ignore the packet, if we do not receive a MTU
766 		 * or the MTU is too small to be acceptable.
767 		 */
768 		if (mtu < 296)
769 			return;
770 
771 		memset(&dst, 0, sizeof(struct sockaddr_in));
772 		dst.sin_family = AF_INET;
773 		dst.sin_len = sizeof(struct sockaddr_in);
774 		dst.sin_addr.s_addr = ip->ip_dst.s_addr;
775 
776 		memcpy(&spi, (caddr_t)ip + hlen, sizeof(u_int32_t));
777 
778 		tdbp = gettdb(rdomain, spi, (union sockaddr_union *)&dst,
779 		    proto);
780 		if (tdbp == NULL || tdbp->tdb_flags & TDBF_INVALID)
781 			return;
782 
783 		/* Walk the chain backwards to the first tdb */
784 		for (; tdbp; tdbp = tdbp->tdb_inext) {
785 			if (tdbp->tdb_flags & TDBF_INVALID ||
786 			    (adjust = ipsec_hdrsz(tdbp)) == -1)
787 				return;
788 
789 			mtu -= adjust;
790 
791 			/* Store adjusted MTU in tdb */
792 			tdbp->tdb_mtu = mtu;
793 			tdbp->tdb_mtutimeout = time_second +
794 			    ip_mtudisc_timeout;
795 			DPRINTF(("ipsec_common_ctlinput: "
796 			    "spi %08x mtu %d adjust %ld\n",
797 			    ntohl(tdbp->tdb_spi), tdbp->tdb_mtu,
798 			    adjust));
799 		}
800 	}
801 }
802 
803 /* XXX rdomain */
804 void
805 udpencap_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
806 {
807 	struct ip *ip = v;
808 	struct tdb *tdbp;
809 	struct icmp *icp;
810 	u_int32_t mtu;
811 	ssize_t adjust;
812 	struct sockaddr_in dst, src;
813 	union sockaddr_union *su_dst, *su_src;
814 
815 	NET_ASSERT_LOCKED();
816 
817 	icp = (struct icmp *)((caddr_t) ip - offsetof(struct icmp, icmp_ip));
818 	mtu = ntohs(icp->icmp_nextmtu);
819 
820 	/*
821 	 * Ignore the packet, if we do not receive a MTU
822 	 * or the MTU is too small to be acceptable.
823 	 */
824 	if (mtu < 296)
825 		return;
826 
827 	memset(&dst, 0, sizeof(dst));
828 	dst.sin_family = AF_INET;
829 	dst.sin_len = sizeof(struct sockaddr_in);
830 	dst.sin_addr.s_addr = ip->ip_dst.s_addr;
831 	su_dst = (union sockaddr_union *)&dst;
832 	memset(&src, 0, sizeof(src));
833 	src.sin_family = AF_INET;
834 	src.sin_len = sizeof(struct sockaddr_in);
835 	src.sin_addr.s_addr = ip->ip_src.s_addr;
836 	su_src = (union sockaddr_union *)&src;
837 
838 	tdbp = gettdbbysrcdst(rdomain, 0, su_src, su_dst, IPPROTO_ESP);
839 
840 	for (; tdbp != NULL; tdbp = tdbp->tdb_snext) {
841 		if (tdbp->tdb_sproto == IPPROTO_ESP &&
842 		    ((tdbp->tdb_flags & (TDBF_INVALID|TDBF_UDPENCAP)) ==
843 		    TDBF_UDPENCAP) &&
844 		    !memcmp(&tdbp->tdb_dst, &dst, su_dst->sa.sa_len) &&
845 		    !memcmp(&tdbp->tdb_src, &src, su_src->sa.sa_len)) {
846 			if ((adjust = ipsec_hdrsz(tdbp)) != -1) {
847 				/* Store adjusted MTU in tdb */
848 				tdbp->tdb_mtu = mtu - adjust;
849 				tdbp->tdb_mtutimeout = time_second +
850 				    ip_mtudisc_timeout;
851 				DPRINTF(("udpencap_ctlinput: "
852 				    "spi %08x mtu %d adjust %ld\n",
853 				    ntohl(tdbp->tdb_spi), tdbp->tdb_mtu,
854 				    adjust));
855 			}
856 		}
857 	}
858 }
859 
860 /* XXX rdomain */
861 void
862 esp4_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
863 {
864 	if (sa->sa_family != AF_INET ||
865 	    sa->sa_len != sizeof(struct sockaddr_in))
866 		return;
867 
868 	ipsec_common_ctlinput(rdomain, cmd, sa, v, IPPROTO_ESP);
869 }
870 
871 #ifdef INET6
872 /* IPv6 AH wrapper. */
873 int
874 ah6_input(struct mbuf **mp, int *offp, int proto, int af)
875 {
876 	int l = 0;
877 	int protoff, nxt;
878 	struct ip6_ext ip6e;
879 
880 	if (*offp < sizeof(struct ip6_hdr)) {
881 		DPRINTF(("ah6_input(): bad offset\n"));
882 		ahstat.ahs_hdrops++;
883 		m_freemp(mp);
884 		return IPPROTO_DONE;
885 	} else if (*offp == sizeof(struct ip6_hdr)) {
886 		protoff = offsetof(struct ip6_hdr, ip6_nxt);
887 	} else {
888 		/* Chase down the header chain... */
889 		protoff = sizeof(struct ip6_hdr);
890 		nxt = (mtod(*mp, struct ip6_hdr *))->ip6_nxt;
891 
892 		do {
893 			protoff += l;
894 			m_copydata(*mp, protoff, sizeof(ip6e),
895 			    (caddr_t) &ip6e);
896 
897 			if (nxt == IPPROTO_AH)
898 				l = (ip6e.ip6e_len + 2) << 2;
899 			else
900 				l = (ip6e.ip6e_len + 1) << 3;
901 #ifdef DIAGNOSTIC
902 			if (l <= 0)
903 				panic("ah6_input: l went zero or negative");
904 #endif
905 
906 			nxt = ip6e.ip6e_nxt;
907 		} while (protoff + l < *offp);
908 
909 		/* Malformed packet check */
910 		if (protoff + l != *offp) {
911 			DPRINTF(("ah6_input(): bad packet header chain\n"));
912 			ahstat.ahs_hdrops++;
913 			m_freemp(mp);
914 			return IPPROTO_DONE;
915 		}
916 		protoff += offsetof(struct ip6_ext, ip6e_nxt);
917 	}
918 	ipsec_common_input(*mp, *offp, protoff, AF_INET6, proto, 0);
919 	return IPPROTO_DONE;
920 }
921 
922 /* IPv6 ESP wrapper. */
923 int
924 esp6_input(struct mbuf **mp, int *offp, int proto, int af)
925 {
926 	int l = 0;
927 	int protoff, nxt;
928 	struct ip6_ext ip6e;
929 
930 	if (*offp < sizeof(struct ip6_hdr)) {
931 		DPRINTF(("esp6_input(): bad offset\n"));
932 		espstat.esps_hdrops++;
933 		m_freemp(mp);
934 		return IPPROTO_DONE;
935 	} else if (*offp == sizeof(struct ip6_hdr)) {
936 		protoff = offsetof(struct ip6_hdr, ip6_nxt);
937 	} else {
938 		/* Chase down the header chain... */
939 		protoff = sizeof(struct ip6_hdr);
940 		nxt = (mtod(*mp, struct ip6_hdr *))->ip6_nxt;
941 
942 		do {
943 			protoff += l;
944 			m_copydata(*mp, protoff, sizeof(ip6e),
945 			    (caddr_t) &ip6e);
946 
947 			if (nxt == IPPROTO_AH)
948 				l = (ip6e.ip6e_len + 2) << 2;
949 			else
950 				l = (ip6e.ip6e_len + 1) << 3;
951 #ifdef DIAGNOSTIC
952 			if (l <= 0)
953 				panic("esp6_input: l went zero or negative");
954 #endif
955 
956 			nxt = ip6e.ip6e_nxt;
957 		} while (protoff + l < *offp);
958 
959 		/* Malformed packet check */
960 		if (protoff + l != *offp) {
961 			DPRINTF(("esp6_input(): bad packet header chain\n"));
962 			espstat.esps_hdrops++;
963 			m_freemp(mp);
964 			return IPPROTO_DONE;
965 		}
966 		protoff += offsetof(struct ip6_ext, ip6e_nxt);
967 	}
968 	ipsec_common_input(*mp, *offp, protoff, AF_INET6, proto, 0);
969 	return IPPROTO_DONE;
970 
971 }
972 
973 /* IPv6 IPcomp wrapper */
974 int
975 ipcomp6_input(struct mbuf **mp, int *offp, int proto, int af)
976 {
977 	int l = 0;
978 	int protoff, nxt;
979 	struct ip6_ext ip6e;
980 
981 	if (*offp < sizeof(struct ip6_hdr)) {
982 		DPRINTF(("ipcomp6_input(): bad offset\n"));
983 		ipcompstat.ipcomps_hdrops++;
984 		m_freemp(mp);
985 		return IPPROTO_DONE;
986 	} else if (*offp == sizeof(struct ip6_hdr)) {
987 		protoff = offsetof(struct ip6_hdr, ip6_nxt);
988 	} else {
989 		/* Chase down the header chain... */
990 		protoff = sizeof(struct ip6_hdr);
991 		nxt = (mtod(*mp, struct ip6_hdr *))->ip6_nxt;
992 
993 		do {
994 			protoff += l;
995 			m_copydata(*mp, protoff, sizeof(ip6e),
996 			    (caddr_t) &ip6e);
997 			if (nxt == IPPROTO_AH)
998 				l = (ip6e.ip6e_len + 2) << 2;
999 			else
1000 				l = (ip6e.ip6e_len + 1) << 3;
1001 #ifdef DIAGNOSTIC
1002 			if (l <= 0)
1003 				panic("ipcomp6_input: l went zero or negative");
1004 #endif
1005 
1006 			nxt = ip6e.ip6e_nxt;
1007 		} while (protoff + l < *offp);
1008 
1009 		/* Malformed packet check */
1010 		if (protoff + l != *offp) {
1011 			DPRINTF(("ipcomp6_input(): bad packet header chain\n"));
1012 			ipcompstat.ipcomps_hdrops++;
1013 			m_freemp(mp);
1014 			return IPPROTO_DONE;
1015 		}
1016 
1017 		protoff += offsetof(struct ip6_ext, ip6e_nxt);
1018 	}
1019 	ipsec_common_input(*mp, *offp, protoff, AF_INET6, proto, 0);
1020 	return IPPROTO_DONE;
1021 }
1022 #endif /* INET6 */
1023 
1024 int
1025 ipsec_forward_check(struct mbuf *m, int hlen, int af)
1026 {
1027 	struct tdb *tdb;
1028 	struct tdb_ident *tdbi;
1029 	struct m_tag *mtag;
1030 	int error = 0;
1031 
1032 	/*
1033 	 * IPsec policy check for forwarded packets. Look at
1034 	 * inner-most IPsec SA used.
1035 	 */
1036 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
1037 	if (mtag != NULL) {
1038 		tdbi = (struct tdb_ident *)(mtag + 1);
1039 		tdb = gettdb(tdbi->rdomain, tdbi->spi, &tdbi->dst, tdbi->proto);
1040 	} else
1041 		tdb = NULL;
1042 	ipsp_spd_lookup(m, af, hlen, &error, IPSP_DIRECTION_IN, tdb, NULL, 0);
1043 
1044 	return error;
1045 }
1046 
1047 int
1048 ipsec_local_check(struct mbuf *m, int hlen, int proto, int af)
1049 {
1050 	struct tdb *tdb;
1051 	struct tdb_ident *tdbi;
1052 	struct m_tag *mtag;
1053 	int error = 0;
1054 
1055 	/*
1056 	 * If it's a protected packet for us, skip the policy check.
1057 	 * That's because we really only care about the properties of
1058 	 * the protected packet, and not the intermediate versions.
1059 	 * While this is not the most paranoid setting, it allows
1060 	 * some flexibility in handling nested tunnels (in setting up
1061 	 * the policies).
1062 	 */
1063 	if ((proto == IPPROTO_ESP) || (proto == IPPROTO_AH) ||
1064 	    (proto == IPPROTO_IPCOMP))
1065 		return 0;
1066 
1067 	/*
1068 	 * If the protected packet was tunneled, then we need to
1069 	 * verify the protected packet's information, not the
1070 	 * external headers. Thus, skip the policy lookup for the
1071 	 * external packet, and keep the IPsec information linked on
1072 	 * the packet header (the encapsulation routines know how
1073 	 * to deal with that).
1074 	 */
1075 	if ((proto == IPPROTO_IPV4) || (proto == IPPROTO_IPV6))
1076 		return 0;
1077 
1078 	/*
1079 	 * When processing IPv6 header chains, do not look at the
1080 	 * outer header.  The inner protocol is relevant and will
1081 	 * be checked by the local delivery loop later.
1082 	 */
1083 	if ((af == AF_INET6) && ((proto == IPPROTO_DSTOPTS) ||
1084 	    (proto == IPPROTO_ROUTING) || (proto == IPPROTO_FRAGMENT)))
1085 		return 0;
1086 
1087 	/*
1088 	 * If the protected packet is TCP or UDP, we'll do the
1089 	 * policy check in the respective input routine, so we can
1090 	 * check for bypass sockets.
1091 	 */
1092 	if ((proto == IPPROTO_TCP) || (proto == IPPROTO_UDP))
1093 		return 0;
1094 
1095 	/*
1096 	 * IPsec policy check for local-delivery packets. Look at the
1097 	 * inner-most SA that protected the packet. This is in fact
1098 	 * a bit too restrictive (it could end up causing packets to
1099 	 * be dropped that semantically follow the policy, e.g., in
1100 	 * certain SA-bundle configurations); but the alternative is
1101 	 * very complicated (and requires keeping track of what
1102 	 * kinds of tunneling headers have been seen in-between the
1103 	 * IPsec headers), and I don't think we lose much functionality
1104 	 * that's needed in the real world (who uses bundles anyway ?).
1105 	 */
1106 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
1107 	if (mtag) {
1108 		tdbi = (struct tdb_ident *)(mtag + 1);
1109 		tdb = gettdb(tdbi->rdomain, tdbi->spi, &tdbi->dst,
1110 		    tdbi->proto);
1111 	} else
1112 		tdb = NULL;
1113 	ipsp_spd_lookup(m, af, hlen, &error, IPSP_DIRECTION_IN,
1114 	    tdb, NULL, 0);
1115 
1116 	return error;
1117 }
1118