xref: /openbsd-src/sys/netinet/ip_spd.c (revision f84b1df5a16cdd762c93854218de246e79975d3b)
1 /* $OpenBSD: ip_spd.c,v 1.115 2022/03/13 21:38:32 bluhm Exp $ */
2 /*
3  * The author of this code is Angelos D. Keromytis (angelos@cis.upenn.edu)
4  *
5  * Copyright (c) 2000-2001 Angelos D. Keromytis.
6  *
7  * Permission to use, copy, and modify this software with or without fee
8  * is hereby granted, provided that this entire notice is included in
9  * all copies of any software which is or includes a copy or
10  * modification of this software.
11  * You may use this code under the GNU public license if you so wish. Please
12  * contribute changes back to the authors under this freer than GPL license
13  * so that we may further the use of strong encryption without limitations to
14  * all.
15  *
16  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
17  * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
18  * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
19  * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
20  * PURPOSE.
21  */
22 
23 #include <sys/param.h>
24 #include <sys/systm.h>
25 #include <sys/mbuf.h>
26 #include <sys/socket.h>
27 #include <sys/kernel.h>
28 #include <sys/socketvar.h>
29 #include <sys/pool.h>
30 #include <sys/timeout.h>
31 
32 #include <net/route.h>
33 #include <net/netisr.h>
34 
35 #include <netinet/in.h>
36 #include <netinet/ip.h>
37 #include <netinet/ip_var.h>
38 #include <netinet/in_pcb.h>
39 #include <netinet/ip_ipsp.h>
40 #include <net/pfkeyv2.h>
41 
42 int	ipsp_spd_inp(struct mbuf *, struct inpcb *, struct ipsec_policy *,
43 	    struct tdb **);
44 int	ipsp_acquire_sa(struct ipsec_policy *, union sockaddr_union *,
45 	    union sockaddr_union *, struct sockaddr_encap *, struct mbuf *);
46 int	ipsp_pending_acquire(struct ipsec_policy *, union sockaddr_union *);
47 void	ipsp_delete_acquire_timer(void *);
48 void	ipsp_delete_acquire_locked(struct ipsec_acquire *);
49 void	ipsp_delete_acquire(struct ipsec_acquire *);
50 void	ipsp_unref_acquire_locked(struct ipsec_acquire *);
51 
52 struct pool ipsec_policy_pool;
53 struct pool ipsec_acquire_pool;
54 
55 /*
56  * For tdb_walk() calling tdb_delete_locked() we need lock order
57  * tdb_sadb_mtx before ipo_tdb_mtx.
58  */
59 struct mutex ipo_tdb_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
60 
61 /* Protected by the NET_LOCK(). */
62 struct radix_node_head **spd_tables;
63 unsigned int spd_table_max;
64 
65 struct mutex ipsec_acquire_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
66 struct ipsec_acquire_head ipsec_acquire_head =
67     TAILQ_HEAD_INITIALIZER(ipsec_acquire_head);
68 
69 struct radix_node_head *
70 spd_table_get(unsigned int rtableid)
71 {
72 	unsigned int rdomain;
73 
74 	NET_ASSERT_LOCKED();
75 
76 	if (spd_tables == NULL)
77 		return (NULL);
78 
79 	rdomain = rtable_l2(rtableid);
80 	if (rdomain > spd_table_max)
81 		return (NULL);
82 
83 	return (spd_tables[rdomain]);
84 }
85 
86 struct radix_node_head *
87 spd_table_add(unsigned int rtableid)
88 {
89 	struct radix_node_head *rnh = NULL;
90 	unsigned int rdomain;
91 	void *p;
92 
93 	NET_ASSERT_LOCKED();
94 
95 	rdomain = rtable_l2(rtableid);
96 	if (spd_tables == NULL || rdomain > spd_table_max) {
97 		if ((p = mallocarray(rdomain + 1, sizeof(*rnh),
98 		    M_RTABLE, M_NOWAIT|M_ZERO)) == NULL)
99 			return (NULL);
100 
101 		if (spd_tables != NULL) {
102 			memcpy(p, spd_tables, sizeof(*rnh) * (spd_table_max+1));
103 			free(spd_tables, M_RTABLE,
104 			    sizeof(*rnh) * (spd_table_max+1));
105 		}
106 		spd_tables = p;
107 		spd_table_max = rdomain;
108 	}
109 
110 	if (spd_tables[rdomain] == NULL) {
111 		if (rn_inithead((void **)&rnh,
112 		    offsetof(struct sockaddr_encap, sen_type)) == 0)
113 			rnh = NULL;
114 		spd_tables[rdomain] = rnh;
115 	}
116 
117 	return (spd_tables[rdomain]);
118 }
119 
120 int
121 spd_table_walk(unsigned int rtableid,
122     int (*func)(struct ipsec_policy *, void *, unsigned int), void *arg)
123 {
124 	struct radix_node_head *rnh;
125 	int (*walker)(struct radix_node *, void *, u_int) = (void *)func;
126 	int error;
127 
128 	rnh = spd_table_get(rtableid);
129 	if (rnh == NULL)
130 		return (0);
131 
132 	/* EGAIN means the tree changed. */
133 	while ((error = rn_walktree(rnh, walker, arg)) == EAGAIN)
134 		continue;
135 
136 	return (error);
137 }
138 
139 /*
140  * Lookup at the SPD based on the headers contained on the mbuf. The second
141  * argument indicates what protocol family the header at the beginning of
142  * the mbuf is. hlen is the offset of the transport protocol header
143  * in the mbuf.
144  *
145  * Return combinations (of return value and *tdbout):
146  * - -EINVAL -> silently drop the packet
147  * - errno   -> drop packet and return error
148  * - 0/NULL  -> no IPsec required on packet
149  * - 0/TDB   -> do IPsec
150  *
151  * In the case of incoming flows, only the first three combinations are
152  * returned.
153  */
154 int
155 ipsp_spd_lookup(struct mbuf *m, int af, int hlen, int direction,
156     struct tdb *tdbp, struct inpcb *inp, struct tdb **tdbout,
157     struct ipsec_ids *ipsecflowinfo_ids)
158 {
159 	struct radix_node_head *rnh;
160 	struct radix_node *rn;
161 	union sockaddr_union sdst, ssrc;
162 	struct sockaddr_encap *ddst, dst;
163 	struct ipsec_policy *ipo;
164 	struct ipsec_ids *ids = NULL;
165 	int error, signore = 0, dignore = 0;
166 	u_int rdomain;
167 
168 	NET_ASSERT_LOCKED();
169 
170 	/*
171 	 * If there are no flows in place, there's no point
172 	 * continuing with the SPD lookup.
173 	 */
174 	if (!ipsec_in_use)
175 		return ipsp_spd_inp(m, inp, NULL, tdbout);
176 
177 	/*
178 	 * If an input packet is destined to a BYPASS socket, just accept it.
179 	 */
180 	if ((inp != NULL) && (direction == IPSP_DIRECTION_IN) &&
181 	    (inp->inp_seclevel[SL_ESP_TRANS] == IPSEC_LEVEL_BYPASS) &&
182 	    (inp->inp_seclevel[SL_ESP_NETWORK] == IPSEC_LEVEL_BYPASS) &&
183 	    (inp->inp_seclevel[SL_AUTH] == IPSEC_LEVEL_BYPASS)) {
184 		if (tdbout != NULL)
185 			*tdbout = NULL;
186 		return 0;
187 	}
188 
189 	memset(&dst, 0, sizeof(dst));
190 	memset(&sdst, 0, sizeof(union sockaddr_union));
191 	memset(&ssrc, 0, sizeof(union sockaddr_union));
192 	ddst = (struct sockaddr_encap *)&dst;
193 	ddst->sen_family = PF_KEY;
194 	ddst->sen_len = SENT_LEN;
195 
196 	switch (af) {
197 	case AF_INET:
198 		if (hlen < sizeof (struct ip) || m->m_pkthdr.len < hlen)
199 			return EINVAL;
200 
201 		ddst->sen_direction = direction;
202 		ddst->sen_type = SENT_IP4;
203 
204 		m_copydata(m, offsetof(struct ip, ip_src),
205 		    sizeof(struct in_addr), (caddr_t) &(ddst->sen_ip_src));
206 		m_copydata(m, offsetof(struct ip, ip_dst),
207 		    sizeof(struct in_addr), (caddr_t) &(ddst->sen_ip_dst));
208 		m_copydata(m, offsetof(struct ip, ip_p), sizeof(u_int8_t),
209 		    (caddr_t) &(ddst->sen_proto));
210 
211 		sdst.sin.sin_family = ssrc.sin.sin_family = AF_INET;
212 		sdst.sin.sin_len = ssrc.sin.sin_len =
213 		    sizeof(struct sockaddr_in);
214 		ssrc.sin.sin_addr = ddst->sen_ip_src;
215 		sdst.sin.sin_addr = ddst->sen_ip_dst;
216 
217 		/*
218 		 * If TCP/UDP, extract the port numbers to use in the lookup.
219 		 */
220 		switch (ddst->sen_proto) {
221 		case IPPROTO_UDP:
222 		case IPPROTO_TCP:
223 			/* Make sure there's enough data in the packet. */
224 			if (m->m_pkthdr.len < hlen + 2 * sizeof(u_int16_t))
225 				return EINVAL;
226 
227 			/*
228 			 * Luckily, the offset of the src/dst ports in
229 			 * both the UDP and TCP headers is the same (first
230 			 * two 16-bit values in the respective headers),
231 			 * so we can just copy them.
232 			 */
233 			m_copydata(m, hlen, sizeof(u_int16_t),
234 			    (caddr_t) &(ddst->sen_sport));
235 			m_copydata(m, hlen + sizeof(u_int16_t), sizeof(u_int16_t),
236 			    (caddr_t) &(ddst->sen_dport));
237 			break;
238 
239 		default:
240 			ddst->sen_sport = 0;
241 			ddst->sen_dport = 0;
242 		}
243 
244 		break;
245 
246 #ifdef INET6
247 	case AF_INET6:
248 		if (hlen < sizeof (struct ip6_hdr) || m->m_pkthdr.len < hlen)
249 			return EINVAL;
250 
251 		ddst->sen_type = SENT_IP6;
252 		ddst->sen_ip6_direction = direction;
253 
254 		m_copydata(m, offsetof(struct ip6_hdr, ip6_src),
255 		    sizeof(struct in6_addr),
256 		    (caddr_t) &(ddst->sen_ip6_src));
257 		m_copydata(m, offsetof(struct ip6_hdr, ip6_dst),
258 		    sizeof(struct in6_addr),
259 		    (caddr_t) &(ddst->sen_ip6_dst));
260 		m_copydata(m, offsetof(struct ip6_hdr, ip6_nxt),
261 		    sizeof(u_int8_t),
262 		    (caddr_t) &(ddst->sen_ip6_proto));
263 
264 		sdst.sin6.sin6_family = ssrc.sin6.sin6_family = AF_INET6;
265 		sdst.sin6.sin6_len = ssrc.sin6.sin6_len =
266 		    sizeof(struct sockaddr_in6);
267 		in6_recoverscope(&ssrc.sin6, &ddst->sen_ip6_src);
268 		in6_recoverscope(&sdst.sin6, &ddst->sen_ip6_dst);
269 
270 		/*
271 		 * If TCP/UDP, extract the port numbers to use in the lookup.
272 		 */
273 		switch (ddst->sen_ip6_proto) {
274 		case IPPROTO_UDP:
275 		case IPPROTO_TCP:
276 			/* Make sure there's enough data in the packet. */
277 			if (m->m_pkthdr.len < hlen + 2 * sizeof(u_int16_t))
278 				return EINVAL;
279 
280 			/*
281 			 * Luckily, the offset of the src/dst ports in
282 			 * both the UDP and TCP headers is the same
283 			 * (first two 16-bit values in the respective
284 			 * headers), so we can just copy them.
285 			 */
286 			m_copydata(m, hlen, sizeof(u_int16_t),
287 			    (caddr_t) &(ddst->sen_ip6_sport));
288 			m_copydata(m, hlen + sizeof(u_int16_t), sizeof(u_int16_t),
289 			    (caddr_t) &(ddst->sen_ip6_dport));
290 			break;
291 
292 		default:
293 			ddst->sen_ip6_sport = 0;
294 			ddst->sen_ip6_dport = 0;
295 		}
296 
297 		break;
298 #endif /* INET6 */
299 
300 	default:
301 		return EAFNOSUPPORT;
302 	}
303 
304 	/* Actual SPD lookup. */
305 	rdomain = rtable_l2(m->m_pkthdr.ph_rtableid);
306 	if ((rnh = spd_table_get(rdomain)) == NULL ||
307 	    (rn = rn_match((caddr_t)&dst, rnh)) == NULL) {
308 		/*
309 		 * Return whatever the socket requirements are, there are no
310 		 * system-wide policies.
311 		 */
312 		return ipsp_spd_inp(m, inp, NULL, tdbout);
313 	}
314 	ipo = (struct ipsec_policy *)rn;
315 
316 	switch (ipo->ipo_type) {
317 	case IPSP_PERMIT:
318 		return ipsp_spd_inp(m, inp, ipo, tdbout);
319 
320 	case IPSP_DENY:
321 		return EHOSTUNREACH;
322 
323 	case IPSP_IPSEC_USE:
324 	case IPSP_IPSEC_ACQUIRE:
325 	case IPSP_IPSEC_REQUIRE:
326 	case IPSP_IPSEC_DONTACQ:
327 		/* Nothing more needed here. */
328 		break;
329 
330 	default:
331 		return EINVAL;
332 	}
333 
334 	/* Check for non-specific destination in the policy. */
335 	switch (ipo->ipo_dst.sa.sa_family) {
336 	case AF_INET:
337 		if ((ipo->ipo_dst.sin.sin_addr.s_addr == INADDR_ANY) ||
338 		    (ipo->ipo_dst.sin.sin_addr.s_addr == INADDR_BROADCAST))
339 			dignore = 1;
340 		break;
341 
342 #ifdef INET6
343 	case AF_INET6:
344 		if ((IN6_IS_ADDR_UNSPECIFIED(&ipo->ipo_dst.sin6.sin6_addr)) ||
345 		    (memcmp(&ipo->ipo_dst.sin6.sin6_addr, &in6mask128,
346 		    sizeof(in6mask128)) == 0))
347 			dignore = 1;
348 		break;
349 #endif /* INET6 */
350 	}
351 
352 	/* Likewise for source. */
353 	switch (ipo->ipo_src.sa.sa_family) {
354 	case AF_INET:
355 		if (ipo->ipo_src.sin.sin_addr.s_addr == INADDR_ANY)
356 			signore = 1;
357 		break;
358 
359 #ifdef INET6
360 	case AF_INET6:
361 		if (IN6_IS_ADDR_UNSPECIFIED(&ipo->ipo_src.sin6.sin6_addr))
362 			signore = 1;
363 		break;
364 #endif /* INET6 */
365 	}
366 
367 	/* Do we have a cached entry ? If so, check if it's still valid. */
368 	mtx_enter(&ipo_tdb_mtx);
369 	if (ipo->ipo_tdb != NULL &&
370 	    (ipo->ipo_tdb->tdb_flags & TDBF_INVALID)) {
371 		TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
372 		    ipo_tdb_next);
373 		tdb_unref(ipo->ipo_tdb);
374 		ipo->ipo_tdb = NULL;
375 	}
376 	mtx_leave(&ipo_tdb_mtx);
377 
378 	/* Outgoing packet policy check. */
379 	if (direction == IPSP_DIRECTION_OUT) {
380 		/*
381 		 * If the packet is destined for the policy-specified
382 		 * gateway/endhost, and the socket has the BYPASS
383 		 * option set, skip IPsec processing.
384 		 */
385 		if ((inp != NULL) &&
386 		    (inp->inp_seclevel[SL_ESP_TRANS] == IPSEC_LEVEL_BYPASS) &&
387 		    (inp->inp_seclevel[SL_ESP_NETWORK] ==
388 			IPSEC_LEVEL_BYPASS) &&
389 		    (inp->inp_seclevel[SL_AUTH] == IPSEC_LEVEL_BYPASS)) {
390 			/* Direct match. */
391 			if (dignore ||
392 			    !memcmp(&sdst, &ipo->ipo_dst, sdst.sa.sa_len)) {
393 				if (tdbout != NULL)
394 					*tdbout = NULL;
395 				return 0;
396 			}
397 		}
398 
399 		/* Check that the cached TDB (if present), is appropriate. */
400 		mtx_enter(&ipo_tdb_mtx);
401 		if (ipo->ipo_tdb != NULL) {
402 			if ((ipo->ipo_last_searched <= ipsec_last_added) ||
403 			    (ipo->ipo_sproto != ipo->ipo_tdb->tdb_sproto) ||
404 			    memcmp(dignore ? &sdst : &ipo->ipo_dst,
405 			    &ipo->ipo_tdb->tdb_dst,
406 			    ipo->ipo_tdb->tdb_dst.sa.sa_len))
407 				goto nomatchout;
408 
409 			if (!ipsp_aux_match(ipo->ipo_tdb,
410 			    ipsecflowinfo_ids? ipsecflowinfo_ids: ipo->ipo_ids,
411 			    &ipo->ipo_addr, &ipo->ipo_mask))
412 				goto nomatchout;
413 
414 			/* Cached entry is good. */
415 			error = ipsp_spd_inp(m, inp, ipo, tdbout);
416 			mtx_leave(&ipo_tdb_mtx);
417 			return error;
418 
419   nomatchout:
420 			/* Cached TDB was not good. */
421 			TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
422 			    ipo_tdb_next);
423 			tdb_unref(ipo->ipo_tdb);
424 			ipo->ipo_tdb = NULL;
425 			ipo->ipo_last_searched = 0;
426 		}
427 
428 		/*
429 		 * If no SA has been added since the last time we did a
430 		 * lookup, there's no point searching for one. However, if the
431 		 * destination gateway is left unspecified (or is all-1's),
432 		 * always lookup since this is a generic-match rule
433 		 * (otherwise, we can have situations where SAs to some
434 		 * destinations exist but are not used, possibly leading to an
435 		 * explosion in the number of acquired SAs).
436 		 */
437 		if (ipo->ipo_last_searched <= ipsec_last_added)	{
438 			struct tdb *tdbp_new;
439 
440 			/* "Touch" the entry. */
441 			if (dignore == 0)
442 				ipo->ipo_last_searched = getuptime();
443 
444 			/* gettdb() takes tdb_sadb_mtx, preserve lock order */
445 			mtx_leave(&ipo_tdb_mtx);
446 			/* Find an appropriate SA from the existing ones. */
447 			tdbp_new = gettdbbydst(rdomain,
448 			    dignore ? &sdst : &ipo->ipo_dst,
449 			    ipo->ipo_sproto,
450 			    ipsecflowinfo_ids? ipsecflowinfo_ids: ipo->ipo_ids,
451 			    &ipo->ipo_addr, &ipo->ipo_mask);
452 			ids = NULL;
453 			mtx_enter(&ipo_tdb_mtx);
454 			if ((tdbp_new != NULL) &&
455 			    (tdbp_new->tdb_flags & TDBF_DELETED)) {
456 				/*
457 				 * After tdb_delete() has released ipo_tdb_mtx
458 				 * in tdb_unlink(), never add a new one.
459 				 * tdb_cleanspd() has to catch all of them.
460 				 */
461 				tdb_unref(tdbp_new);
462 				tdbp_new = NULL;
463 			}
464 			if (ipo->ipo_tdb != NULL) {
465 				/* Remove cached TDB from parallel thread. */
466 				TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head,
467 				    ipo, ipo_tdb_next);
468 				tdb_unref(ipo->ipo_tdb);
469 			}
470 			ipo->ipo_tdb = tdbp_new;
471 			if (ipo->ipo_tdb != NULL) {
472 				/* gettdbbydst() has already refcounted tdb */
473 				TAILQ_INSERT_TAIL(
474 				    &ipo->ipo_tdb->tdb_policy_head,
475 				    ipo, ipo_tdb_next);
476 				error = ipsp_spd_inp(m, inp, ipo, tdbout);
477 				mtx_leave(&ipo_tdb_mtx);
478 				return error;
479 			}
480 		}
481 		mtx_leave(&ipo_tdb_mtx);
482 
483 		/* So, we don't have an SA -- just a policy. */
484 		switch (ipo->ipo_type) {
485 		case IPSP_IPSEC_REQUIRE:
486 			/* Acquire SA through key management. */
487 			if (ipsp_acquire_sa(ipo,
488 			    dignore ? &sdst : &ipo->ipo_dst,
489 			    signore ? NULL : &ipo->ipo_src, ddst, m) != 0) {
490 				return EACCES;
491 			}
492 
493 			/* FALLTHROUGH */
494 		case IPSP_IPSEC_DONTACQ:
495 			return -EINVAL;  /* Silently drop packet. */
496 
497 		case IPSP_IPSEC_ACQUIRE:
498 			/* Acquire SA through key management. */
499 			ipsp_acquire_sa(ipo, dignore ? &sdst : &ipo->ipo_dst,
500 			    signore ? NULL : &ipo->ipo_src, ddst, NULL);
501 
502 			/* FALLTHROUGH */
503 		case IPSP_IPSEC_USE:
504 			return ipsp_spd_inp(m, inp, ipo, tdbout);
505 		}
506 	} else { /* IPSP_DIRECTION_IN */
507 		if (tdbp != NULL) {
508 			/*
509 			 * Special case for bundled IPcomp/ESP SAs:
510 			 * 1) only IPcomp flows are loaded into kernel
511 			 * 2) input processing processes ESP SA first
512 			 * 3) then optional IPcomp processing happens
513 			 * 4) we only update m_tag for ESP
514 			 * => 'tdbp' is always set to ESP SA
515 			 * => flow has ipo_proto for IPcomp
516 			 * So if 'tdbp' points to an ESP SA and this 'tdbp' is
517 			 * bundled with an IPcomp SA, then we replace 'tdbp'
518 			 * with the IPcomp SA at tdbp->tdb_inext.
519 			 */
520 			if (ipo->ipo_sproto == IPPROTO_IPCOMP &&
521 			    tdbp->tdb_sproto == IPPROTO_ESP &&
522 			    tdbp->tdb_inext != NULL &&
523 			    tdbp->tdb_inext->tdb_sproto == IPPROTO_IPCOMP)
524 				tdbp = tdbp->tdb_inext;
525 
526 			/* Direct match in the cache. */
527 			mtx_enter(&ipo_tdb_mtx);
528 			if (ipo->ipo_tdb == tdbp) {
529 				error = ipsp_spd_inp(m, inp, ipo, tdbout);
530 				mtx_leave(&ipo_tdb_mtx);
531 				return error;
532 			}
533 			mtx_leave(&ipo_tdb_mtx);
534 
535 			if (memcmp(dignore ? &ssrc : &ipo->ipo_dst,
536 			    &tdbp->tdb_src, tdbp->tdb_src.sa.sa_len) ||
537 			    (ipo->ipo_sproto != tdbp->tdb_sproto))
538 				goto nomatchin;
539 
540 			/* Match source/dest IDs. */
541 			if (ipo->ipo_ids)
542 				if (tdbp->tdb_ids == NULL ||
543 				    !ipsp_ids_match(ipo->ipo_ids, tdbp->tdb_ids))
544 					goto nomatchin;
545 
546 			/* Add it to the cache. */
547 			mtx_enter(&ipo_tdb_mtx);
548 			if (ipo->ipo_tdb != NULL) {
549 				TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head,
550 				    ipo, ipo_tdb_next);
551 				tdb_unref(ipo->ipo_tdb);
552 			}
553 			ipo->ipo_tdb = tdb_ref(tdbp);
554 			TAILQ_INSERT_TAIL(&tdbp->tdb_policy_head, ipo,
555 			    ipo_tdb_next);
556 			error = ipsp_spd_inp(m, inp, ipo, tdbout);
557 			mtx_leave(&ipo_tdb_mtx);
558 			return error;
559 
560   nomatchin: /* Nothing needed here, falling through */
561 	;
562 		}
563 
564 		/* Check whether cached entry applies. */
565 		mtx_enter(&ipo_tdb_mtx);
566 		if (ipo->ipo_tdb != NULL) {
567 			/*
568 			 * We only need to check that the correct
569 			 * security protocol and security gateway are
570 			 * set; IDs will be the same since the cached
571 			 * entry is linked on this policy.
572 			 */
573 			if (ipo->ipo_sproto == ipo->ipo_tdb->tdb_sproto &&
574 			    !memcmp(&ipo->ipo_tdb->tdb_src,
575 			    dignore ? &ssrc : &ipo->ipo_dst,
576 			    ipo->ipo_tdb->tdb_src.sa.sa_len))
577 				goto skipinputsearch;
578 
579 			/* Not applicable, unlink. */
580 			TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
581 			    ipo_tdb_next);
582 			tdb_unref(ipo->ipo_tdb);
583 			ipo->ipo_tdb = NULL;
584 			ipo->ipo_last_searched = 0;
585 		}
586 
587 		/* Find whether there exists an appropriate SA. */
588 		if (ipo->ipo_last_searched <= ipsec_last_added)	{
589 			struct tdb *tdbp_new;
590 
591 			if (dignore == 0)
592 				ipo->ipo_last_searched = getuptime();
593 
594 			/* gettdb() takes tdb_sadb_mtx, preserve lock order */
595 			mtx_leave(&ipo_tdb_mtx);
596 			tdbp_new = gettdbbysrc(rdomain,
597 			    dignore ? &ssrc : &ipo->ipo_dst,
598 			    ipo->ipo_sproto, ipo->ipo_ids,
599 			    &ipo->ipo_addr, &ipo->ipo_mask);
600 			mtx_enter(&ipo_tdb_mtx);
601 			if ((tdbp_new != NULL) &&
602 			    (tdbp_new->tdb_flags & TDBF_DELETED)) {
603 				/*
604 				 * After tdb_delete() has released ipo_tdb_mtx
605 				 * in tdb_unlink(), never add a new one.
606 				 * tdb_cleanspd() has to catch all of them.
607 				 */
608 				tdb_unref(tdbp_new);
609 				tdbp_new = NULL;
610 			}
611 			if (ipo->ipo_tdb != NULL) {
612 				/* Remove cached TDB from parallel thread. */
613 				TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head,
614 				    ipo, ipo_tdb_next);
615 				tdb_unref(ipo->ipo_tdb);
616 			}
617 			ipo->ipo_tdb = tdbp_new;
618 			if (ipo->ipo_tdb != NULL) {
619 				/* gettdbbysrc() has already refcounted tdb */
620 				TAILQ_INSERT_TAIL(
621 				    &ipo->ipo_tdb->tdb_policy_head,
622 				    ipo, ipo_tdb_next);
623 			}
624 		}
625   skipinputsearch:
626 		mtx_leave(&ipo_tdb_mtx);
627 
628 		switch (ipo->ipo_type) {
629 		case IPSP_IPSEC_REQUIRE:
630 			/* If appropriate SA exists, don't acquire another. */
631 			if (ipo->ipo_tdb != NULL)
632 				return -EINVAL;  /* Silently drop packet. */
633 
634 			/* Acquire SA through key management. */
635 			if ((error = ipsp_acquire_sa(ipo,
636 			    dignore ? &ssrc : &ipo->ipo_dst,
637 			    signore ? NULL : &ipo->ipo_src, ddst, m)) != 0)
638 				return error;
639 
640 			/* FALLTHROUGH */
641 		case IPSP_IPSEC_DONTACQ:
642 			return -EINVAL;  /* Silently drop packet. */
643 
644 		case IPSP_IPSEC_ACQUIRE:
645 			/* If appropriate SA exists, don't acquire another. */
646 			if (ipo->ipo_tdb != NULL)
647 				return ipsp_spd_inp(m, inp, ipo, tdbout);
648 
649 			/* Acquire SA through key management. */
650 			ipsp_acquire_sa(ipo, dignore ? &ssrc : &ipo->ipo_dst,
651 			    signore ? NULL : &ipo->ipo_src, ddst, NULL);
652 
653 			/* FALLTHROUGH */
654 		case IPSP_IPSEC_USE:
655 			return ipsp_spd_inp(m, inp, ipo, tdbout);
656 		}
657 	}
658 
659 	/* Shouldn't ever get this far. */
660 	return EINVAL;
661 }
662 
663 /*
664  * Delete a policy from the SPD.
665  */
int
ipsec_delete_policy(struct ipsec_policy *ipo)
{
	struct ipsec_acquire *ipa;
	struct radix_node_head *rnh;
	struct radix_node *rn = (struct radix_node *)ipo;

	NET_ASSERT_LOCKED();

	/* Drop one reference; if others remain the policy stays alive. */
	if (refcnt_rele(&ipo->ipo_refcnt) == 0)
		return 0;

	/* Delete from SPD. */
	if ((rnh = spd_table_get(ipo->ipo_rdomain)) == NULL ||
	    rn_delete(&ipo->ipo_addr, &ipo->ipo_mask, rnh, rn) == NULL)
		return (ESRCH);

	/* Detach from the cached TDB, if any, and drop its reference. */
	mtx_enter(&ipo_tdb_mtx);
	if (ipo->ipo_tdb != NULL) {
		TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
		    ipo_tdb_next);
		tdb_unref(ipo->ipo_tdb);
		ipo->ipo_tdb = NULL;
	}
	mtx_leave(&ipo_tdb_mtx);

	/* Tear down every ACQUIRE still pending on this policy. */
	mtx_enter(&ipsec_acquire_mtx);
	while ((ipa = TAILQ_FIRST(&ipo->ipo_acquires)) != NULL)
		ipsp_delete_acquire_locked(ipa);
	mtx_leave(&ipsec_acquire_mtx);

	TAILQ_REMOVE(&ipsec_policy_head, ipo, ipo_list);

	if (ipo->ipo_ids)
		ipsp_ids_free(ipo->ipo_ids);

	ipsec_in_use--;

	pool_put(&ipsec_policy_pool, ipo);

	return 0;
}
708 
/*
 * Timeout handler: expire an ACQUIRE record whose lifetime ran out.
 */
void
ipsp_delete_acquire_timer(void *v)
{
	ipsp_delete_acquire((struct ipsec_acquire *)v);
}
716 
717 /*
718  * Delete a pending IPsec acquire record.
719  */
/* Locked wrapper: delete a pending ACQUIRE under ipsec_acquire_mtx. */
void
ipsp_delete_acquire(struct ipsec_acquire *ipa)
{
	mtx_enter(&ipsec_acquire_mtx);
	ipsp_delete_acquire_locked(ipa);
	mtx_leave(&ipsec_acquire_mtx);
}
727 
/* Caller must hold ipsec_acquire_mtx. */
void
ipsp_delete_acquire_locked(struct ipsec_acquire *ipa)
{
	/*
	 * timeout_del() returns 1 only if the timeout was still pending,
	 * in which case we drop the reference the timeout was holding.
	 */
	if (timeout_del(&ipa->ipa_timeout) == 1)
		refcnt_rele(&ipa->ipa_refcnt);
	/* Drop the caller's reference; may free the record. */
	ipsp_unref_acquire_locked(ipa);
}
735 
/* Locked wrapper: release one ACQUIRE reference under ipsec_acquire_mtx. */
void
ipsec_unref_acquire(struct ipsec_acquire *ipa)
{
	mtx_enter(&ipsec_acquire_mtx);
	ipsp_unref_acquire_locked(ipa);
	mtx_leave(&ipsec_acquire_mtx);
}
743 
/*
 * Release one reference on an ACQUIRE record; on the last reference,
 * unlink it from the global and per-policy lists and free it.
 * Caller must hold ipsec_acquire_mtx.
 */
void
ipsp_unref_acquire_locked(struct ipsec_acquire *ipa)
{
	MUTEX_ASSERT_LOCKED(&ipsec_acquire_mtx);

	/* Still referenced elsewhere: nothing to do. */
	if (refcnt_rele(&ipa->ipa_refcnt) == 0)
		return;
	/* Last reference: unlink before returning the record to the pool. */
	TAILQ_REMOVE(&ipsec_acquire_head, ipa, ipa_next);
	TAILQ_REMOVE(&ipa->ipa_policy->ipo_acquires, ipa, ipa_ipo_next);
	ipa->ipa_policy = NULL;

	pool_put(&ipsec_acquire_pool, ipa);
}
757 
758 /*
759  * Find out if there's an ACQUIRE pending.
760  * XXX Need a better structure.
761  */
762 int
763 ipsp_pending_acquire(struct ipsec_policy *ipo, union sockaddr_union *gw)
764 {
765 	struct ipsec_acquire *ipa;
766 
767 	NET_ASSERT_LOCKED();
768 
769 	mtx_enter(&ipsec_acquire_mtx);
770 	TAILQ_FOREACH(ipa, &ipo->ipo_acquires, ipa_ipo_next) {
771 		if (!memcmp(gw, &ipa->ipa_addr, gw->sa.sa_len))
772 			break;
773 	}
774 	mtx_leave(&ipsec_acquire_mtx);
775 
776 	return (ipa != NULL);
777 }
778 
779 /*
780  * Signal key management that we need an SA.
781  * XXX For outgoing policies, we could try to hold on to the mbuf.
782  */
int
ipsp_acquire_sa(struct ipsec_policy *ipo, union sockaddr_union *gw,
    union sockaddr_union *laddr, struct sockaddr_encap *ddst, struct mbuf *m)
{
	struct ipsec_acquire *ipa;

	NET_ASSERT_LOCKED();

	/* Check whether request has been made already. */
	if (ipsp_pending_acquire(ipo, gw))
		return 0;

	/* Add request in cache and proceed. */
	ipa = pool_get(&ipsec_acquire_pool, PR_NOWAIT|PR_ZERO);
	if (ipa == NULL)
		return ENOMEM;

	ipa->ipa_addr = *gw;

	refcnt_init(&ipa->ipa_refcnt);
	timeout_set(&ipa->ipa_timeout, ipsp_delete_acquire_timer, ipa);

	ipa->ipa_info.sen_len = ipa->ipa_mask.sen_len = SENT_LEN;
	ipa->ipa_info.sen_family = ipa->ipa_mask.sen_family = PF_KEY;

	/*
	 * Fill in the flow description (info/mask pair) that will be
	 * handed to key management.  Just copy the right information.
	 */
	switch (ipo->ipo_addr.sen_type) {
	case SENT_IP4:
		ipa->ipa_info.sen_type = ipa->ipa_mask.sen_type = SENT_IP4;
		ipa->ipa_info.sen_direction = ipo->ipo_addr.sen_direction;
		ipa->ipa_mask.sen_direction = ipo->ipo_mask.sen_direction;

		if (ipsp_is_unspecified(ipo->ipo_dst)) {
			/*
			 * Generic-match policy: describe the exact packet
			 * addresses (host masks) rather than the policy's.
			 */
			ipa->ipa_info.sen_ip_src = ddst->sen_ip_src;
			ipa->ipa_mask.sen_ip_src.s_addr = INADDR_BROADCAST;

			ipa->ipa_info.sen_ip_dst = ddst->sen_ip_dst;
			ipa->ipa_mask.sen_ip_dst.s_addr = INADDR_BROADCAST;
		} else {
			/* Use the policy's own address/mask pair. */
			ipa->ipa_info.sen_ip_src = ipo->ipo_addr.sen_ip_src;
			ipa->ipa_mask.sen_ip_src = ipo->ipo_mask.sen_ip_src;

			ipa->ipa_info.sen_ip_dst = ipo->ipo_addr.sen_ip_dst;
			ipa->ipa_mask.sen_ip_dst = ipo->ipo_mask.sen_ip_dst;
		}

		ipa->ipa_info.sen_proto = ipo->ipo_addr.sen_proto;
		ipa->ipa_mask.sen_proto = ipo->ipo_mask.sen_proto;

		if (ipo->ipo_addr.sen_proto) {
			ipa->ipa_info.sen_sport = ipo->ipo_addr.sen_sport;
			ipa->ipa_mask.sen_sport = ipo->ipo_mask.sen_sport;

			ipa->ipa_info.sen_dport = ipo->ipo_addr.sen_dport;
			ipa->ipa_mask.sen_dport = ipo->ipo_mask.sen_dport;
		}
		break;

#ifdef INET6
	case SENT_IP6:
		ipa->ipa_info.sen_type = ipa->ipa_mask.sen_type = SENT_IP6;
		ipa->ipa_info.sen_ip6_direction =
		    ipo->ipo_addr.sen_ip6_direction;
		ipa->ipa_mask.sen_ip6_direction =
		    ipo->ipo_mask.sen_ip6_direction;

		if (ipsp_is_unspecified(ipo->ipo_dst)) {
			/* Generic-match policy: use the packet's addresses. */
			ipa->ipa_info.sen_ip6_src = ddst->sen_ip6_src;
			ipa->ipa_mask.sen_ip6_src = in6mask128;

			ipa->ipa_info.sen_ip6_dst = ddst->sen_ip6_dst;
			ipa->ipa_mask.sen_ip6_dst = in6mask128;
		} else {
			ipa->ipa_info.sen_ip6_src = ipo->ipo_addr.sen_ip6_src;
			ipa->ipa_mask.sen_ip6_src = ipo->ipo_mask.sen_ip6_src;

			ipa->ipa_info.sen_ip6_dst = ipo->ipo_addr.sen_ip6_dst;
			ipa->ipa_mask.sen_ip6_dst = ipo->ipo_mask.sen_ip6_dst;
		}

		ipa->ipa_info.sen_ip6_proto = ipo->ipo_addr.sen_ip6_proto;
		ipa->ipa_mask.sen_ip6_proto = ipo->ipo_mask.sen_ip6_proto;

		/*
		 * NOTE(review): the IPv4 branch keys this off
		 * ipo_addr.sen_proto, but here the mask's proto is tested —
		 * looks intentional in-tree, but worth confirming.
		 */
		if (ipo->ipo_mask.sen_ip6_proto) {
			ipa->ipa_info.sen_ip6_sport =
			    ipo->ipo_addr.sen_ip6_sport;
			ipa->ipa_mask.sen_ip6_sport =
			    ipo->ipo_mask.sen_ip6_sport;
			ipa->ipa_info.sen_ip6_dport =
			    ipo->ipo_addr.sen_ip6_dport;
			ipa->ipa_mask.sen_ip6_dport =
			    ipo->ipo_mask.sen_ip6_dport;
		}
		break;
#endif /* INET6 */

	default:
		pool_put(&ipsec_acquire_pool, ipa);
		return 0;
	}

	mtx_enter(&ipsec_acquire_mtx);
#ifdef IPSEC
	/* Expiration timer holds its own reference while armed. */
	if (timeout_add_sec(&ipa->ipa_timeout, ipsec_expire_acquire) == 1)
		refcnt_take(&ipa->ipa_refcnt);
#endif
	TAILQ_INSERT_TAIL(&ipsec_acquire_head, ipa, ipa_next);
	TAILQ_INSERT_TAIL(&ipo->ipo_acquires, ipa, ipa_ipo_next);
	ipa->ipa_policy = ipo;
	mtx_leave(&ipsec_acquire_mtx);

	/* PF_KEYv2 notification message. */
	return pfkeyv2_acquire(ipo, gw, laddr, &ipa->ipa_seq, ddst);
}
897 
898 /*
899  * Deal with PCB security requirements.
900  */
901 int
902 ipsp_spd_inp(struct mbuf *m, struct inpcb *inp, struct ipsec_policy *ipo,
903     struct tdb **tdbout)
904 {
905 	/* Sanity check. */
906 	if (inp == NULL)
907 		goto justreturn;
908 
909 	/* We only support IPSEC_LEVEL_BYPASS or IPSEC_LEVEL_AVAIL */
910 
911 	if (inp->inp_seclevel[SL_ESP_TRANS] == IPSEC_LEVEL_BYPASS &&
912 	    inp->inp_seclevel[SL_ESP_NETWORK] == IPSEC_LEVEL_BYPASS &&
913 	    inp->inp_seclevel[SL_AUTH] == IPSEC_LEVEL_BYPASS)
914 		goto justreturn;
915 
916 	if (inp->inp_seclevel[SL_ESP_TRANS] == IPSEC_LEVEL_AVAIL &&
917 	    inp->inp_seclevel[SL_ESP_NETWORK] == IPSEC_LEVEL_AVAIL &&
918 	    inp->inp_seclevel[SL_AUTH] == IPSEC_LEVEL_AVAIL)
919 		goto justreturn;
920 
921 	return -EINVAL;  /* Silently drop packet. */
922 
923  justreturn:
924 	if (tdbout != NULL) {
925 		if (ipo != NULL)
926 			*tdbout = tdb_ref(ipo->ipo_tdb);
927 		else
928 			*tdbout = NULL;
929 	}
930 	return 0;
931 }
932 
933 /*
934  * Find a pending ACQUIRE record based on its sequence number.
935  * XXX Need to use a better data structure.
936  */
937 struct ipsec_acquire *
938 ipsec_get_acquire(u_int32_t seq)
939 {
940 	struct ipsec_acquire *ipa;
941 
942 	NET_ASSERT_LOCKED();
943 
944 	mtx_enter(&ipsec_acquire_mtx);
945 	TAILQ_FOREACH(ipa, &ipsec_acquire_head, ipa_next) {
946 		if (ipa->ipa_seq == seq) {
947 			refcnt_take(&ipa->ipa_refcnt);
948 			break;
949 		}
950 	}
951 	mtx_leave(&ipsec_acquire_mtx);
952 
953 	return ipa;
954 }
955