xref: /openbsd-src/sys/net/pf_lb.c (revision c1a45aed656e7d5627c30c92421893a76f370ccb)
1 /*	$OpenBSD: pf_lb.c,v 1.70 2022/02/16 08:46:11 sashan Exp $ */
2 
3 /*
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  */
37 
38 #include "bpfilter.h"
39 #include "pflog.h"
40 #include "pfsync.h"
41 #include "pflow.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/mbuf.h>
46 #include <sys/filio.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/kernel.h>
50 #include <sys/time.h>
51 #include <sys/pool.h>
52 #include <sys/rwlock.h>
53 #include <sys/syslog.h>
54 #include <sys/stdint.h>
55 
56 #include <crypto/siphash.h>
57 
58 #include <net/if.h>
59 #include <net/bpf.h>
60 #include <net/route.h>
61 
62 #include <netinet/in.h>
63 #include <netinet/ip.h>
64 #include <netinet/in_pcb.h>
65 #include <netinet/ip_var.h>
66 #include <netinet/ip_icmp.h>
67 #include <netinet/icmp_var.h>
68 #include <netinet/tcp.h>
69 #include <netinet/tcp_seq.h>
70 #include <netinet/tcp_timer.h>
71 #include <netinet/udp.h>
72 #include <netinet/udp_var.h>
73 #include <netinet/if_ether.h>
74 
75 #ifdef INET6
76 #include <netinet/ip6.h>
77 #include <netinet/icmp6.h>
78 #endif /* INET6 */
79 
80 #include <net/pfvar.h>
81 #include <net/pfvar_priv.h>
82 
83 #if NPFLOG > 0
84 #include <net/if_pflog.h>
85 #endif	/* NPFLOG > 0 */
86 
87 #if NPFLOW > 0
88 #include <net/if_pflow.h>
89 #endif	/* NPFLOW > 0 */
90 
91 #if NPFSYNC > 0
92 #include <net/if_pfsync.h>
93 #endif /* NPFSYNC > 0 */
94 
/*
 * Forward declarations for helpers that are defined further down in
 * this file.
 */
u_int64_t		 pf_hash(struct pf_addr *, struct pf_addr *,
			    struct pf_poolhashkey *, sa_family_t);
int			 pf_get_sport(struct pf_pdesc *, struct pf_rule *,
			    struct pf_addr *, u_int16_t *, u_int16_t,
			    u_int16_t, struct pf_src_node **);
int			 pf_map_addr_states_increase(sa_family_t,
				struct pf_pool *, struct pf_addr *);
int			 pf_get_transaddr_af(struct pf_rule *,
			    struct pf_pdesc *, struct pf_src_node **);
int			 pf_map_addr_sticky(sa_family_t, struct pf_rule *,
			    struct pf_addr *, struct pf_addr *,
			    struct pf_src_node **, struct pf_pool *,
			    enum pf_sn_types);
108 
109 u_int64_t
110 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
111     struct pf_poolhashkey *key, sa_family_t af)
112 {
113 	uint64_t res = 0;
114 #ifdef INET6
115 	union {
116 		uint64_t hash64;
117 		uint32_t hash32[2];
118 	} h;
119 #endif	/* INET6 */
120 
121 	switch (af) {
122 	case AF_INET:
123 		res = SipHash24((SIPHASH_KEY *)key,
124 		    &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
125 		hash->addr32[0] = res;
126 		break;
127 #ifdef INET6
128 	case AF_INET6:
129 		res = SipHash24((SIPHASH_KEY *)key, &inaddr->addr32[0],
130 		    4 * sizeof(inaddr->addr32[0]));
131 		h.hash64 = res;
132 		hash->addr32[0] = h.hash32[0];
133 		hash->addr32[1] = h.hash32[1];
134 		/*
135 		 * siphash isn't big enough, but flipping it around is
136 		 * good enough here.
137 		 */
138 		hash->addr32[2] = ~h.hash32[1];
139 		hash->addr32[3] = ~h.hash32[0];
140 		break;
141 #endif /* INET6 */
142 	default:
143 		unhandled_af(af);
144 	}
145 	return (res);
146 }
147 
148 int
149 pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r,
150     struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
151     struct pf_src_node **sn)
152 {
153 	struct pf_state_key_cmp	key;
154 	struct pf_addr		init_addr;
155 	u_int16_t		cut;
156 	int			dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN;
157 	int			sidx = pd->sidx;
158 	int			didx = pd->didx;
159 
160 	memset(&init_addr, 0, sizeof(init_addr));
161 	if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat,
162 	    PF_SN_NAT))
163 		return (1);
164 
165 	if (pd->proto == IPPROTO_ICMP) {
166 		if (pd->ndport == htons(ICMP_ECHO)) {
167 			low = 1;
168 			high = 65535;
169 		} else
170 			return (0);	/* Don't try to modify non-echo ICMP */
171 	}
172 #ifdef INET6
173 	if (pd->proto == IPPROTO_ICMPV6) {
174 		if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) {
175 			low = 1;
176 			high = 65535;
177 		} else
178 			return (0);	/* Don't try to modify non-echo ICMP */
179 	}
180 #endif /* INET6 */
181 
182 	do {
183 		key.af = pd->naf;
184 		key.proto = pd->proto;
185 		key.rdomain = pd->rdomain;
186 		pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af);
187 		pf_addrcpy(&key.addr[sidx], naddr, key.af);
188 		key.port[didx] = pd->ndport;
189 
190 		/*
191 		 * port search; start random, step;
192 		 * similar 2 portloop in in_pcbbind
193 		 */
194 		if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
195 		    pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6)) {
196 			/* XXX bug: icmp states dont use the id on both
197 			 * XXX sides (traceroute -I through nat) */
198 			key.port[sidx] = pd->nsport;
199 			if (pf_find_state_all(&key, dir, NULL) == NULL) {
200 				*nport = pd->nsport;
201 				return (0);
202 			}
203 		} else if (low == 0 && high == 0) {
204 			key.port[sidx] = pd->nsport;
205 			if (pf_find_state_all(&key, dir, NULL) == NULL) {
206 				*nport = pd->nsport;
207 				return (0);
208 			}
209 		} else if (low == high) {
210 			key.port[sidx] = htons(low);
211 			if (pf_find_state_all(&key, dir, NULL) == NULL) {
212 				*nport = htons(low);
213 				return (0);
214 			}
215 		} else {
216 			u_int32_t tmp;
217 
218 			if (low > high) {
219 				tmp = low;
220 				low = high;
221 				high = tmp;
222 			}
223 			/* low < high */
224 			cut = arc4random_uniform(1 + high - low) + low;
225 			/* low <= cut <= high */
226 			for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
227 				key.port[sidx] = htons(tmp);
228 				if (pf_find_state_all(&key, dir, NULL) ==
229 				    NULL && !in_baddynamic(tmp, pd->proto)) {
230 					*nport = htons(tmp);
231 					return (0);
232 				}
233 			}
234 			tmp = cut;
235 			for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
236 				key.port[sidx] = htons(tmp);
237 				if (pf_find_state_all(&key, dir, NULL) ==
238 				    NULL && !in_baddynamic(tmp, pd->proto)) {
239 					*nport = htons(tmp);
240 					return (0);
241 				}
242 			}
243 		}
244 
245 		switch (r->nat.opts & PF_POOL_TYPEMASK) {
246 		case PF_POOL_RANDOM:
247 		case PF_POOL_ROUNDROBIN:
248 		case PF_POOL_LEASTSTATES:
249 			/*
250 			 * pick a different source address since we're out
251 			 * of free port choices for the current one.
252 			 */
253 			if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr,
254 			    &init_addr, sn, &r->nat, PF_SN_NAT))
255 				return (1);
256 			break;
257 		case PF_POOL_NONE:
258 		case PF_POOL_SRCHASH:
259 		case PF_POOL_BITMASK:
260 		default:
261 			return (1);
262 		}
263 	} while (! PF_AEQ(&init_addr, naddr, pd->naf) );
264 	return (1);					/* none available */
265 }
266 
267 int
268 pf_map_addr_sticky(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
269     struct pf_addr *naddr, struct pf_src_node **sns, struct pf_pool *rpool,
270     enum pf_sn_types type)
271 {
272 	struct pf_addr		*raddr, *rmask, *cached;
273 	struct pf_state		*s;
274 	struct pf_src_node	 k;
275 	int			 valid;
276 
277 	k.af = af;
278 	k.type = type;
279 	pf_addrcpy(&k.addr, saddr, af);
280 	k.rule.ptr = r;
281 	pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
282 	sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
283 	if (sns[type] == NULL)
284 		return (-1);
285 
286 	/* check if the cached entry is still valid */
287 	cached = &(sns[type])->raddr;
288 	valid = 0;
289 	if (PF_AZERO(cached, af)) {
290 		valid = 1;
291 	} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
292 		if (pfr_kentry_byaddr(rpool->addr.p.dyn->pfid_kt, cached,
293 		    af, 0))
294 			valid = 1;
295 	} else if (rpool->addr.type == PF_ADDR_TABLE) {
296 		if (pfr_kentry_byaddr(rpool->addr.p.tbl, cached, af, 0))
297 			valid = 1;
298 	} else if (rpool->addr.type != PF_ADDR_NOROUTE) {
299 		raddr = &rpool->addr.v.a.addr;
300 		rmask = &rpool->addr.v.a.mask;
301 		valid = pf_match_addr(0, raddr, rmask, cached, af);
302 	}
303 	if (!valid) {
304 		if (pf_status.debug >= LOG_DEBUG) {
305 			log(LOG_DEBUG, "pf: pf_map_addr: "
306 			    "stale src tracking (%u) ", type);
307 			pf_print_host(&k.addr, 0, af);
308 			addlog(" to ");
309 			pf_print_host(cached, 0, af);
310 			addlog("\n");
311 		}
312 		if (sns[type]->states != 0) {
313 			/* XXX expensive */
314 			RB_FOREACH(s, pf_state_tree_id,
315 			   &tree_id)
316 				pf_state_rm_src_node(s,
317 				    sns[type]);
318 		}
319 		sns[type]->expire = 1;
320 		pf_remove_src_node(sns[type]);
321 		sns[type] = NULL;
322 		return (-1);
323 	}
324 
325 
326 	if (!PF_AZERO(cached, af)) {
327 		pf_addrcpy(naddr, cached, af);
328 		if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES &&
329 		    pf_map_addr_states_increase(af, rpool, cached) == -1)
330 			return (-1);
331 	}
332 	if (pf_status.debug >= LOG_DEBUG) {
333 		log(LOG_DEBUG, "pf: pf_map_addr: "
334 		    "src tracking (%u) maps ", type);
335 		pf_print_host(&k.addr, 0, af);
336 		addlog(" to ");
337 		pf_print_host(naddr, 0, af);
338 		addlog("\n");
339 	}
340 
341 	if (sns[type]->kif != NULL)
342 		rpool->kif = sns[type]->kif;
343 
344 	return (0);
345 }
346 
347 int
348 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
349     struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns,
350     struct pf_pool *rpool, enum pf_sn_types type)
351 {
352 	struct pf_addr		 hash;
353 	struct pf_addr		 faddr;
354 	struct pf_addr		*raddr = &rpool->addr.v.a.addr;
355 	struct pf_addr		*rmask = &rpool->addr.v.a.mask;
356 	struct pfr_ktable	*kt;
357 	struct pfi_kif		*kif;
358 	u_int64_t		 states;
359 	u_int16_t		 weight;
360 	u_int64_t		 load;
361 	u_int64_t		 cload;
362 	u_int64_t		 hashidx;
363 	int			 cnt;
364 
365 	if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
366 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE &&
367 	    pf_map_addr_sticky(af, r, saddr, naddr, sns, rpool, type) == 0)
368 		return (0);
369 
370 	if (rpool->addr.type == PF_ADDR_NOROUTE)
371 		return (1);
372 	if (rpool->addr.type == PF_ADDR_DYNIFTL) {
373 		switch (af) {
374 		case AF_INET:
375 			if (rpool->addr.p.dyn->pfid_acnt4 < 1 &&
376 			    !PF_POOL_DYNTYPE(rpool->opts))
377 				return (1);
378 			raddr = &rpool->addr.p.dyn->pfid_addr4;
379 			rmask = &rpool->addr.p.dyn->pfid_mask4;
380 			break;
381 #ifdef INET6
382 		case AF_INET6:
383 			if (rpool->addr.p.dyn->pfid_acnt6 < 1 &&
384 			    !PF_POOL_DYNTYPE(rpool->opts))
385 				return (1);
386 			raddr = &rpool->addr.p.dyn->pfid_addr6;
387 			rmask = &rpool->addr.p.dyn->pfid_mask6;
388 			break;
389 #endif /* INET6 */
390 		default:
391 			unhandled_af(af);
392 		}
393 	} else if (rpool->addr.type == PF_ADDR_TABLE) {
394 		if (!PF_POOL_DYNTYPE(rpool->opts))
395 			return (1); /* unsupported */
396 	} else {
397 		raddr = &rpool->addr.v.a.addr;
398 		rmask = &rpool->addr.v.a.mask;
399 	}
400 
401 	switch (rpool->opts & PF_POOL_TYPEMASK) {
402 	case PF_POOL_NONE:
403 		pf_addrcpy(naddr, raddr, af);
404 		break;
405 	case PF_POOL_BITMASK:
406 		pf_poolmask(naddr, raddr, rmask, saddr, af);
407 		break;
408 	case PF_POOL_RANDOM:
409 		if (rpool->addr.type == PF_ADDR_TABLE ||
410 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
411 			if (rpool->addr.type == PF_ADDR_TABLE)
412 				kt = rpool->addr.p.tbl;
413 			else
414 				kt = rpool->addr.p.dyn->pfid_kt;
415 			kt = pfr_ktable_select_active(kt);
416 			if (kt == NULL)
417 				return (1);
418 
419 			cnt = kt->pfrkt_cnt;
420 			if (cnt == 0)
421 				rpool->tblidx = 0;
422 			else
423 				rpool->tblidx = (int)arc4random_uniform(cnt);
424 			memset(&rpool->counter, 0, sizeof(rpool->counter));
425 			if (pfr_pool_get(rpool, &raddr, &rmask, af))
426 				return (1);
427 			pf_addrcpy(naddr, &rpool->counter, af);
428 		} else if (init_addr != NULL && PF_AZERO(init_addr, af)) {
429 			switch (af) {
430 			case AF_INET:
431 				rpool->counter.addr32[0] = arc4random();
432 				break;
433 #ifdef INET6
434 			case AF_INET6:
435 				if (rmask->addr32[3] != 0xffffffff)
436 					rpool->counter.addr32[3] = arc4random();
437 				else
438 					break;
439 				if (rmask->addr32[2] != 0xffffffff)
440 					rpool->counter.addr32[2] = arc4random();
441 				else
442 					break;
443 				if (rmask->addr32[1] != 0xffffffff)
444 					rpool->counter.addr32[1] = arc4random();
445 				else
446 					break;
447 				if (rmask->addr32[0] != 0xffffffff)
448 					rpool->counter.addr32[0] = arc4random();
449 				break;
450 #endif /* INET6 */
451 			default:
452 				unhandled_af(af);
453 			}
454 			pf_poolmask(naddr, raddr, rmask, &rpool->counter, af);
455 			pf_addrcpy(init_addr, naddr, af);
456 
457 		} else {
458 			pf_addr_inc(&rpool->counter, af);
459 			pf_poolmask(naddr, raddr, rmask, &rpool->counter, af);
460 		}
461 		break;
462 	case PF_POOL_SRCHASH:
463 		hashidx = pf_hash(saddr, &hash, &rpool->key, af);
464 
465 		if (rpool->addr.type == PF_ADDR_TABLE ||
466 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
467 			if (rpool->addr.type == PF_ADDR_TABLE)
468 				kt = rpool->addr.p.tbl;
469 			else
470 				kt = rpool->addr.p.dyn->pfid_kt;
471 			kt = pfr_ktable_select_active(kt);
472 			if (kt == NULL)
473 				return (1);
474 
475 			cnt = kt->pfrkt_cnt;
476 			if (cnt == 0)
477 				rpool->tblidx = 0;
478 			else
479 				rpool->tblidx = (int)(hashidx % cnt);
480 			memset(&rpool->counter, 0, sizeof(rpool->counter));
481 			if (pfr_pool_get(rpool, &raddr, &rmask, af))
482 				return (1);
483 			pf_addrcpy(naddr, &rpool->counter, af);
484 		} else {
485 			pf_poolmask(naddr, raddr, rmask, &hash, af);
486 		}
487 		break;
488 	case PF_POOL_ROUNDROBIN:
489 		if (rpool->addr.type == PF_ADDR_TABLE ||
490 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
491 			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
492 				/*
493 				 * reset counter in case its value
494 				 * has been removed from the pool.
495 				 */
496 				memset(&rpool->counter, 0,
497 				    sizeof(rpool->counter));
498 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
499 					return (1);
500 			}
501 		} else if (PF_AZERO(&rpool->counter, af)) {
502 			/*
503 			 * fall back to POOL_NONE if there are no addresses in
504 			 * pool
505 			 */
506 			pf_addrcpy(naddr, raddr, af);
507 			break;
508 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
509 			return (1);
510 
511 		/* iterate over table if it contains entries which are weighted */
512 		if ((rpool->addr.type == PF_ADDR_TABLE &&
513 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
514 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
515 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) {
516 			do {
517 				if (rpool->addr.type == PF_ADDR_TABLE ||
518 				    rpool->addr.type == PF_ADDR_DYNIFTL) {
519 					if (pfr_pool_get(rpool,
520 					    &raddr, &rmask, af))
521 						return (1);
522 				} else {
523 					log(LOG_ERR, "pf: pf_map_addr: "
524 					    "weighted RR failure");
525 					return (1);
526 				}
527 				if (rpool->weight >= rpool->curweight)
528 					break;
529 				pf_addr_inc(&rpool->counter, af);
530 			} while (1);
531 
532 			weight = rpool->weight;
533 		}
534 
535 		pf_addrcpy(naddr, &rpool->counter, af);
536 		if (init_addr != NULL && PF_AZERO(init_addr, af))
537 			pf_addrcpy(init_addr, naddr, af);
538 		pf_addr_inc(&rpool->counter, af);
539 		break;
540 	case PF_POOL_LEASTSTATES:
541 		/* retrieve an address first */
542 		if (rpool->addr.type == PF_ADDR_TABLE ||
543 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
544 			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
545 				/* see PF_POOL_ROUNDROBIN */
546 				memset(&rpool->counter, 0,
547 				    sizeof(rpool->counter));
548 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
549 					return (1);
550 			}
551 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
552 			return (1);
553 
554 		states = rpool->states;
555 		weight = rpool->weight;
556 		kif = rpool->kif;
557 
558 		if ((rpool->addr.type == PF_ADDR_TABLE &&
559 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
560 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
561 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
562 			load = ((UINT16_MAX * rpool->states) / rpool->weight);
563 		else
564 			load = states;
565 
566 		pf_addrcpy(&faddr, &rpool->counter, af);
567 
568 		pf_addrcpy(naddr, &rpool->counter, af);
569 		if (init_addr != NULL && PF_AZERO(init_addr, af))
570 			pf_addrcpy(init_addr, naddr, af);
571 
572 		/*
573 		 * iterate *once* over whole table and find destination with
574 		 * least connection
575 		 */
576 		do  {
577 			pf_addr_inc(&rpool->counter, af);
578 			if (rpool->addr.type == PF_ADDR_TABLE ||
579 			    rpool->addr.type == PF_ADDR_DYNIFTL) {
580 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
581 					return (1);
582 			} else if (pf_match_addr(0, raddr, rmask,
583 			    &rpool->counter, af))
584 				return (1);
585 
586 			if ((rpool->addr.type == PF_ADDR_TABLE &&
587 			    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
588 			    (rpool->addr.type == PF_ADDR_DYNIFTL &&
589 			    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
590 				cload = ((UINT16_MAX * rpool->states)
591 					/ rpool->weight);
592 			else
593 				cload = rpool->states;
594 
595 			/* find lc minimum */
596 			if (cload < load) {
597 				states = rpool->states;
598 				weight = rpool->weight;
599 				kif = rpool->kif;
600 				load = cload;
601 
602 				pf_addrcpy(naddr, &rpool->counter, af);
603 				if (init_addr != NULL &&
604 				    PF_AZERO(init_addr, af))
605 				    pf_addrcpy(init_addr, naddr, af);
606 			}
607 		} while (pf_match_addr(1, &faddr, rmask, &rpool->counter, af) &&
608 		    (states > 0));
609 
610 		if (pf_map_addr_states_increase(af, rpool, naddr) == -1)
611 			return (1);
612 		/* revert the kif which was set by pfr_pool_get() */
613 		rpool->kif = kif;
614 		break;
615 	}
616 
617 	if (rpool->opts & PF_POOL_STICKYADDR) {
618 		if (sns[type] != NULL) {
619 			pf_remove_src_node(sns[type]);
620 			sns[type] = NULL;
621 		}
622 		if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr,
623 		    rpool->kif))
624 			return (1);
625 	}
626 
627 	if (pf_status.debug >= LOG_INFO &&
628 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
629 		log(LOG_INFO, "pf: pf_map_addr: selected address ");
630 		pf_print_host(naddr, 0, af);
631 		if ((rpool->opts & PF_POOL_TYPEMASK) ==
632 		    PF_POOL_LEASTSTATES)
633 			addlog(" with state count %llu", states);
634 		if ((rpool->addr.type == PF_ADDR_TABLE &&
635 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
636 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
637 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
638 			addlog(" with weight %u", weight);
639 		addlog("\n");
640 	}
641 
642 	return (0);
643 }
644 
645 int
646 pf_map_addr_states_increase(sa_family_t af, struct pf_pool *rpool,
647     struct pf_addr *naddr)
648 {
649 	if (rpool->addr.type == PF_ADDR_TABLE) {
650 		if (pfr_states_increase(rpool->addr.p.tbl,
651 		    naddr, af) == -1) {
652 			if (pf_status.debug >= LOG_DEBUG) {
653 				log(LOG_DEBUG,
654 				    "pf: pf_map_addr_states_increase: "
655 				    "selected address ");
656 				pf_print_host(naddr, 0, af);
657 				addlog(". Failed to increase count!\n");
658 			}
659 			return (-1);
660 		}
661 	} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
662 		if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt,
663 		    naddr, af) == -1) {
664 			if (pf_status.debug >= LOG_DEBUG) {
665 				log(LOG_DEBUG,
666 				    "pf: pf_map_addr_states_increase: "
667 				    "selected address ");
668 				pf_print_host(naddr, 0, af);
669 				addlog(". Failed to increase count!\n");
670 			}
671 			return (-1);
672 		}
673 	}
674 	return (0);
675 }
676 
677 int
678 pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd,
679     struct pf_src_node **sns, struct pf_rule **nr)
680 {
681 	struct pf_addr	naddr;
682 	u_int16_t	nport;
683 
684 #ifdef INET6
685 	if (pd->af != pd->naf)
686 		return (pf_get_transaddr_af(r, pd, sns));
687 #endif /* INET6 */
688 
689 	if (r->nat.addr.type != PF_ADDR_NONE) {
690 		/* XXX is this right? what if rtable is changed at the same
691 		 * XXX time? where do I need to figure out the sport? */
692 		nport = 0;
693 		if (pf_get_sport(pd, r, &naddr, &nport,
694 		    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
695 			DPFPRINTF(LOG_NOTICE,
696 			    "pf: NAT proxy port allocation (%u-%u) failed",
697 			    r->nat.proxy_port[0],
698 			    r->nat.proxy_port[1]);
699 			return (-1);
700 		}
701 		*nr = r;
702 		pf_addrcpy(&pd->nsaddr, &naddr, pd->af);
703 		pd->nsport = nport;
704 	}
705 	if (r->rdr.addr.type != PF_ADDR_NONE) {
706 		if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns,
707 		    &r->rdr, PF_SN_RDR))
708 			return (-1);
709 		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
710 			pf_poolmask(&naddr, &naddr,  &r->rdr.addr.v.a.mask,
711 			    &pd->ndaddr, pd->af);
712 
713 		nport = 0;
714 		if (r->rdr.proxy_port[1]) {
715 			u_int32_t	tmp_nport;
716 			u_int16_t	div;
717 
718 			div = r->rdr.proxy_port[1] - r->rdr.proxy_port[0] + 1;
719 			div = (div == 0) ? 1 : div;
720 
721 			tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) % div) +
722 			    r->rdr.proxy_port[0];
723 
724 			/* wrap around if necessary */
725 			if (tmp_nport > 65535)
726 				tmp_nport -= 65535;
727 			nport = htons((u_int16_t)tmp_nport);
728 		} else if (r->rdr.proxy_port[0])
729 			nport = htons(r->rdr.proxy_port[0]);
730 		*nr = r;
731 		pf_addrcpy(&pd->ndaddr, &naddr, pd->af);
732 		if (nport)
733 			pd->ndport = nport;
734 	}
735 
736 	return (0);
737 }
738 
739 #ifdef INET6
740 int
741 pf_get_transaddr_af(struct pf_rule *r, struct pf_pdesc *pd,
742     struct pf_src_node **sns)
743 {
744 	struct pf_addr	ndaddr, nsaddr, naddr;
745 	u_int16_t	nport;
746 	int		prefixlen = 96;
747 
748 	if (pf_status.debug >= LOG_INFO) {
749 		log(LOG_INFO, "pf: af-to %s %s, ",
750 		    pd->naf == AF_INET ? "inet" : "inet6",
751 		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr");
752 		pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
753 		addlog(" -> ");
754 		pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
755 		addlog("\n");
756 	}
757 
758 	if (r->nat.addr.type == PF_ADDR_NONE)
759 		panic("pf_get_transaddr_af: no nat pool for source address");
760 
761 	/* get source address and port */
762 	nport = 0;
763 	if (pf_get_sport(pd, r, &nsaddr, &nport,
764 	    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
765 		DPFPRINTF(LOG_NOTICE,
766 		    "pf: af-to NAT proxy port allocation (%u-%u) failed",
767 		    r->nat.proxy_port[0],
768 		    r->nat.proxy_port[1]);
769 		return (-1);
770 	}
771 	pd->nsport = nport;
772 
773 	if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) {
774 		if (pd->dir == PF_IN) {
775 			pd->ndport = ntohs(pd->ndport);
776 			if (pd->ndport == ICMP6_ECHO_REQUEST)
777 				pd->ndport = ICMP_ECHO;
778 			else if (pd->ndport == ICMP6_ECHO_REPLY)
779 				pd->ndport = ICMP_ECHOREPLY;
780 			pd->ndport = htons(pd->ndport);
781 		} else {
782 			pd->nsport = ntohs(pd->nsport);
783 			if (pd->nsport == ICMP6_ECHO_REQUEST)
784 				pd->nsport = ICMP_ECHO;
785 			else if (pd->nsport == ICMP6_ECHO_REPLY)
786 				pd->nsport = ICMP_ECHOREPLY;
787 			pd->nsport = htons(pd->nsport);
788 		}
789 	} else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
790 		if (pd->dir == PF_IN) {
791 			pd->ndport = ntohs(pd->ndport);
792 			if (pd->ndport == ICMP_ECHO)
793 				pd->ndport = ICMP6_ECHO_REQUEST;
794 			else if (pd->ndport == ICMP_ECHOREPLY)
795 				pd->ndport = ICMP6_ECHO_REPLY;
796 			pd->ndport = htons(pd->ndport);
797 		} else {
798 			pd->nsport = ntohs(pd->nsport);
799 			if (pd->nsport == ICMP_ECHO)
800 				pd->nsport = ICMP6_ECHO_REQUEST;
801 			else if (pd->nsport == ICMP_ECHOREPLY)
802 				pd->nsport = ICMP6_ECHO_REPLY;
803 			pd->nsport = htons(pd->nsport);
804 		}
805 	}
806 
807 	/* get the destination address and port */
808 	if (r->rdr.addr.type != PF_ADDR_NONE) {
809 		if (pf_map_addr(pd->naf, r, &nsaddr, &naddr, NULL, sns,
810 		    &r->rdr, PF_SN_RDR))
811 			return (-1);
812 		if (r->rdr.proxy_port[0])
813 			pd->ndport = htons(r->rdr.proxy_port[0]);
814 
815 		if (pd->naf == AF_INET) {
816 			/* The prefix is the IPv4 rdr address */
817 			prefixlen = in_mask2len((struct in_addr *)
818 			    &r->rdr.addr.v.a.mask);
819 			inet_nat46(pd->naf, &pd->ndaddr,
820 			    &ndaddr, &naddr, prefixlen);
821 		} else {
822 			/* The prefix is the IPv6 rdr address */
823 			prefixlen =
824 			    in6_mask2len((struct in6_addr *)
825 			    &r->rdr.addr.v.a.mask, NULL);
826 			inet_nat64(pd->naf, &pd->ndaddr,
827 			    &ndaddr, &naddr, prefixlen);
828 		}
829 	} else {
830 		if (pd->naf == AF_INET) {
831 			/* The prefix is the IPv6 dst address */
832 			prefixlen =
833 			    in6_mask2len((struct in6_addr *)
834 			    &r->dst.addr.v.a.mask, NULL);
835 			if (prefixlen < 32)
836 				prefixlen = 96;
837 			inet_nat64(pd->naf, &pd->ndaddr,
838 			    &ndaddr, &pd->ndaddr, prefixlen);
839 		} else {
840 			/*
841 			 * The prefix is the IPv6 nat address
842 			 * (that was stored in pd->nsaddr)
843 			 */
844 			prefixlen = in6_mask2len((struct in6_addr *)
845 			    &r->nat.addr.v.a.mask, NULL);
846 			if (prefixlen > 96)
847 				prefixlen = 96;
848 			inet_nat64(pd->naf, &pd->ndaddr,
849 			    &ndaddr, &nsaddr, prefixlen);
850 		}
851 	}
852 
853 	pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf);
854 	pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf);
855 
856 	if (pf_status.debug >= LOG_INFO) {
857 		log(LOG_INFO, "pf: af-to %s %s done, prefixlen %d, ",
858 		    pd->naf == AF_INET ? "inet" : "inet6",
859 		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr",
860 		    prefixlen);
861 		pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
862 		addlog(" -> ");
863 		pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
864 		addlog("\n");
865 	}
866 
867 	return (0);
868 }
869 #endif /* INET6 */
870 
871 int
872 pf_postprocess_addr(struct pf_state *cur)
873 {
874 	struct pf_rule		*nr;
875 	struct pf_state_key	*sks;
876 	struct pf_pool		 rpool;
877 	struct pf_addr		 lookup_addr;
878 	int			 slbcount = -1;
879 
880 	nr = cur->natrule.ptr;
881 
882 	if (nr == NULL)
883 		return (0);
884 
885 	/* decrease counter */
886 
887 	sks = cur->key[PF_SK_STACK];
888 
889 	/* check for outgoing or ingoing balancing */
890 	if (nr->rt == PF_ROUTETO)
891 		lookup_addr = cur->rt_addr;
892 	else if (sks != NULL)
893 		lookup_addr = sks->addr[1];
894 	else {
895 		if (pf_status.debug >= LOG_DEBUG) {
896 			log(LOG_DEBUG, "pf: %s: unable to obtain address",
897 			    __func__);
898 		}
899 		return (1);
900 	}
901 
902 	/* check for appropriate pool */
903 	if (nr->rdr.addr.type != PF_ADDR_NONE)
904 		rpool = nr->rdr;
905 	else if (nr->nat.addr.type != PF_ADDR_NONE)
906 		rpool = nr->nat;
907 	else if (nr->route.addr.type != PF_ADDR_NONE)
908 		rpool = nr->route;
909 	else
910 		return (0);
911 
912 	if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
913 		return (0);
914 
915 	if (rpool.addr.type == PF_ADDR_TABLE) {
916 		if ((slbcount = pfr_states_decrease(
917 		    rpool.addr.p.tbl,
918 		    &lookup_addr, sks->af)) == -1) {
919 			if (pf_status.debug >= LOG_DEBUG) {
920 				log(LOG_DEBUG, "pf: %s: selected address ",
921 				    __func__);
922 				pf_print_host(&lookup_addr,
923 				    sks->port[0], sks->af);
924 				addlog(". Failed to "
925 				    "decrease count!\n");
926 			}
927 			return (1);
928 		}
929 	} else if (rpool.addr.type == PF_ADDR_DYNIFTL) {
930 		if ((slbcount = pfr_states_decrease(
931 		    rpool.addr.p.dyn->pfid_kt,
932 		    &lookup_addr, sks->af)) == -1) {
933 			if (pf_status.debug >= LOG_DEBUG) {
934 				log(LOG_DEBUG, "pf: %s: selected address ",
935 				    __func__);
936 				pf_print_host(&lookup_addr,
937 				    sks->port[0], sks->af);
938 				addlog(". Failed to "
939 				    "decrease count!\n");
940 			}
941 			return (1);
942 		}
943 	}
944 	if (slbcount > -1) {
945 		if (pf_status.debug >= LOG_INFO) {
946 			log(LOG_INFO, "pf: %s: selected address ", __func__);
947 			pf_print_host(&lookup_addr, sks->port[0],
948 			    sks->af);
949 			addlog(" decreased state count to %u\n",
950 			    slbcount);
951 		}
952 	}
953 	return (0);
954 }
955