xref: /openbsd-src/sys/net/pf_lb.c (revision 4e1ee0786f11cc571bd0be17d38e46f635c719fc)
1 /*	$OpenBSD: pf_lb.c,v 1.68 2020/12/12 22:59:21 jan Exp $ */
2 
3 /*
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  */
37 
38 #include "bpfilter.h"
39 #include "pflog.h"
40 #include "pfsync.h"
41 #include "pflow.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/mbuf.h>
46 #include <sys/filio.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/kernel.h>
50 #include <sys/time.h>
51 #include <sys/pool.h>
52 #include <sys/rwlock.h>
53 #include <sys/syslog.h>
54 #include <sys/stdint.h>
55 
56 #include <crypto/siphash.h>
57 
58 #include <net/if.h>
59 #include <net/bpf.h>
60 #include <net/route.h>
61 
62 #include <netinet/in.h>
63 #include <netinet/ip.h>
64 #include <netinet/in_pcb.h>
65 #include <netinet/ip_var.h>
66 #include <netinet/ip_icmp.h>
67 #include <netinet/icmp_var.h>
68 #include <netinet/tcp.h>
69 #include <netinet/tcp_seq.h>
70 #include <netinet/tcp_timer.h>
71 #include <netinet/udp.h>
72 #include <netinet/udp_var.h>
73 #include <netinet/if_ether.h>
74 
75 #ifdef INET6
76 #include <netinet/ip6.h>
77 #include <netinet/icmp6.h>
78 #endif /* INET6 */
79 
80 #include <net/pfvar.h>
81 #include <net/pfvar_priv.h>
82 
83 #if NPFLOG > 0
84 #include <net/if_pflog.h>
85 #endif	/* NPFLOG > 0 */
86 
87 #if NPFLOW > 0
88 #include <net/if_pflow.h>
89 #endif	/* NPFLOW > 0 */
90 
91 #if NPFSYNC > 0
92 #include <net/if_pfsync.h>
93 #endif /* NPFSYNC > 0 */
94 
/*
 * Prototypes for helpers defined later in this file, listed in the
 * order in which they are defined.
 */

/* hash a source address with a pool key */
u_int64_t	 pf_hash(struct pf_addr *, struct pf_addr *,
		    struct pf_poolhashkey *, sa_family_t);

/* pick a NAT source address and a free source port */
int		 pf_get_sport(struct pf_pdesc *, struct pf_rule *,
		    struct pf_addr *, u_int16_t *, u_int16_t, u_int16_t,
		    struct pf_src_node **);

/* reuse a sticky-address source node, if one exists and is still valid */
int		 pf_map_addr_sticky(sa_family_t, struct pf_rule *,
		    struct pf_addr *, struct pf_addr *,
		    struct pf_src_node **, struct pf_pool *,
		    enum pf_sn_types);

/* account for a new state on a table-backed pool address */
int		 pf_map_addr_states_increase(sa_family_t, struct pf_pool *,
		    struct pf_addr *);

/* address translation across address families (af-to) */
int		 pf_get_transaddr_af(struct pf_rule *, struct pf_pdesc *,
		    struct pf_src_node **);
108 
109 u_int64_t
110 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
111     struct pf_poolhashkey *key, sa_family_t af)
112 {
113 	uint64_t res = 0;
114 #ifdef INET6
115 	union {
116 		uint64_t hash64;
117 		uint32_t hash32[2];
118 	} h;
119 #endif	/* INET6 */
120 
121 	switch (af) {
122 	case AF_INET:
123 		res = SipHash24((SIPHASH_KEY *)key,
124 		    &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
125 		hash->addr32[0] = res;
126 		break;
127 #ifdef INET6
128 	case AF_INET6:
129 		res = SipHash24((SIPHASH_KEY *)key, &inaddr->addr32[0],
130 		    4 * sizeof(inaddr->addr32[0]));
131 		h.hash64 = res;
132 		hash->addr32[0] = h.hash32[0];
133 		hash->addr32[1] = h.hash32[1];
134 		/*
135 		 * siphash isn't big enough, but flipping it around is
136 		 * good enough here.
137 		 */
138 		hash->addr32[2] = ~h.hash32[1];
139 		hash->addr32[3] = ~h.hash32[0];
140 		break;
141 #endif /* INET6 */
142 	default:
143 		unhandled_af(af);
144 	}
145 	return (res);
146 }
147 
148 int
149 pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r,
150     struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
151     struct pf_src_node **sn)
152 {
153 	struct pf_state_key_cmp	key;
154 	struct pf_addr		init_addr;
155 	u_int16_t		cut;
156 	int			dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN;
157 	int			sidx = pd->sidx;
158 	int			didx = pd->didx;
159 
160 	memset(&init_addr, 0, sizeof(init_addr));
161 	if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat,
162 	    PF_SN_NAT))
163 		return (1);
164 
165 	if (pd->proto == IPPROTO_ICMP) {
166 		if (pd->ndport == htons(ICMP_ECHO)) {
167 			low = 1;
168 			high = 65535;
169 		} else
170 			return (0);	/* Don't try to modify non-echo ICMP */
171 	}
172 #ifdef INET6
173 	if (pd->proto == IPPROTO_ICMPV6) {
174 		if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) {
175 			low = 1;
176 			high = 65535;
177 		} else
178 			return (0);	/* Don't try to modify non-echo ICMP */
179 	}
180 #endif /* INET6 */
181 
182 	do {
183 		key.af = pd->naf;
184 		key.proto = pd->proto;
185 		key.rdomain = pd->rdomain;
186 		pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af);
187 		pf_addrcpy(&key.addr[sidx], naddr, key.af);
188 		key.port[didx] = pd->ndport;
189 
190 		/*
191 		 * port search; start random, step;
192 		 * similar 2 portloop in in_pcbbind
193 		 */
194 		if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
195 		    pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6)) {
196 			/* XXX bug: icmp states dont use the id on both
197 			 * XXX sides (traceroute -I through nat) */
198 			key.port[sidx] = pd->nsport;
199 			if (pf_find_state_all(&key, dir, NULL) == NULL) {
200 				*nport = pd->nsport;
201 				return (0);
202 			}
203 		} else if (low == 0 && high == 0) {
204 			key.port[sidx] = pd->nsport;
205 			if (pf_find_state_all(&key, dir, NULL) == NULL) {
206 				*nport = pd->nsport;
207 				return (0);
208 			}
209 		} else if (low == high) {
210 			key.port[sidx] = htons(low);
211 			if (pf_find_state_all(&key, dir, NULL) == NULL) {
212 				*nport = htons(low);
213 				return (0);
214 			}
215 		} else {
216 			u_int32_t tmp;
217 
218 			if (low > high) {
219 				tmp = low;
220 				low = high;
221 				high = tmp;
222 			}
223 			/* low < high */
224 			cut = arc4random_uniform(1 + high - low) + low;
225 			/* low <= cut <= high */
226 			for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
227 				key.port[sidx] = htons(tmp);
228 				if (pf_find_state_all(&key, dir, NULL) ==
229 				    NULL && !in_baddynamic(tmp, pd->proto)) {
230 					*nport = htons(tmp);
231 					return (0);
232 				}
233 			}
234 			tmp = cut;
235 			for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
236 				key.port[sidx] = htons(tmp);
237 				if (pf_find_state_all(&key, dir, NULL) ==
238 				    NULL && !in_baddynamic(tmp, pd->proto)) {
239 					*nport = htons(tmp);
240 					return (0);
241 				}
242 			}
243 		}
244 
245 		switch (r->nat.opts & PF_POOL_TYPEMASK) {
246 		case PF_POOL_RANDOM:
247 		case PF_POOL_ROUNDROBIN:
248 		case PF_POOL_LEASTSTATES:
249 			/*
250 			 * pick a different source address since we're out
251 			 * of free port choices for the current one.
252 			 */
253 			if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr,
254 			    &init_addr, sn, &r->nat, PF_SN_NAT))
255 				return (1);
256 			break;
257 		case PF_POOL_NONE:
258 		case PF_POOL_SRCHASH:
259 		case PF_POOL_BITMASK:
260 		default:
261 			return (1);
262 		}
263 	} while (! PF_AEQ(&init_addr, naddr, pd->naf) );
264 	return (1);					/* none available */
265 }
266 
267 int
268 pf_map_addr_sticky(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
269     struct pf_addr *naddr, struct pf_src_node **sns, struct pf_pool *rpool,
270     enum pf_sn_types type)
271 {
272 	struct pf_addr		*raddr, *rmask, *cached;
273 	struct pf_state		*s;
274 	struct pf_src_node	 k;
275 	int			 valid;
276 
277 	k.af = af;
278 	k.type = type;
279 	pf_addrcpy(&k.addr, saddr, af);
280 	k.rule.ptr = r;
281 	pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
282 	sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
283 	if (sns[type] == NULL)
284 		return (-1);
285 
286 	/* check if the cached entry is still valid */
287 	cached = &(sns[type])->raddr;
288 	valid = 0;
289 	if (PF_AZERO(cached, af)) {
290 		valid = 1;
291 	} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
292 		if (pfr_kentry_byaddr(rpool->addr.p.dyn->pfid_kt, cached,
293 		    af, 0))
294 			valid = 1;
295 	} else if (rpool->addr.type == PF_ADDR_TABLE) {
296 		if (pfr_kentry_byaddr(rpool->addr.p.tbl, cached, af, 0))
297 			valid = 1;
298 	} else if (rpool->addr.type != PF_ADDR_NOROUTE) {
299 		raddr = &rpool->addr.v.a.addr;
300 		rmask = &rpool->addr.v.a.mask;
301 		valid = pf_match_addr(0, raddr, rmask, cached, af);
302 	}
303 	if (!valid) {
304 		if (pf_status.debug >= LOG_DEBUG) {
305 			log(LOG_DEBUG, "pf: pf_map_addr: "
306 			    "stale src tracking (%u) ", type);
307 			pf_print_host(&k.addr, 0, af);
308 			addlog(" to ");
309 			pf_print_host(cached, 0, af);
310 			addlog("\n");
311 		}
312 		if (sns[type]->states != 0) {
313 			/* XXX expensive */
314 			RB_FOREACH(s, pf_state_tree_id,
315 			   &tree_id)
316 				pf_state_rm_src_node(s,
317 				    sns[type]);
318 		}
319 		sns[type]->expire = 1;
320 		pf_remove_src_node(sns[type]);
321 		sns[type] = NULL;
322 		return (-1);
323 	}
324 
325 
326 	if (!PF_AZERO(cached, af)) {
327 		pf_addrcpy(naddr, cached, af);
328 		if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES &&
329 		    pf_map_addr_states_increase(af, rpool, cached) == -1)
330 			return (-1);
331 	}
332 	if (pf_status.debug >= LOG_DEBUG) {
333 		log(LOG_DEBUG, "pf: pf_map_addr: "
334 		    "src tracking (%u) maps ", type);
335 		pf_print_host(&k.addr, 0, af);
336 		addlog(" to ");
337 		pf_print_host(naddr, 0, af);
338 		addlog("\n");
339 	}
340 
341 	if (sns[type]->kif != NULL)
342 		rpool->kif = sns[type]->kif;
343 
344 	return (0);
345 }
346 
347 int
348 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
349     struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns,
350     struct pf_pool *rpool, enum pf_sn_types type)
351 {
352 	struct pf_addr		 hash;
353 	struct pf_addr		 faddr;
354 	struct pf_addr		*raddr = &rpool->addr.v.a.addr;
355 	struct pf_addr		*rmask = &rpool->addr.v.a.mask;
356 	struct pfr_ktable	*kt;
357 	struct pfi_kif		*kif;
358 	u_int64_t		 states;
359 	u_int16_t		 weight;
360 	u_int64_t		 load;
361 	u_int64_t		 cload;
362 	u_int64_t		 hashidx;
363 	int			 cnt;
364 
365 	if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
366 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE &&
367 	    pf_map_addr_sticky(af, r, saddr, naddr, sns, rpool, type) == 0)
368 		return (0);
369 
370 	if (rpool->addr.type == PF_ADDR_NOROUTE)
371 		return (1);
372 	if (rpool->addr.type == PF_ADDR_DYNIFTL) {
373 		switch (af) {
374 		case AF_INET:
375 			if (rpool->addr.p.dyn->pfid_acnt4 < 1 &&
376 			    !PF_POOL_DYNTYPE(rpool->opts))
377 				return (1);
378 			raddr = &rpool->addr.p.dyn->pfid_addr4;
379 			rmask = &rpool->addr.p.dyn->pfid_mask4;
380 			break;
381 #ifdef INET6
382 		case AF_INET6:
383 			if (rpool->addr.p.dyn->pfid_acnt6 < 1 &&
384 			    !PF_POOL_DYNTYPE(rpool->opts))
385 				return (1);
386 			raddr = &rpool->addr.p.dyn->pfid_addr6;
387 			rmask = &rpool->addr.p.dyn->pfid_mask6;
388 			break;
389 #endif /* INET6 */
390 		default:
391 			unhandled_af(af);
392 		}
393 	} else if (rpool->addr.type == PF_ADDR_TABLE) {
394 		if (!PF_POOL_DYNTYPE(rpool->opts))
395 			return (1); /* unsupported */
396 	} else {
397 		raddr = &rpool->addr.v.a.addr;
398 		rmask = &rpool->addr.v.a.mask;
399 	}
400 
401 	switch (rpool->opts & PF_POOL_TYPEMASK) {
402 	case PF_POOL_NONE:
403 		pf_addrcpy(naddr, raddr, af);
404 		break;
405 	case PF_POOL_BITMASK:
406 		pf_poolmask(naddr, raddr, rmask, saddr, af);
407 		break;
408 	case PF_POOL_RANDOM:
409 		if (rpool->addr.type == PF_ADDR_TABLE ||
410 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
411 			if (rpool->addr.type == PF_ADDR_TABLE)
412 				kt = rpool->addr.p.tbl;
413 			else
414 				kt = rpool->addr.p.dyn->pfid_kt;
415 			kt = pfr_ktable_select_active(kt);
416 			if (kt == NULL)
417 				return (1);
418 
419 			cnt = kt->pfrkt_cnt;
420 			if (cnt == 0)
421 				rpool->tblidx = 0;
422 			else
423 				rpool->tblidx = (int)arc4random_uniform(cnt);
424 			memset(&rpool->counter, 0, sizeof(rpool->counter));
425 			if (pfr_pool_get(rpool, &raddr, &rmask, af))
426 				return (1);
427 			pf_addrcpy(naddr, &rpool->counter, af);
428 		} else if (init_addr != NULL && PF_AZERO(init_addr, af)) {
429 			switch (af) {
430 			case AF_INET:
431 				rpool->counter.addr32[0] = arc4random();
432 				break;
433 #ifdef INET6
434 			case AF_INET6:
435 				if (rmask->addr32[3] != 0xffffffff)
436 					rpool->counter.addr32[3] = arc4random();
437 				else
438 					break;
439 				if (rmask->addr32[2] != 0xffffffff)
440 					rpool->counter.addr32[2] = arc4random();
441 				else
442 					break;
443 				if (rmask->addr32[1] != 0xffffffff)
444 					rpool->counter.addr32[1] = arc4random();
445 				else
446 					break;
447 				if (rmask->addr32[0] != 0xffffffff)
448 					rpool->counter.addr32[0] = arc4random();
449 				break;
450 #endif /* INET6 */
451 			default:
452 				unhandled_af(af);
453 			}
454 			pf_poolmask(naddr, raddr, rmask, &rpool->counter, af);
455 			pf_addrcpy(init_addr, naddr, af);
456 
457 		} else {
458 			pf_addr_inc(&rpool->counter, af);
459 			pf_poolmask(naddr, raddr, rmask, &rpool->counter, af);
460 		}
461 		break;
462 	case PF_POOL_SRCHASH:
463 		hashidx = pf_hash(saddr, &hash, &rpool->key, af);
464 
465 		if (rpool->addr.type == PF_ADDR_TABLE ||
466 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
467 			if (rpool->addr.type == PF_ADDR_TABLE)
468 				kt = rpool->addr.p.tbl;
469 			else
470 				kt = rpool->addr.p.dyn->pfid_kt;
471 			kt = pfr_ktable_select_active(kt);
472 			if (kt == NULL)
473 				return (1);
474 
475 			cnt = kt->pfrkt_cnt;
476 			if (cnt == 0)
477 				rpool->tblidx = 0;
478 			else
479 				rpool->tblidx = (int)(hashidx % cnt);
480 			memset(&rpool->counter, 0, sizeof(rpool->counter));
481 			if (pfr_pool_get(rpool, &raddr, &rmask, af))
482 				return (1);
483 			pf_addrcpy(naddr, &rpool->counter, af);
484 		} else {
485 			pf_poolmask(naddr, raddr, rmask, &hash, af);
486 		}
487 		break;
488 	case PF_POOL_ROUNDROBIN:
489 		if (rpool->addr.type == PF_ADDR_TABLE ||
490 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
491 			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
492 				/*
493 				 * reset counter in case its value
494 				 * has been removed from the pool.
495 				 */
496 				memset(&rpool->counter, 0,
497 				    sizeof(rpool->counter));
498 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
499 					return (1);
500 			}
501 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
502 			return (1);
503 
504 		/* iterate over table if it contains entries which are weighted */
505 		if ((rpool->addr.type == PF_ADDR_TABLE &&
506 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
507 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
508 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) {
509 			do {
510 				if (rpool->addr.type == PF_ADDR_TABLE ||
511 				    rpool->addr.type == PF_ADDR_DYNIFTL) {
512 					if (pfr_pool_get(rpool,
513 					    &raddr, &rmask, af))
514 						return (1);
515 				} else {
516 					log(LOG_ERR, "pf: pf_map_addr: "
517 					    "weighted RR failure");
518 					return (1);
519 				}
520 				if (rpool->weight >= rpool->curweight)
521 					break;
522 				pf_addr_inc(&rpool->counter, af);
523 			} while (1);
524 
525 			weight = rpool->weight;
526 		}
527 
528 		pf_addrcpy(naddr, &rpool->counter, af);
529 		if (init_addr != NULL && PF_AZERO(init_addr, af))
530 			pf_addrcpy(init_addr, naddr, af);
531 		pf_addr_inc(&rpool->counter, af);
532 		break;
533 	case PF_POOL_LEASTSTATES:
534 		/* retrieve an address first */
535 		if (rpool->addr.type == PF_ADDR_TABLE ||
536 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
537 			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
538 				/* see PF_POOL_ROUNDROBIN */
539 				memset(&rpool->counter, 0,
540 				    sizeof(rpool->counter));
541 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
542 					return (1);
543 			}
544 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
545 			return (1);
546 
547 		states = rpool->states;
548 		weight = rpool->weight;
549 		kif = rpool->kif;
550 
551 		if ((rpool->addr.type == PF_ADDR_TABLE &&
552 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
553 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
554 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
555 			load = ((UINT16_MAX * rpool->states) / rpool->weight);
556 		else
557 			load = states;
558 
559 		pf_addrcpy(&faddr, &rpool->counter, af);
560 
561 		pf_addrcpy(naddr, &rpool->counter, af);
562 		if (init_addr != NULL && PF_AZERO(init_addr, af))
563 			pf_addrcpy(init_addr, naddr, af);
564 
565 		/*
566 		 * iterate *once* over whole table and find destination with
567 		 * least connection
568 		 */
569 		do  {
570 			pf_addr_inc(&rpool->counter, af);
571 			if (rpool->addr.type == PF_ADDR_TABLE ||
572 			    rpool->addr.type == PF_ADDR_DYNIFTL) {
573 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
574 					return (1);
575 			} else if (pf_match_addr(0, raddr, rmask,
576 			    &rpool->counter, af))
577 				return (1);
578 
579 			if ((rpool->addr.type == PF_ADDR_TABLE &&
580 			    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
581 			    (rpool->addr.type == PF_ADDR_DYNIFTL &&
582 			    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
583 				cload = ((UINT16_MAX * rpool->states)
584 					/ rpool->weight);
585 			else
586 				cload = rpool->states;
587 
588 			/* find lc minimum */
589 			if (cload < load) {
590 				states = rpool->states;
591 				weight = rpool->weight;
592 				kif = rpool->kif;
593 				load = cload;
594 
595 				pf_addrcpy(naddr, &rpool->counter, af);
596 				if (init_addr != NULL &&
597 				    PF_AZERO(init_addr, af))
598 				    pf_addrcpy(init_addr, naddr, af);
599 			}
600 		} while (pf_match_addr(1, &faddr, rmask, &rpool->counter, af) &&
601 		    (states > 0));
602 
603 		if (pf_map_addr_states_increase(af, rpool, naddr) == -1)
604 			return (1);
605 		/* revert the kif which was set by pfr_pool_get() */
606 		rpool->kif = kif;
607 		break;
608 	}
609 
610 	if (rpool->opts & PF_POOL_STICKYADDR) {
611 		if (sns[type] != NULL) {
612 			pf_remove_src_node(sns[type]);
613 			sns[type] = NULL;
614 		}
615 		if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr,
616 		    rpool->kif))
617 			return (1);
618 	}
619 
620 	if (pf_status.debug >= LOG_INFO &&
621 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
622 		log(LOG_INFO, "pf: pf_map_addr: selected address ");
623 		pf_print_host(naddr, 0, af);
624 		if ((rpool->opts & PF_POOL_TYPEMASK) ==
625 		    PF_POOL_LEASTSTATES)
626 			addlog(" with state count %llu", states);
627 		if ((rpool->addr.type == PF_ADDR_TABLE &&
628 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
629 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
630 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
631 			addlog(" with weight %u", weight);
632 		addlog("\n");
633 	}
634 
635 	return (0);
636 }
637 
638 int
639 pf_map_addr_states_increase(sa_family_t af, struct pf_pool *rpool,
640     struct pf_addr *naddr)
641 {
642 	if (rpool->addr.type == PF_ADDR_TABLE) {
643 		if (pfr_states_increase(rpool->addr.p.tbl,
644 		    naddr, af) == -1) {
645 			if (pf_status.debug >= LOG_DEBUG) {
646 				log(LOG_DEBUG,
647 				    "pf: pf_map_addr_states_increase: "
648 				    "selected address ");
649 				pf_print_host(naddr, 0, af);
650 				addlog(". Failed to increase count!\n");
651 			}
652 			return (-1);
653 		}
654 	} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
655 		if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt,
656 		    naddr, af) == -1) {
657 			if (pf_status.debug >= LOG_DEBUG) {
658 				log(LOG_DEBUG,
659 				    "pf: pf_map_addr_states_increase: "
660 				    "selected address ");
661 				pf_print_host(naddr, 0, af);
662 				addlog(". Failed to increase count!\n");
663 			}
664 			return (-1);
665 		}
666 	}
667 	return (0);
668 }
669 
670 int
671 pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd,
672     struct pf_src_node **sns, struct pf_rule **nr)
673 {
674 	struct pf_addr	naddr;
675 	u_int16_t	nport;
676 
677 #ifdef INET6
678 	if (pd->af != pd->naf)
679 		return (pf_get_transaddr_af(r, pd, sns));
680 #endif /* INET6 */
681 
682 	if (r->nat.addr.type != PF_ADDR_NONE) {
683 		/* XXX is this right? what if rtable is changed at the same
684 		 * XXX time? where do I need to figure out the sport? */
685 		nport = 0;
686 		if (pf_get_sport(pd, r, &naddr, &nport,
687 		    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
688 			DPFPRINTF(LOG_NOTICE,
689 			    "pf: NAT proxy port allocation (%u-%u) failed",
690 			    r->nat.proxy_port[0],
691 			    r->nat.proxy_port[1]);
692 			return (-1);
693 		}
694 		*nr = r;
695 		pf_addrcpy(&pd->nsaddr, &naddr, pd->af);
696 		pd->nsport = nport;
697 	}
698 	if (r->rdr.addr.type != PF_ADDR_NONE) {
699 		if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns,
700 		    &r->rdr, PF_SN_RDR))
701 			return (-1);
702 		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
703 			pf_poolmask(&naddr, &naddr,  &r->rdr.addr.v.a.mask,
704 			    &pd->ndaddr, pd->af);
705 
706 		nport = 0;
707 		if (r->rdr.proxy_port[1]) {
708 			u_int32_t	tmp_nport;
709 
710 			tmp_nport = ((ntohs(pd->ndport) -
711 			    ntohs(r->dst.port[0])) %
712 			    (r->rdr.proxy_port[1] -
713 			    r->rdr.proxy_port[0] + 1)) +
714 			    r->rdr.proxy_port[0];
715 
716 			/* wrap around if necessary */
717 			if (tmp_nport > 65535)
718 				tmp_nport -= 65535;
719 			nport = htons((u_int16_t)tmp_nport);
720 		} else if (r->rdr.proxy_port[0])
721 			nport = htons(r->rdr.proxy_port[0]);
722 		*nr = r;
723 		pf_addrcpy(&pd->ndaddr, &naddr, pd->af);
724 		if (nport)
725 			pd->ndport = nport;
726 	}
727 
728 	return (0);
729 }
730 
731 #ifdef INET6
732 int
733 pf_get_transaddr_af(struct pf_rule *r, struct pf_pdesc *pd,
734     struct pf_src_node **sns)
735 {
736 	struct pf_addr	ndaddr, nsaddr, naddr;
737 	u_int16_t	nport;
738 	int		prefixlen = 96;
739 
740 	if (pf_status.debug >= LOG_INFO) {
741 		log(LOG_INFO, "pf: af-to %s %s, ",
742 		    pd->naf == AF_INET ? "inet" : "inet6",
743 		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr");
744 		pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
745 		addlog(" -> ");
746 		pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
747 		addlog("\n");
748 	}
749 
750 	if (r->nat.addr.type == PF_ADDR_NONE)
751 		panic("pf_get_transaddr_af: no nat pool for source address");
752 
753 	/* get source address and port */
754 	nport = 0;
755 	if (pf_get_sport(pd, r, &nsaddr, &nport,
756 	    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
757 		DPFPRINTF(LOG_NOTICE,
758 		    "pf: af-to NAT proxy port allocation (%u-%u) failed",
759 		    r->nat.proxy_port[0],
760 		    r->nat.proxy_port[1]);
761 		return (-1);
762 	}
763 	pd->nsport = nport;
764 
765 	if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) {
766 		if (pd->dir == PF_IN) {
767 			pd->ndport = ntohs(pd->ndport);
768 			if (pd->ndport == ICMP6_ECHO_REQUEST)
769 				pd->ndport = ICMP_ECHO;
770 			else if (pd->ndport == ICMP6_ECHO_REPLY)
771 				pd->ndport = ICMP_ECHOREPLY;
772 			pd->ndport = htons(pd->ndport);
773 		} else {
774 			pd->nsport = ntohs(pd->nsport);
775 			if (pd->nsport == ICMP6_ECHO_REQUEST)
776 				pd->nsport = ICMP_ECHO;
777 			else if (pd->nsport == ICMP6_ECHO_REPLY)
778 				pd->nsport = ICMP_ECHOREPLY;
779 			pd->nsport = htons(pd->nsport);
780 		}
781 	} else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
782 		if (pd->dir == PF_IN) {
783 			pd->ndport = ntohs(pd->ndport);
784 			if (pd->ndport == ICMP_ECHO)
785 				pd->ndport = ICMP6_ECHO_REQUEST;
786 			else if (pd->ndport == ICMP_ECHOREPLY)
787 				pd->ndport = ICMP6_ECHO_REPLY;
788 			pd->ndport = htons(pd->ndport);
789 		} else {
790 			pd->nsport = ntohs(pd->nsport);
791 			if (pd->nsport == ICMP_ECHO)
792 				pd->nsport = ICMP6_ECHO_REQUEST;
793 			else if (pd->nsport == ICMP_ECHOREPLY)
794 				pd->nsport = ICMP6_ECHO_REPLY;
795 			pd->nsport = htons(pd->nsport);
796 		}
797 	}
798 
799 	/* get the destination address and port */
800 	if (r->rdr.addr.type != PF_ADDR_NONE) {
801 		if (pf_map_addr(pd->naf, r, &nsaddr, &naddr, NULL, sns,
802 		    &r->rdr, PF_SN_RDR))
803 			return (-1);
804 		if (r->rdr.proxy_port[0])
805 			pd->ndport = htons(r->rdr.proxy_port[0]);
806 
807 		if (pd->naf == AF_INET) {
808 			/* The prefix is the IPv4 rdr address */
809 			prefixlen = in_mask2len((struct in_addr *)
810 			    &r->rdr.addr.v.a.mask);
811 			inet_nat46(pd->naf, &pd->ndaddr,
812 			    &ndaddr, &naddr, prefixlen);
813 		} else {
814 			/* The prefix is the IPv6 rdr address */
815 			prefixlen =
816 			    in6_mask2len((struct in6_addr *)
817 			    &r->rdr.addr.v.a.mask, NULL);
818 			inet_nat64(pd->naf, &pd->ndaddr,
819 			    &ndaddr, &naddr, prefixlen);
820 		}
821 	} else {
822 		if (pd->naf == AF_INET) {
823 			/* The prefix is the IPv6 dst address */
824 			prefixlen =
825 			    in6_mask2len((struct in6_addr *)
826 			    &r->dst.addr.v.a.mask, NULL);
827 			if (prefixlen < 32)
828 				prefixlen = 96;
829 			inet_nat64(pd->naf, &pd->ndaddr,
830 			    &ndaddr, &pd->ndaddr, prefixlen);
831 		} else {
832 			/*
833 			 * The prefix is the IPv6 nat address
834 			 * (that was stored in pd->nsaddr)
835 			 */
836 			prefixlen = in6_mask2len((struct in6_addr *)
837 			    &r->nat.addr.v.a.mask, NULL);
838 			if (prefixlen > 96)
839 				prefixlen = 96;
840 			inet_nat64(pd->naf, &pd->ndaddr,
841 			    &ndaddr, &nsaddr, prefixlen);
842 		}
843 	}
844 
845 	pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf);
846 	pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf);
847 
848 	if (pf_status.debug >= LOG_INFO) {
849 		log(LOG_INFO, "pf: af-to %s %s done, prefixlen %d, ",
850 		    pd->naf == AF_INET ? "inet" : "inet6",
851 		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr",
852 		    prefixlen);
853 		pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
854 		addlog(" -> ");
855 		pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
856 		addlog("\n");
857 	}
858 
859 	return (0);
860 }
861 #endif /* INET6 */
862 
863 int
864 pf_postprocess_addr(struct pf_state *cur)
865 {
866 	struct pf_rule		*nr;
867 	struct pf_state_key	*sks;
868 	struct pf_pool		 rpool;
869 	struct pf_addr		 lookup_addr;
870 	int			 slbcount = -1;
871 
872 	nr = cur->natrule.ptr;
873 
874 	if (nr == NULL)
875 		return (0);
876 
877 	/* decrease counter */
878 
879 	sks = cur->key[PF_SK_STACK];
880 
881 	/* check for outgoing or ingoing balancing */
882 	if (nr->rt == PF_ROUTETO)
883 		lookup_addr = cur->rt_addr;
884 	else if (sks != NULL)
885 		lookup_addr = sks->addr[1];
886 	else {
887 		if (pf_status.debug >= LOG_DEBUG) {
888 			log(LOG_DEBUG, "pf: %s: unable to obtain address",
889 			    __func__);
890 		}
891 		return (1);
892 	}
893 
894 	/* check for appropriate pool */
895 	if (nr->rdr.addr.type != PF_ADDR_NONE)
896 		rpool = nr->rdr;
897 	else if (nr->nat.addr.type != PF_ADDR_NONE)
898 		rpool = nr->nat;
899 	else if (nr->route.addr.type != PF_ADDR_NONE)
900 		rpool = nr->route;
901 	else
902 		return (0);
903 
904 	if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
905 		return (0);
906 
907 	if (rpool.addr.type == PF_ADDR_TABLE) {
908 		if ((slbcount = pfr_states_decrease(
909 		    rpool.addr.p.tbl,
910 		    &lookup_addr, sks->af)) == -1) {
911 			if (pf_status.debug >= LOG_DEBUG) {
912 				log(LOG_DEBUG, "pf: %s: selected address ",
913 				    __func__);
914 				pf_print_host(&lookup_addr,
915 				    sks->port[0], sks->af);
916 				addlog(". Failed to "
917 				    "decrease count!\n");
918 			}
919 			return (1);
920 		}
921 	} else if (rpool.addr.type == PF_ADDR_DYNIFTL) {
922 		if ((slbcount = pfr_states_decrease(
923 		    rpool.addr.p.dyn->pfid_kt,
924 		    &lookup_addr, sks->af)) == -1) {
925 			if (pf_status.debug >= LOG_DEBUG) {
926 				log(LOG_DEBUG, "pf: %s: selected address ",
927 				    __func__);
928 				pf_print_host(&lookup_addr,
929 				    sks->port[0], sks->af);
930 				addlog(". Failed to "
931 				    "decrease count!\n");
932 			}
933 			return (1);
934 		}
935 	}
936 	if (slbcount > -1) {
937 		if (pf_status.debug >= LOG_INFO) {
938 			log(LOG_INFO, "pf: %s: selected address ", __func__);
939 			pf_print_host(&lookup_addr, sks->port[0],
940 			    sks->af);
941 			addlog(" decreased state count to %u\n",
942 			    slbcount);
943 		}
944 	}
945 	return (0);
946 }
947