xref: /openbsd-src/sys/net/pf_lb.c (revision 4c1e55dc91edd6e69ccc60ce855900fbc12cf34f)
1 /*	$OpenBSD: pf_lb.c,v 1.21 2012/07/09 15:20:57 zinke Exp $ */
2 
3 /*
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  */
37 
38 #include "bpfilter.h"
39 #include "pflog.h"
40 #include "pfsync.h"
41 #include "pflow.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/mbuf.h>
46 #include <sys/filio.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/kernel.h>
50 #include <sys/time.h>
51 #include <sys/pool.h>
52 #include <sys/proc.h>
53 #include <sys/rwlock.h>
54 #include <sys/syslog.h>
55 #include <sys/stdint.h>
56 
57 #include <crypto/md5.h>
58 
59 #include <net/if.h>
60 #include <net/if_types.h>
61 #include <net/bpf.h>
62 #include <net/route.h>
63 #include <net/radix_mpath.h>
64 
65 #include <netinet/in.h>
66 #include <netinet/in_var.h>
67 #include <netinet/in_systm.h>
68 #include <netinet/ip.h>
69 #include <netinet/ip_var.h>
70 #include <netinet/tcp.h>
71 #include <netinet/tcp_seq.h>
72 #include <netinet/udp.h>
73 #include <netinet/ip_icmp.h>
74 #include <netinet/in_pcb.h>
75 #include <netinet/tcp_timer.h>
76 #include <netinet/tcp_var.h>
77 #include <netinet/udp_var.h>
78 #include <netinet/icmp_var.h>
79 #include <netinet/if_ether.h>
80 
81 #include <dev/rndvar.h>
82 #include <net/pfvar.h>
83 #include <net/if_pflog.h>
84 #include <net/if_pflow.h>
85 
86 #if NPFSYNC > 0
87 #include <net/if_pfsync.h>
88 #endif /* NPFSYNC > 0 */
89 
90 #ifdef INET6
91 #include <netinet/ip6.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/icmp6.h>
94 #include <netinet6/nd6.h>
95 #endif /* INET6 */
96 
97 
/*
 * Function prototypes
 */
101 
102 void			 pf_hash(struct pf_addr *, struct pf_addr *,
103 			    struct pf_poolhashkey *, sa_family_t);
104 int			 pf_get_sport(struct pf_pdesc *, struct pf_rule *,
105 			    struct pf_addr *, u_int16_t *, u_int16_t,
106 			    u_int16_t, struct pf_src_node **);
107 int			 pf_get_transaddr_af(struct pf_rule *,
108 			    struct pf_pdesc *, struct pf_src_node **);
109 int			 pf_islinklocal(sa_family_t, struct pf_addr *);
110 
/*
 * mix(a, b, c) -- scramble three 32-bit accumulators in place.
 *
 * Nine subtract/subtract/xor-shift steps, rotating through a, b and c.
 * Every argument is both read and assigned, so each one must be a
 * modifiable lvalue.
 */
#define mix(a, b, c)				\
	do {					\
		(a) -= (b);			\
		(a) -= (c);			\
		(a) ^= ((c) >> 13);		\
		(b) -= (c);			\
		(b) -= (a);			\
		(b) ^= ((a) << 8);		\
		(c) -= (a);			\
		(c) -= (b);			\
		(c) ^= ((b) >> 13);		\
						\
		(a) -= (b);			\
		(a) -= (c);			\
		(a) ^= ((c) >> 12);		\
		(b) -= (c);			\
		(b) -= (a);			\
		(b) ^= ((a) << 16);		\
		(c) -= (a);			\
		(c) -= (b);			\
		(c) ^= ((b) >> 5);		\
						\
		(a) -= (b);			\
		(a) -= (c);			\
		(a) ^= ((c) >> 3);		\
		(b) -= (c);			\
		(b) -= (a);			\
		(b) ^= ((a) << 10);		\
		(c) -= (a);			\
		(c) -= (b);			\
		(c) ^= ((b) >> 15);		\
	} while (0)
123 
124 /*
125  * hash function based on bridge_hash in if_bridge.c
126  */
127 void
128 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
129     struct pf_poolhashkey *key, sa_family_t af)
130 {
131 	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
132 
133 	switch (af) {
134 #ifdef INET
135 	case AF_INET:
136 		a += inaddr->addr32[0];
137 		b += key->key32[1];
138 		mix(a, b, c);
139 		hash->addr32[0] = c + key->key32[2];
140 		break;
141 #endif /* INET */
142 #ifdef INET6
143 	case AF_INET6:
144 		a += inaddr->addr32[0];
145 		b += inaddr->addr32[2];
146 		mix(a, b, c);
147 		hash->addr32[0] = c;
148 		a += inaddr->addr32[1];
149 		b += inaddr->addr32[3];
150 		c += key->key32[1];
151 		mix(a, b, c);
152 		hash->addr32[1] = c;
153 		a += inaddr->addr32[2];
154 		b += inaddr->addr32[1];
155 		c += key->key32[2];
156 		mix(a, b, c);
157 		hash->addr32[2] = c;
158 		a += inaddr->addr32[3];
159 		b += inaddr->addr32[0];
160 		c += key->key32[3];
161 		mix(a, b, c);
162 		hash->addr32[3] = c;
163 		break;
164 #endif /* INET6 */
165 	}
166 }
167 
168 int
169 pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r,
170     struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
171     struct pf_src_node **sn)
172 {
173 	struct pf_state_key_cmp	key;
174 	struct pf_addr		init_addr;
175 	u_int16_t		cut;
176 
177 	bzero(&init_addr, sizeof(init_addr));
178 	if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat,
179 	    PF_SN_NAT))
180 		return (1);
181 
182 	if (pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) {
183 		if (pd->ndport == htons(ICMP6_ECHO_REQUEST) ||
184 		    pd->ndport == htons(ICMP_ECHO)) {
185 			low = 1;
186 			high = 65535;
187 		} else
188 			return (0);	/* Don't try to modify non-echo ICMP */
189 	}
190 
191 	do {
192 		key.af = pd->naf;
193 		key.proto = pd->proto;
194 		key.rdomain = pd->rdomain;
195 		PF_ACPY(&key.addr[0], &pd->ndaddr, key.af);
196 		PF_ACPY(&key.addr[1], naddr, key.af);
197 		key.port[0] = pd->ndport;
198 
199 		/*
200 		 * port search; start random, step;
201 		 * similar 2 portloop in in_pcbbind
202 		 */
203 		if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
204 		    pd->proto == IPPROTO_ICMP)) {
205 			/* XXX bug: icmp states dont use the id on both
206 			 * XXX sides (traceroute -I through nat) */
207 			key.port[1] = pd->nsport;
208 			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
209 				*nport = pd->nsport;
210 				return (0);
211 			}
212 		} else if (low == 0 && high == 0) {
213 			key.port[1] = pd->nsport;
214 			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
215 				*nport = pd->nsport;
216 				return (0);
217 			}
218 		} else if (low == high) {
219 			key.port[1] = htons(low);
220 			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
221 				*nport = htons(low);
222 				return (0);
223 			}
224 		} else {
225 			u_int16_t tmp;
226 
227 			if (low > high) {
228 				tmp = low;
229 				low = high;
230 				high = tmp;
231 			}
232 			/* low < high */
233 			cut = arc4random_uniform(1 + high - low) + low;
234 			/* low <= cut <= high */
235 			for (tmp = cut; tmp <= high; ++(tmp)) {
236 				key.port[1] = htons(tmp);
237 				if (pf_find_state_all(&key, PF_IN, NULL) ==
238 				    NULL && !in_baddynamic(tmp, pd->proto)) {
239 					*nport = htons(tmp);
240 					return (0);
241 				}
242 			}
243 			for (tmp = cut - 1; tmp >= low; --(tmp)) {
244 				key.port[1] = htons(tmp);
245 				if (pf_find_state_all(&key, PF_IN, NULL) ==
246 				    NULL && !in_baddynamic(tmp, pd->proto)) {
247 					*nport = htons(tmp);
248 					return (0);
249 				}
250 			}
251 		}
252 
253 		switch (r->nat.opts & PF_POOL_TYPEMASK) {
254 		case PF_POOL_RANDOM:
255 		case PF_POOL_ROUNDROBIN:
256 		case PF_POOL_LEASTSTATES:
257 			if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr,
258 			    &init_addr, sn, &r->nat, PF_SN_NAT))
259 				return (1);
260 			break;
261 		case PF_POOL_NONE:
262 		case PF_POOL_SRCHASH:
263 		case PF_POOL_BITMASK:
264 		default:
265 			return (1);
266 		}
267 	} while (! PF_AEQ(&init_addr, naddr, pd->naf) );
268 	return (1);					/* none available */
269 }
270 
271 int
272 pf_islinklocal(sa_family_t af, struct pf_addr *addr)
273 {
274 	if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6))
275 		return (1);
276 	return (0);
277 }
278 
279 int
280 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
281     struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns,
282     struct pf_pool *rpool, enum pf_sn_types type)
283 {
284 	unsigned char		 hash[16];
285 	struct pf_addr		 faddr;
286 	struct pf_addr		*raddr = &rpool->addr.v.a.addr;
287 	struct pf_addr		*rmask = &rpool->addr.v.a.mask;
288 	struct pf_src_node	 k;
289 	u_int64_t		 states;
290 	u_int16_t		 weight;
291 	u_int64_t		 load;
292 	u_int64_t		 cload;
293 
294 	if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
295 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
296 		k.af = af;
297 		k.type = type;
298 		PF_ACPY(&k.addr, saddr, af);
299 		k.rule.ptr = r;
300 		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
301 		sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
302 		if (sns[type] != NULL) {
303 			if (!PF_AZERO(&(sns[type])->raddr, af))
304 				PF_ACPY(naddr, &(sns[type])->raddr, af);
305 			if (pf_status.debug >= LOG_DEBUG) {
306 				log(LOG_DEBUG, "pf: pf_map_addr: "
307 				    "src tracking (%u) maps ", type);
308 				pf_print_host(&k.addr, 0, af);
309 				addlog(" to ");
310 				pf_print_host(naddr, 0, af);
311 				addlog("\n");
312 			}
313 			return (0);
314 		}
315 	}
316 
317 	if (rpool->addr.type == PF_ADDR_NOROUTE)
318 		return (1);
319 	if (rpool->addr.type == PF_ADDR_DYNIFTL) {
320 		switch (af) {
321 #ifdef INET
322 		case AF_INET:
323 			if (rpool->addr.p.dyn->pfid_acnt4 < 1 &&
324 			    ((rpool->opts & PF_POOL_TYPEMASK) !=
325 			    PF_POOL_ROUNDROBIN) &&
326 			    ((rpool->opts & PF_POOL_TYPEMASK) !=
327 			    PF_POOL_LEASTSTATES))
328 				return (1);
329 			raddr = &rpool->addr.p.dyn->pfid_addr4;
330 			rmask = &rpool->addr.p.dyn->pfid_mask4;
331 			break;
332 #endif /* INET */
333 #ifdef INET6
334 		case AF_INET6:
335 			if (rpool->addr.p.dyn->pfid_acnt6 < 1 &&
336 			    ((rpool->opts & PF_POOL_TYPEMASK) !=
337 			    PF_POOL_ROUNDROBIN) &&
338 			    ((rpool->opts & PF_POOL_TYPEMASK) !=
339 			    PF_POOL_LEASTSTATES))
340 				return (1);
341 			raddr = &rpool->addr.p.dyn->pfid_addr6;
342 			rmask = &rpool->addr.p.dyn->pfid_mask6;
343 			break;
344 #endif /* INET6 */
345 		}
346 	} else if (rpool->addr.type == PF_ADDR_TABLE) {
347 		if (((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) &&
348 		    ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
349 			return (1); /* unsupported */
350 	} else {
351 		raddr = &rpool->addr.v.a.addr;
352 		rmask = &rpool->addr.v.a.mask;
353 	}
354 
355 	switch (rpool->opts & PF_POOL_TYPEMASK) {
356 	case PF_POOL_NONE:
357 		PF_ACPY(naddr, raddr, af);
358 		break;
359 	case PF_POOL_BITMASK:
360 		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
361 		break;
362 	case PF_POOL_RANDOM:
363 		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
364 			switch (af) {
365 #ifdef INET
366 			case AF_INET:
367 				rpool->counter.addr32[0] = htonl(arc4random());
368 				break;
369 #endif /* INET */
370 #ifdef INET6
371 			case AF_INET6:
372 				if (rmask->addr32[3] != 0xffffffff)
373 					rpool->counter.addr32[3] =
374 					    htonl(arc4random());
375 				else
376 					break;
377 				if (rmask->addr32[2] != 0xffffffff)
378 					rpool->counter.addr32[2] =
379 					    htonl(arc4random());
380 				else
381 					break;
382 				if (rmask->addr32[1] != 0xffffffff)
383 					rpool->counter.addr32[1] =
384 					    htonl(arc4random());
385 				else
386 					break;
387 				if (rmask->addr32[0] != 0xffffffff)
388 					rpool->counter.addr32[0] =
389 					    htonl(arc4random());
390 				break;
391 #endif /* INET6 */
392 			}
393 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
394 			PF_ACPY(init_addr, naddr, af);
395 
396 		} else {
397 			PF_AINC(&rpool->counter, af);
398 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
399 		}
400 		break;
401 	case PF_POOL_SRCHASH:
402 		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
403 		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
404 		break;
405 	case PF_POOL_ROUNDROBIN:
406 		if (rpool->addr.type == PF_ADDR_TABLE) {
407 			if (pfr_pool_get(rpool->addr.p.tbl,
408 			    &rpool->tblidx, &rpool->counter,
409 			    &raddr, &rmask, &rpool->kif,
410 			    &rpool->states, &rpool->weight,
411 			    &rpool->curweight, af, NULL))
412 				return (1);
413 		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
414 			if (pfr_pool_get(rpool->addr.p.dyn->pfid_kt,
415 			    &rpool->tblidx, &rpool->counter,
416 			    &raddr, &rmask, &rpool->kif,
417 			    &rpool->states, &rpool->weight,
418 			    &rpool->curweight, af, pf_islinklocal))
419 				return (1);
420 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
421 			return (1);
422 
423 		/* iterate over table if it contains entries which are weighted */
424 		if ((rpool->addr.type == PF_ADDR_TABLE &&
425 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
426 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
427 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) {
428 			do {
429 				if (rpool->addr.type == PF_ADDR_TABLE) {
430 					if (pfr_pool_get(rpool->addr.p.tbl,
431 					    &rpool->tblidx, &rpool->counter,
432 					    &raddr, &rmask, &rpool->kif,
433 					    &rpool->states, &rpool->weight,
434 					    &rpool->curweight, af, NULL))
435 						return (1);
436 				} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
437 					if (pfr_pool_get(
438 					    rpool->addr.p.dyn->pfid_kt,
439 					    &rpool->tblidx, &rpool->counter,
440 					    &raddr, &rmask, &rpool->kif,
441 					    &rpool->states, &rpool->weight,
442 					    &rpool->curweight, af,
443 					    pf_islinklocal))
444 						return (1);
445 				} else {
446 					log(LOG_ERR, "pf: pf_map_addr: "
447 					    "weighted RR failure");
448 					return (1);
449 				}
450 				if (rpool->weight >= rpool->curweight)
451 					break;
452 				PF_AINC(&rpool->counter, af);
453 			} while (1);
454 
455 			weight = rpool->weight;
456 		}
457 
458 		PF_ACPY(naddr, &rpool->counter, af);
459 		if (init_addr != NULL && PF_AZERO(init_addr, af))
460 			PF_ACPY(init_addr, naddr, af);
461 		PF_AINC(&rpool->counter, af);
462 		break;
463 	case PF_POOL_LEASTSTATES:
464 		/* retrieve an address first */
465 		if (rpool->addr.type == PF_ADDR_TABLE) {
466 			if (pfr_pool_get(rpool->addr.p.tbl,
467 			    &rpool->tblidx, &rpool->counter,
468 			    &raddr, &rmask, &rpool->kif,
469 			    &rpool->states, &rpool->weight,
470 			    &rpool->curweight, af, NULL))
471 				return (1);
472 		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
473 			if (pfr_pool_get(rpool->addr.p.dyn->pfid_kt,
474 			    &rpool->tblidx, &rpool->counter,
475 			    &raddr, &rmask, &rpool->kif,
476 			    &rpool->states, &rpool->weight,
477 			    &rpool->curweight, af, pf_islinklocal))
478 				return (1);
479 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
480 			return (1);
481 
482 		states = rpool->states;
483 		weight = rpool->weight;
484 
485 		if ((rpool->addr.type == PF_ADDR_TABLE &&
486 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
487 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
488 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
489 			load = ((UINT16_MAX * rpool->states) / rpool->weight);
490 		else
491 			load = states;
492 
493 		PF_ACPY(&faddr, &rpool->counter, af);
494 
495 		PF_ACPY(naddr, &rpool->counter, af);
496 		if (init_addr != NULL && PF_AZERO(init_addr, af))
497 			PF_ACPY(init_addr, naddr, af);
498 
499 		/*
500 		 * iterate *once* over whole table and find destination with
501 		 * least connection
502 		 */
503 		do  {
504 			PF_AINC(&rpool->counter, af);
505 			if (rpool->addr.type == PF_ADDR_TABLE) {
506 				if (pfr_pool_get(rpool->addr.p.tbl,
507 				    &rpool->tblidx, &rpool->counter,
508 				    &raddr, &rmask, &rpool->kif,
509 				    &rpool->states, &rpool->weight,
510 				    &rpool->curweight, af, NULL))
511 					return (1);
512 			} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
513 				if (pfr_pool_get(rpool->addr.p.dyn->pfid_kt,
514 				    &rpool->tblidx, &rpool->counter,
515 				    &raddr, &rmask, &rpool->kif,
516 				    &rpool->states, &rpool->weight,
517 				    &rpool->curweight, af, pf_islinklocal))
518 					return (1);
519 			} else if (pf_match_addr(0, raddr, rmask,
520 			    &rpool->counter, af))
521 				return (1);
522 
523 			if ((rpool->addr.type == PF_ADDR_TABLE &&
524 			    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
525 			    (rpool->addr.type == PF_ADDR_DYNIFTL &&
526 			    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
527 				cload = ((UINT16_MAX * rpool->states)
528 					/ rpool->weight);
529 			else
530 				cload = rpool->states;
531 
532 			/* find lc minimum */
533 			if (cload < load) {
534 				states = rpool->states;
535 				weight = rpool->weight;
536 				load = cload;
537 
538 				PF_ACPY(naddr, &rpool->counter, af);
539 				if (init_addr != NULL &&
540 				    PF_AZERO(init_addr, af))
541 				    PF_ACPY(init_addr, naddr, af);
542 			}
543 		} while (pf_match_addr(1, &faddr, rmask, &rpool->counter, af) &&
544 		    (states > 0));
545 
546 		if (rpool->addr.type == PF_ADDR_TABLE) {
547 			if (pfr_states_increase(rpool->addr.p.tbl,
548 			    naddr, af) == -1) {
549 				if (pf_status.debug >= LOG_DEBUG) {
550 					log(LOG_DEBUG,"pf: pf_map_addr: "
551 					    "selected address ");
552 					pf_print_host(naddr, 0, af);
553 					addlog(". Failed to increase count!\n");
554 				}
555 				return (1);
556 			}
557 		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
558 			if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt,
559 			    naddr, af) == -1) {
560 				if (pf_status.debug >= LOG_DEBUG) {
561 					log(LOG_DEBUG, "pf: pf_map_addr: "
562 					    "selected address ");
563 					pf_print_host(naddr, 0, af);
564 					addlog(". Failed to increase count!\n");
565 				}
566 				return (1);
567 			}
568 		}
569 		break;
570 	}
571 
572 	if (rpool->opts & PF_POOL_STICKYADDR) {
573 		if (sns[type] != NULL) {
574 			pf_remove_src_node(sns[type]);
575 			sns[type] = NULL;
576 		}
577 		if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr,
578 		    0))
579 			return (1);
580 	}
581 
582 	if (pf_status.debug >= LOG_NOTICE &&
583 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
584 		log(LOG_NOTICE, "pf: pf_map_addr: selected address ");
585 		pf_print_host(naddr, 0, af);
586 		if ((rpool->opts & PF_POOL_TYPEMASK) ==
587 		    PF_POOL_LEASTSTATES)
588 			addlog(" with state count %llu", states);
589 		if ((rpool->addr.type == PF_ADDR_TABLE &&
590 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
591 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
592 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
593 			addlog(" with weight %u", weight);
594 		addlog("\n");
595 	}
596 
597 	return (0);
598 }
599 
600 int
601 pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd,
602     struct pf_src_node **sns, struct pf_rule **nr)
603 {
604 	struct pf_addr	naddr;
605 	u_int16_t	nport = 0;
606 
607 #ifdef INET6
608 	if (pd->af != pd->naf)
609 		return (pf_get_transaddr_af(r, pd, sns));
610 #endif /* INET6 */
611 
612 	if (r->nat.addr.type != PF_ADDR_NONE) {
613 		/* XXX is this right? what if rtable is changed at the same
614 		 * XXX time? where do I need to figure out the sport? */
615 		if (pf_get_sport(pd, r, &naddr, &nport,
616 		    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
617 			DPFPRINTF(LOG_NOTICE,
618 			    "pf: NAT proxy port allocation (%u-%u) failed",
619 			    r->nat.proxy_port[0],
620 			    r->nat.proxy_port[1]);
621 			return (-1);
622 		}
623 		*nr = r;
624 		PF_ACPY(&pd->nsaddr, &naddr, pd->af);
625 		pd->nsport = nport;
626 	}
627 	if (r->rdr.addr.type != PF_ADDR_NONE) {
628 		if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns,
629 		    &r->rdr, PF_SN_RDR))
630 			return (-1);
631 		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
632 			PF_POOLMASK(&naddr, &naddr,  &r->rdr.addr.v.a.mask,
633 			    &pd->ndaddr, pd->af);
634 
635 			if (r->rdr.proxy_port[1]) {
636 				u_int32_t	tmp_nport;
637 
638 				tmp_nport = ((ntohs(pd->ndport) -
639 				    ntohs(r->dst.port[0])) %
640 				    (r->rdr.proxy_port[1] -
641 				    r->rdr.proxy_port[0] + 1)) +
642 				    r->rdr.proxy_port[0];
643 
644 				/* wrap around if necessary */
645 				if (tmp_nport > 65535)
646 					tmp_nport -= 65535;
647 				nport = htons((u_int16_t)tmp_nport);
648 			} else if (r->rdr.proxy_port[0])
649 				nport = htons(r->rdr.proxy_port[0]);
650 		*nr = r;
651 		PF_ACPY(&pd->ndaddr, &naddr, pd->af);
652 		if (nport)
653 			pd->ndport = nport;
654 	}
655 
656 	return (0);
657 }
658 
#ifdef INET6
/*
 * pf_get_transaddr_af -- translation step used when the packet changes
 * address family (pd->af != pd->naf).
 *
 * r	rule; must have a nat pool (the function panics otherwise) and
 *	may have an rdr pool
 * pd	packet descriptor; nsaddr, nsport, ndaddr and ndport are
 *	rewritten for the new address family
 * sns	source node array, handed through to the pool lookups
 *
 * The source address and port come from pf_get_sport().  ICMP and
 * ICMPv6 echo request/reply type codes kept in the port fields are
 * mapped to their counterpart in the other family.  The destination
 * address is produced by inet_nat46()/inet_nat64() with a prefix length
 * taken from the rdr, dst or nat address mask.
 *
 * Returns 0 on success and -1 if the source port or the rdr address
 * could not be obtained.
 */
int
pf_get_transaddr_af(struct pf_rule *r, struct pf_pdesc *pd,
    struct pf_src_node **sns)
{
	struct pf_addr	 ndaddr, nsaddr, naddr;
	u_int16_t	 nport = 0;
	int		 prefixlen = 96;
	int		 to_inet = (pd->naf == AF_INET);
	int		 has_rdr = (r->rdr.addr.type != PF_ADDR_NONE);
	const char	*afname = to_inet ? "inet" : "inet6";
	const char	*kind = has_rdr ? "rdr" : "nat";

	if (pf_status.debug >= LOG_NOTICE) {
		log(LOG_NOTICE, "pf: af-to %s %s, ", afname, kind);
		pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
		addlog(" -> ");
		pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
		addlog("\n");
	}

	if (r->nat.addr.type == PF_ADDR_NONE)
		panic("pf_get_transaddr_af: no nat pool for source address");

	/* get source address and port */
	if (pf_get_sport(pd, r, &nsaddr, &nport,
	    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
		DPFPRINTF(LOG_NOTICE,
		    "pf: af-to NAT proxy port allocation (%u-%u) failed",
		    r->nat.proxy_port[0],
		    r->nat.proxy_port[1]);
		return (-1);
	}
	pd->nsport = nport;

	/*
	 * Map echo request/reply type codes between ICMPv6 and ICMP.  The
	 * code lives in ndport for inbound packets and in nsport otherwise;
	 * it is converted to host order for the comparison and back again.
	 */
	if (pd->proto == IPPROTO_ICMPV6 && to_inet) {
		if (pd->dir == PF_IN) {
			NTOHS(pd->ndport);
			switch (pd->ndport) {
			case ICMP6_ECHO_REQUEST:
				pd->ndport = ICMP_ECHO;
				break;
			case ICMP6_ECHO_REPLY:
				pd->ndport = ICMP_ECHOREPLY;
				break;
			default:
				break;
			}
			HTONS(pd->ndport);
		} else {
			NTOHS(pd->nsport);
			switch (pd->nsport) {
			case ICMP6_ECHO_REQUEST:
				pd->nsport = ICMP_ECHO;
				break;
			case ICMP6_ECHO_REPLY:
				pd->nsport = ICMP_ECHOREPLY;
				break;
			default:
				break;
			}
			HTONS(pd->nsport);
		}
	} else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
		if (pd->dir == PF_IN) {
			NTOHS(pd->ndport);
			switch (pd->ndport) {
			case ICMP_ECHO:
				pd->ndport = ICMP6_ECHO_REQUEST;
				break;
			case ICMP_ECHOREPLY:
				pd->ndport = ICMP6_ECHO_REPLY;
				break;
			default:
				break;
			}
			HTONS(pd->ndport);
		} else {
			NTOHS(pd->nsport);
			switch (pd->nsport) {
			case ICMP_ECHO:
				pd->nsport = ICMP6_ECHO_REQUEST;
				break;
			case ICMP_ECHOREPLY:
				pd->nsport = ICMP6_ECHO_REPLY;
				break;
			default:
				break;
			}
			HTONS(pd->nsport);
		}
	}

	/* get the destination address and port */
	if (has_rdr) {
		if (pf_map_addr(pd->naf, r, &nsaddr, &naddr, NULL, sns,
		    &r->rdr, PF_SN_RDR))
			return (-1);
		if (r->rdr.proxy_port[0])
			pd->ndport = htons(r->rdr.proxy_port[0]);

		if (to_inet) {
			/* The prefix is the IPv4 rdr address */
			prefixlen = in_mask2len((struct in_addr *)
			    &r->rdr.addr.v.a.mask);
			inet_nat46(pd->naf, &pd->ndaddr,
			    &ndaddr, &naddr, prefixlen);
		} else {
			/* The prefix is the IPv6 rdr address */
			prefixlen = in6_mask2len((struct in6_addr *)
			    &r->rdr.addr.v.a.mask, NULL);
			inet_nat64(pd->naf, &pd->ndaddr,
			    &ndaddr, &naddr, prefixlen);
		}
	} else if (to_inet) {
		/* The prefix is the IPv6 dst address */
		prefixlen = in6_mask2len((struct in6_addr *)
		    &r->dst.addr.v.a.mask, NULL);
		if (prefixlen < 32)
			prefixlen = 96;
		inet_nat64(pd->naf, &pd->ndaddr,
		    &ndaddr, &pd->ndaddr, prefixlen);
	} else {
		/*
		 * The prefix is the IPv6 nat address
		 * (that was stored in pd->nsaddr)
		 */
		prefixlen = in6_mask2len((struct in6_addr *)
		    &r->nat.addr.v.a.mask, NULL);
		if (prefixlen > 96)
			prefixlen = 96;
		inet_nat64(pd->naf, &pd->ndaddr,
		    &ndaddr, &nsaddr, prefixlen);
	}

	PF_ACPY(&pd->nsaddr, &nsaddr, pd->naf);
	PF_ACPY(&pd->ndaddr, &ndaddr, pd->naf);

	if (pf_status.debug >= LOG_NOTICE) {
		log(LOG_NOTICE, "pf: af-to %s %s done, prefixlen %d, ",
		    afname, kind, prefixlen);
		pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
		addlog(" -> ");
		pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
		addlog("\n");
	}

	return (0);
}
#endif /* INET6 */
789 
790 int
791 pf_postprocess_addr(struct pf_state *cur) {
792 	struct pf_rule *nr;
793 
794 	nr = cur->natrule.ptr;
795 
796 	/* decrease counter */
797 	if (nr != NULL) {
798 		int			 slbcount;
799 		struct pf_pool		 rpool;
800 		struct pf_addr		 lookup_addr;
801 		struct pf_state_key	*sks;
802 
803 		sks = cur ? cur->key[PF_SK_STACK] : NULL;
804 
805 		/* check for outgoing or ingoing balancing */
806 		if (nr->rt == PF_ROUTETO)
807 			lookup_addr = cur->rt_addr;
808 		else if (sks != NULL)
809 			lookup_addr = sks->addr[1];
810 		else {
811 			if (pf_status.debug >= LOG_DEBUG) {
812 				log(LOG_DEBUG, "pf: pf_unlink_state: "
813 				    "unable to optain address");
814 			}
815 			return (1);
816 		}
817 
818 		/* check for appropriate pool */
819 		if (nr->rdr.addr.type != PF_ADDR_NONE)
820 			rpool = nr->rdr;
821 		else if (nr->nat.addr.type != PF_ADDR_NONE)
822 			rpool = nr->nat;
823 		else if (nr->route.addr.type != PF_ADDR_NONE)
824 			rpool = nr->route;
825 
826 		if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
827 			return (0);
828 
829 		if (rpool.addr.type == PF_ADDR_TABLE) {
830 			if ((slbcount = pfr_states_decrease(
831 			    rpool.addr.p.tbl,
832 			    &lookup_addr, sks->af)) == -1) {
833 				if (pf_status.debug >= LOG_DEBUG) {
834 					log(LOG_DEBUG, "pf: pf_unlink_state: "
835 					    "selected address ");
836 					pf_print_host(&lookup_addr,
837 					    sks->port[0], sks->af);
838 					addlog(". Failed to "
839 					    "decrease count!\n");
840 				}
841 				return (1);
842 			}
843 		} else if (rpool.addr.type == PF_ADDR_DYNIFTL) {
844 			if ((slbcount = pfr_states_decrease(
845 			    rpool.addr.p.dyn->pfid_kt,
846 			    &lookup_addr, sks->af)) == -1) {
847 				if (pf_status.debug >= LOG_DEBUG) {
848 					log(LOG_DEBUG,
849 					    "pf: pf_unlink_state: "
850 					    "selected address ");
851 					pf_print_host(&lookup_addr,
852 					    sks->port[0], sks->af);
853 					addlog(". Failed to "
854 					    "decrease count!\n");
855 				}
856 				return (1);
857 			}
858 		}
859 		if (slbcount > -1) {
860 			if (pf_status.debug >= LOG_NOTICE) {
861 				log(LOG_NOTICE,
862 				    "pf: pf_unlink_state: selected address ");
863 				pf_print_host(&lookup_addr, sks->port[0],
864 				    sks->af);
865 				addlog(" decreased state count to %u\n",
866 				    slbcount);
867 			}
868 		}
869 	}
870 
871 	return (0);
872 }
873