xref: /openbsd-src/sys/net/pf_lb.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*	$OpenBSD: pf_lb.c,v 1.31 2014/07/02 13:06:00 mikeb Exp $ */
2 
3 /*
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  */
37 
38 #include "bpfilter.h"
39 #include "pflog.h"
40 #include "pfsync.h"
41 #include "pflow.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/mbuf.h>
46 #include <sys/filio.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/kernel.h>
50 #include <sys/time.h>
51 #include <sys/pool.h>
52 #include <sys/rwlock.h>
53 #include <sys/syslog.h>
54 #include <sys/stdint.h>
55 
56 #include <crypto/md5.h>
57 
58 #include <net/if.h>
59 #include <net/if_types.h>
60 #include <net/bpf.h>
61 #include <net/route.h>
62 #include <net/radix_mpath.h>
63 
64 #include <netinet/in.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/ip.h>
67 #include <netinet/ip_var.h>
68 #include <netinet/tcp.h>
69 #include <netinet/tcp_seq.h>
70 #include <netinet/udp.h>
71 #include <netinet/ip_icmp.h>
72 #include <netinet/in_pcb.h>
73 #include <netinet/tcp_timer.h>
74 #include <netinet/tcp_var.h>
75 #include <netinet/udp_var.h>
76 #include <netinet/icmp_var.h>
77 #include <netinet/if_ether.h>
78 
79 #include <dev/rndvar.h>
80 #include <net/pfvar.h>
81 #include <net/if_pflog.h>
82 #include <net/if_pflow.h>
83 
84 #if NPFSYNC > 0
85 #include <net/if_pfsync.h>
86 #endif /* NPFSYNC > 0 */
87 
88 #ifdef INET6
89 #include <netinet/ip6.h>
90 #include <netinet/in_pcb.h>
91 #include <netinet/icmp6.h>
92 #endif /* INET6 */
93 
94 
95 /*
96  * Global variables
97  */
98 
99 void			 pf_hash(struct pf_addr *, struct pf_addr *,
100 			    struct pf_poolhashkey *, sa_family_t);
101 int			 pf_get_sport(struct pf_pdesc *, struct pf_rule *,
102 			    struct pf_addr *, u_int16_t *, u_int16_t,
103 			    u_int16_t, struct pf_src_node **);
104 int			 pf_get_transaddr_af(struct pf_rule *,
105 			    struct pf_pdesc *, struct pf_src_node **);
106 int			 pf_map_addr_sticky(sa_family_t, struct pf_rule *,
107 			    struct pf_addr *, struct pf_addr *,
108 			    struct pf_src_node **, struct pf_pool *,
109 			    enum pf_sn_types);
110 
/*
 * mix(a, b, c): scramble three unsigned integer lvalues in place using
 * nine subtract/xor/shift rounds.  Each argument is read and written
 * several times, so only side-effect-free lvalues may be passed.
 */
#define mix(a, b, c)							\
	do {								\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);		\
	} while (0)
123 
124 /*
125  * hash function based on bridge_hash in if_bridge.c
126  */
127 void
128 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
129     struct pf_poolhashkey *key, sa_family_t af)
130 {
131 	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
132 
133 	switch (af) {
134 #ifdef INET
135 	case AF_INET:
136 		a += inaddr->addr32[0];
137 		b += key->key32[1];
138 		mix(a, b, c);
139 		hash->addr32[0] = c + key->key32[2];
140 		break;
141 #endif /* INET */
142 #ifdef INET6
143 	case AF_INET6:
144 		a += inaddr->addr32[0];
145 		b += inaddr->addr32[2];
146 		mix(a, b, c);
147 		hash->addr32[0] = c;
148 		a += inaddr->addr32[1];
149 		b += inaddr->addr32[3];
150 		c += key->key32[1];
151 		mix(a, b, c);
152 		hash->addr32[1] = c;
153 		a += inaddr->addr32[2];
154 		b += inaddr->addr32[1];
155 		c += key->key32[2];
156 		mix(a, b, c);
157 		hash->addr32[2] = c;
158 		a += inaddr->addr32[3];
159 		b += inaddr->addr32[0];
160 		c += key->key32[3];
161 		mix(a, b, c);
162 		hash->addr32[3] = c;
163 		break;
164 #endif /* INET6 */
165 	}
166 }
167 
168 int
169 pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r,
170     struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
171     struct pf_src_node **sn)
172 {
173 	struct pf_state_key_cmp	key;
174 	struct pf_addr		init_addr;
175 	u_int16_t		cut;
176 
177 	bzero(&init_addr, sizeof(init_addr));
178 	if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat,
179 	    PF_SN_NAT))
180 		return (1);
181 
182 	if (pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) {
183 		if (pd->ndport == htons(ICMP6_ECHO_REQUEST) ||
184 		    pd->ndport == htons(ICMP_ECHO)) {
185 			low = 1;
186 			high = 65535;
187 		} else
188 			return (0);	/* Don't try to modify non-echo ICMP */
189 	}
190 
191 	do {
192 		key.af = pd->naf;
193 		key.proto = pd->proto;
194 		key.rdomain = pd->rdomain;
195 		PF_ACPY(&key.addr[0], &pd->ndaddr, key.af);
196 		PF_ACPY(&key.addr[1], naddr, key.af);
197 		key.port[0] = pd->ndport;
198 
199 		/*
200 		 * port search; start random, step;
201 		 * similar 2 portloop in in_pcbbind
202 		 */
203 		if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
204 		    pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6)) {
205 			/* XXX bug: icmp states dont use the id on both
206 			 * XXX sides (traceroute -I through nat) */
207 			key.port[1] = pd->nsport;
208 			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
209 				*nport = pd->nsport;
210 				return (0);
211 			}
212 		} else if (low == 0 && high == 0) {
213 			key.port[1] = pd->nsport;
214 			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
215 				*nport = pd->nsport;
216 				return (0);
217 			}
218 		} else if (low == high) {
219 			key.port[1] = htons(low);
220 			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
221 				*nport = htons(low);
222 				return (0);
223 			}
224 		} else {
225 			u_int16_t tmp;
226 
227 			if (low > high) {
228 				tmp = low;
229 				low = high;
230 				high = tmp;
231 			}
232 			/* low < high */
233 			cut = arc4random_uniform(1 + high - low) + low;
234 			/* low <= cut <= high */
235 			for (tmp = cut; tmp <= high; ++(tmp)) {
236 				key.port[1] = htons(tmp);
237 				if (pf_find_state_all(&key, PF_IN, NULL) ==
238 				    NULL && !in_baddynamic(tmp, pd->proto)) {
239 					*nport = htons(tmp);
240 					return (0);
241 				}
242 			}
243 			for (tmp = cut - 1; tmp >= low; --(tmp)) {
244 				key.port[1] = htons(tmp);
245 				if (pf_find_state_all(&key, PF_IN, NULL) ==
246 				    NULL && !in_baddynamic(tmp, pd->proto)) {
247 					*nport = htons(tmp);
248 					return (0);
249 				}
250 			}
251 		}
252 
253 		switch (r->nat.opts & PF_POOL_TYPEMASK) {
254 		case PF_POOL_RANDOM:
255 		case PF_POOL_ROUNDROBIN:
256 		case PF_POOL_LEASTSTATES:
257 			/*
258 			 * pick a different source address since we're out
259 			 * of free port choices for the current one.
260 			 */
261 			if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr,
262 			    &init_addr, sn, &r->nat, PF_SN_NAT))
263 				return (1);
264 			break;
265 		case PF_POOL_NONE:
266 		case PF_POOL_SRCHASH:
267 		case PF_POOL_BITMASK:
268 		default:
269 			return (1);
270 		}
271 	} while (! PF_AEQ(&init_addr, naddr, pd->naf) );
272 	return (1);					/* none available */
273 }
274 
275 int
276 pf_map_addr_sticky(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
277     struct pf_addr *naddr, struct pf_src_node **sns, struct pf_pool *rpool,
278     enum pf_sn_types type)
279 {
280 	struct pf_addr		*raddr, *rmask, *cached;
281 	struct pf_state		*s;
282 	struct pf_src_node	 k;
283 	int			 valid;
284 
285 	k.af = af;
286 	k.type = type;
287 	PF_ACPY(&k.addr, saddr, af);
288 	k.rule.ptr = r;
289 	pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
290 	sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
291 	if (sns[type] == NULL)
292 		return (-1);
293 
294 	/* check if the cached entry is still valid */
295 	cached = &(sns[type])->raddr;
296 	valid = 0;
297 	if (PF_AZERO(cached, af)) {
298 		valid = 1;
299 	} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
300 		if (pfr_kentry_byaddr(rpool->addr.p.dyn->pfid_kt, cached,
301 		    af, 0))
302 			valid = 1;
303 	} else if (rpool->addr.type == PF_ADDR_TABLE) {
304 		if (pfr_kentry_byaddr(rpool->addr.p.tbl, cached, af, 0))
305 			valid = 1;
306 	} else if (rpool->addr.type != PF_ADDR_NOROUTE) {
307 		raddr = &rpool->addr.v.a.addr;
308 		rmask = &rpool->addr.v.a.mask;
309 		valid = pf_match_addr(0, raddr, rmask, cached, af);
310 	}
311 	if (!valid) {
312 		if (pf_status.debug >= LOG_DEBUG) {
313 			log(LOG_DEBUG, "pf: pf_map_addr: "
314 			    "stale src tracking (%u) ", type);
315 			pf_print_host(&k.addr, 0, af);
316 			addlog(" to ");
317 			pf_print_host(cached, 0, af);
318 			addlog("\n");
319 		}
320 		if (sns[type]->states != 0) {
321 			/* XXX expensive */
322 			RB_FOREACH(s, pf_state_tree_id,
323 			   &tree_id)
324 				pf_state_rm_src_node(s,
325 				    sns[type]);
326 		}
327 		sns[type]->expire = 1;
328 		pf_remove_src_node(sns[type]);
329 		sns[type] = NULL;
330 		return (-1);
331 	}
332 	if (!PF_AZERO(cached, af))
333 		PF_ACPY(naddr, cached, af);
334 	if (pf_status.debug >= LOG_DEBUG) {
335 		log(LOG_DEBUG, "pf: pf_map_addr: "
336 		    "src tracking (%u) maps ", type);
337 		pf_print_host(&k.addr, 0, af);
338 		addlog(" to ");
339 		pf_print_host(naddr, 0, af);
340 		addlog("\n");
341 	}
342 	return (0);
343 }
344 
345 int
346 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
347     struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns,
348     struct pf_pool *rpool, enum pf_sn_types type)
349 {
350 	unsigned char		 hash[16];
351 	struct pf_addr		 faddr;
352 	struct pf_addr		*raddr = &rpool->addr.v.a.addr;
353 	struct pf_addr		*rmask = &rpool->addr.v.a.mask;
354 	u_int64_t		 states;
355 	u_int16_t		 weight;
356 	u_int64_t		 load;
357 	u_int64_t		 cload;
358 
359 	if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
360 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE &&
361 	    pf_map_addr_sticky(af, r, saddr, naddr, sns, rpool, type) == 0)
362 		return (0);
363 
364 	if (rpool->addr.type == PF_ADDR_NOROUTE)
365 		return (1);
366 	if (rpool->addr.type == PF_ADDR_DYNIFTL) {
367 		switch (af) {
368 #ifdef INET
369 		case AF_INET:
370 			if (rpool->addr.p.dyn->pfid_acnt4 < 1 &&
371 			    ((rpool->opts & PF_POOL_TYPEMASK) !=
372 			    PF_POOL_ROUNDROBIN) &&
373 			    ((rpool->opts & PF_POOL_TYPEMASK) !=
374 			    PF_POOL_LEASTSTATES))
375 				return (1);
376 			raddr = &rpool->addr.p.dyn->pfid_addr4;
377 			rmask = &rpool->addr.p.dyn->pfid_mask4;
378 			break;
379 #endif /* INET */
380 #ifdef INET6
381 		case AF_INET6:
382 			if (rpool->addr.p.dyn->pfid_acnt6 < 1 &&
383 			    ((rpool->opts & PF_POOL_TYPEMASK) !=
384 			    PF_POOL_ROUNDROBIN) &&
385 			    ((rpool->opts & PF_POOL_TYPEMASK) !=
386 			    PF_POOL_LEASTSTATES))
387 				return (1);
388 			raddr = &rpool->addr.p.dyn->pfid_addr6;
389 			rmask = &rpool->addr.p.dyn->pfid_mask6;
390 			break;
391 #endif /* INET6 */
392 		}
393 	} else if (rpool->addr.type == PF_ADDR_TABLE) {
394 		if (((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) &&
395 		    ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
396 			return (1); /* unsupported */
397 	} else {
398 		raddr = &rpool->addr.v.a.addr;
399 		rmask = &rpool->addr.v.a.mask;
400 	}
401 
402 	switch (rpool->opts & PF_POOL_TYPEMASK) {
403 	case PF_POOL_NONE:
404 		PF_ACPY(naddr, raddr, af);
405 		break;
406 	case PF_POOL_BITMASK:
407 		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
408 		break;
409 	case PF_POOL_RANDOM:
410 		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
411 			switch (af) {
412 #ifdef INET
413 			case AF_INET:
414 				rpool->counter.addr32[0] = htonl(arc4random());
415 				break;
416 #endif /* INET */
417 #ifdef INET6
418 			case AF_INET6:
419 				if (rmask->addr32[3] != 0xffffffff)
420 					rpool->counter.addr32[3] =
421 					    htonl(arc4random());
422 				else
423 					break;
424 				if (rmask->addr32[2] != 0xffffffff)
425 					rpool->counter.addr32[2] =
426 					    htonl(arc4random());
427 				else
428 					break;
429 				if (rmask->addr32[1] != 0xffffffff)
430 					rpool->counter.addr32[1] =
431 					    htonl(arc4random());
432 				else
433 					break;
434 				if (rmask->addr32[0] != 0xffffffff)
435 					rpool->counter.addr32[0] =
436 					    htonl(arc4random());
437 				break;
438 #endif /* INET6 */
439 			}
440 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
441 			PF_ACPY(init_addr, naddr, af);
442 
443 		} else {
444 			PF_AINC(&rpool->counter, af);
445 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
446 		}
447 		break;
448 	case PF_POOL_SRCHASH:
449 		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
450 		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
451 		break;
452 	case PF_POOL_ROUNDROBIN:
453 		if (rpool->addr.type == PF_ADDR_TABLE ||
454 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
455 			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
456 				/*
457 				 * reset counter in case its value
458 				 * has been removed from the pool.
459 				 */
460 				bzero(&rpool->counter, sizeof(rpool->counter));
461 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
462 					return (1);
463 			}
464 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
465 			return (1);
466 
467 		/* iterate over table if it contains entries which are weighted */
468 		if ((rpool->addr.type == PF_ADDR_TABLE &&
469 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
470 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
471 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) {
472 			do {
473 				if (rpool->addr.type == PF_ADDR_TABLE ||
474 				    rpool->addr.type == PF_ADDR_DYNIFTL) {
475 					if (pfr_pool_get(rpool,
476 					    &raddr, &rmask, af))
477 						return (1);
478 				} else {
479 					log(LOG_ERR, "pf: pf_map_addr: "
480 					    "weighted RR failure");
481 					return (1);
482 				}
483 				if (rpool->weight >= rpool->curweight)
484 					break;
485 				PF_AINC(&rpool->counter, af);
486 			} while (1);
487 
488 			weight = rpool->weight;
489 		}
490 
491 		PF_ACPY(naddr, &rpool->counter, af);
492 		if (init_addr != NULL && PF_AZERO(init_addr, af))
493 			PF_ACPY(init_addr, naddr, af);
494 		PF_AINC(&rpool->counter, af);
495 		break;
496 	case PF_POOL_LEASTSTATES:
497 		/* retrieve an address first */
498 		if (rpool->addr.type == PF_ADDR_TABLE ||
499 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
500 			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
501 				/* see PF_POOL_ROUNDROBIN */
502 				bzero(&rpool->counter, sizeof(rpool->counter));
503 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
504 					return (1);
505 			}
506 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
507 			return (1);
508 
509 		states = rpool->states;
510 		weight = rpool->weight;
511 
512 		if ((rpool->addr.type == PF_ADDR_TABLE &&
513 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
514 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
515 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
516 			load = ((UINT16_MAX * rpool->states) / rpool->weight);
517 		else
518 			load = states;
519 
520 		PF_ACPY(&faddr, &rpool->counter, af);
521 
522 		PF_ACPY(naddr, &rpool->counter, af);
523 		if (init_addr != NULL && PF_AZERO(init_addr, af))
524 			PF_ACPY(init_addr, naddr, af);
525 
526 		/*
527 		 * iterate *once* over whole table and find destination with
528 		 * least connection
529 		 */
530 		do  {
531 			PF_AINC(&rpool->counter, af);
532 			if (rpool->addr.type == PF_ADDR_TABLE ||
533 			    rpool->addr.type == PF_ADDR_DYNIFTL) {
534 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
535 					return (1);
536 			} else if (pf_match_addr(0, raddr, rmask,
537 			    &rpool->counter, af))
538 				return (1);
539 
540 			if ((rpool->addr.type == PF_ADDR_TABLE &&
541 			    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
542 			    (rpool->addr.type == PF_ADDR_DYNIFTL &&
543 			    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
544 				cload = ((UINT16_MAX * rpool->states)
545 					/ rpool->weight);
546 			else
547 				cload = rpool->states;
548 
549 			/* find lc minimum */
550 			if (cload < load) {
551 				states = rpool->states;
552 				weight = rpool->weight;
553 				load = cload;
554 
555 				PF_ACPY(naddr, &rpool->counter, af);
556 				if (init_addr != NULL &&
557 				    PF_AZERO(init_addr, af))
558 				    PF_ACPY(init_addr, naddr, af);
559 			}
560 		} while (pf_match_addr(1, &faddr, rmask, &rpool->counter, af) &&
561 		    (states > 0));
562 
563 		if (rpool->addr.type == PF_ADDR_TABLE) {
564 			if (pfr_states_increase(rpool->addr.p.tbl,
565 			    naddr, af) == -1) {
566 				if (pf_status.debug >= LOG_DEBUG) {
567 					log(LOG_DEBUG,"pf: pf_map_addr: "
568 					    "selected address ");
569 					pf_print_host(naddr, 0, af);
570 					addlog(". Failed to increase count!\n");
571 				}
572 				return (1);
573 			}
574 		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
575 			if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt,
576 			    naddr, af) == -1) {
577 				if (pf_status.debug >= LOG_DEBUG) {
578 					log(LOG_DEBUG, "pf: pf_map_addr: "
579 					    "selected address ");
580 					pf_print_host(naddr, 0, af);
581 					addlog(". Failed to increase count!\n");
582 				}
583 				return (1);
584 			}
585 		}
586 		break;
587 	}
588 
589 	if (rpool->opts & PF_POOL_STICKYADDR) {
590 		if (sns[type] != NULL) {
591 			pf_remove_src_node(sns[type]);
592 			sns[type] = NULL;
593 		}
594 		if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr,
595 		    0))
596 			return (1);
597 	}
598 
599 	if (pf_status.debug >= LOG_NOTICE &&
600 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
601 		log(LOG_NOTICE, "pf: pf_map_addr: selected address ");
602 		pf_print_host(naddr, 0, af);
603 		if ((rpool->opts & PF_POOL_TYPEMASK) ==
604 		    PF_POOL_LEASTSTATES)
605 			addlog(" with state count %llu", states);
606 		if ((rpool->addr.type == PF_ADDR_TABLE &&
607 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
608 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
609 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
610 			addlog(" with weight %u", weight);
611 		addlog("\n");
612 	}
613 
614 	return (0);
615 }
616 
617 int
618 pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd,
619     struct pf_src_node **sns, struct pf_rule **nr)
620 {
621 	struct pf_addr	naddr;
622 	u_int16_t	nport = 0;
623 
624 #ifdef INET6
625 	if (pd->af != pd->naf)
626 		return (pf_get_transaddr_af(r, pd, sns));
627 #endif /* INET6 */
628 
629 	if (r->nat.addr.type != PF_ADDR_NONE) {
630 		/* XXX is this right? what if rtable is changed at the same
631 		 * XXX time? where do I need to figure out the sport? */
632 		if (pf_get_sport(pd, r, &naddr, &nport,
633 		    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
634 			DPFPRINTF(LOG_NOTICE,
635 			    "pf: NAT proxy port allocation (%u-%u) failed",
636 			    r->nat.proxy_port[0],
637 			    r->nat.proxy_port[1]);
638 			return (-1);
639 		}
640 		*nr = r;
641 		PF_ACPY(&pd->nsaddr, &naddr, pd->af);
642 		pd->nsport = nport;
643 	}
644 	if (r->rdr.addr.type != PF_ADDR_NONE) {
645 		if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns,
646 		    &r->rdr, PF_SN_RDR))
647 			return (-1);
648 		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
649 			PF_POOLMASK(&naddr, &naddr,  &r->rdr.addr.v.a.mask,
650 			    &pd->ndaddr, pd->af);
651 
652 			if (r->rdr.proxy_port[1]) {
653 				u_int32_t	tmp_nport;
654 
655 				tmp_nport = ((ntohs(pd->ndport) -
656 				    ntohs(r->dst.port[0])) %
657 				    (r->rdr.proxy_port[1] -
658 				    r->rdr.proxy_port[0] + 1)) +
659 				    r->rdr.proxy_port[0];
660 
661 				/* wrap around if necessary */
662 				if (tmp_nport > 65535)
663 					tmp_nport -= 65535;
664 				nport = htons((u_int16_t)tmp_nport);
665 			} else if (r->rdr.proxy_port[0])
666 				nport = htons(r->rdr.proxy_port[0]);
667 		*nr = r;
668 		PF_ACPY(&pd->ndaddr, &naddr, pd->af);
669 		if (nport)
670 			pd->ndport = nport;
671 	}
672 
673 	return (0);
674 }
675 
676 #ifdef INET6
677 int
678 pf_get_transaddr_af(struct pf_rule *r, struct pf_pdesc *pd,
679     struct pf_src_node **sns)
680 {
681 	struct pf_addr	ndaddr, nsaddr, naddr;
682 	u_int16_t	nport = 0;
683 	int		prefixlen = 96;
684 
685 	if (pf_status.debug >= LOG_NOTICE) {
686 		log(LOG_NOTICE, "pf: af-to %s %s, ",
687 		    pd->naf == AF_INET ? "inet" : "inet6",
688 		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr");
689 		pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
690 		addlog(" -> ");
691 		pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
692 		addlog("\n");
693 	}
694 
695 	if (r->nat.addr.type == PF_ADDR_NONE)
696 		panic("pf_get_transaddr_af: no nat pool for source address");
697 
698 	/* get source address and port */
699 	if (pf_get_sport(pd, r, &nsaddr, &nport,
700 	    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
701 		DPFPRINTF(LOG_NOTICE,
702 		    "pf: af-to NAT proxy port allocation (%u-%u) failed",
703 		    r->nat.proxy_port[0],
704 		    r->nat.proxy_port[1]);
705 		return (-1);
706 	}
707 	pd->nsport = nport;
708 
709 	if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) {
710 		if (pd->dir == PF_IN) {
711 			NTOHS(pd->ndport);
712 			if (pd->ndport == ICMP6_ECHO_REQUEST)
713 				pd->ndport = ICMP_ECHO;
714 			else if (pd->ndport == ICMP6_ECHO_REPLY)
715 				pd->ndport = ICMP_ECHOREPLY;
716 			HTONS(pd->ndport);
717 		} else {
718 			NTOHS(pd->nsport);
719 			if (pd->nsport == ICMP6_ECHO_REQUEST)
720 				pd->nsport = ICMP_ECHO;
721 			else if (pd->nsport == ICMP6_ECHO_REPLY)
722 				pd->nsport = ICMP_ECHOREPLY;
723 			HTONS(pd->nsport);
724 		}
725 	} else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
726 		if (pd->dir == PF_IN) {
727 			NTOHS(pd->ndport);
728 			if (pd->ndport == ICMP_ECHO)
729 				pd->ndport = ICMP6_ECHO_REQUEST;
730 			else if (pd->ndport == ICMP_ECHOREPLY)
731 				pd->ndport = ICMP6_ECHO_REPLY;
732 			HTONS(pd->ndport);
733 		} else {
734 			NTOHS(pd->nsport);
735 			if (pd->nsport == ICMP_ECHO)
736 				pd->nsport = ICMP6_ECHO_REQUEST;
737 			else if (pd->nsport == ICMP_ECHOREPLY)
738 				pd->nsport = ICMP6_ECHO_REPLY;
739 			HTONS(pd->nsport);
740 		}
741 	}
742 
743 	/* get the destination address and port */
744 	if (r->rdr.addr.type != PF_ADDR_NONE) {
745 		if (pf_map_addr(pd->naf, r, &nsaddr, &naddr, NULL, sns,
746 		    &r->rdr, PF_SN_RDR))
747 			return (-1);
748 		if (r->rdr.proxy_port[0])
749 			pd->ndport = htons(r->rdr.proxy_port[0]);
750 
751 		if (pd->naf == AF_INET) {
752 			/* The prefix is the IPv4 rdr address */
753 			prefixlen = in_mask2len((struct in_addr *)
754 			    &r->rdr.addr.v.a.mask);
755 			inet_nat46(pd->naf, &pd->ndaddr,
756 			    &ndaddr, &naddr, prefixlen);
757 		} else {
758 			/* The prefix is the IPv6 rdr address */
759 			prefixlen =
760 			    in6_mask2len((struct in6_addr *)
761 			    &r->rdr.addr.v.a.mask, NULL);
762 			inet_nat64(pd->naf, &pd->ndaddr,
763 			    &ndaddr, &naddr, prefixlen);
764 		}
765 	} else {
766 		if (pd->naf == AF_INET) {
767 			/* The prefix is the IPv6 dst address */
768 			prefixlen =
769 			    in6_mask2len((struct in6_addr *)
770 			    &r->dst.addr.v.a.mask, NULL);
771 			if (prefixlen < 32)
772 				prefixlen = 96;
773 			inet_nat64(pd->naf, &pd->ndaddr,
774 			    &ndaddr, &pd->ndaddr, prefixlen);
775 		} else {
776 			/*
777 			 * The prefix is the IPv6 nat address
778 			 * (that was stored in pd->nsaddr)
779 			 */
780 			prefixlen = in6_mask2len((struct in6_addr *)
781 			    &r->nat.addr.v.a.mask, NULL);
782 			if (prefixlen > 96)
783 				prefixlen = 96;
784 			inet_nat64(pd->naf, &pd->ndaddr,
785 			    &ndaddr, &nsaddr, prefixlen);
786 		}
787 	}
788 
789 	PF_ACPY(&pd->nsaddr, &nsaddr, pd->naf);
790 	PF_ACPY(&pd->ndaddr, &ndaddr, pd->naf);
791 
792 	if (pf_status.debug >= LOG_NOTICE) {
793 		log(LOG_NOTICE, "pf: af-to %s %s done, prefixlen %d, ",
794 		    pd->naf == AF_INET ? "inet" : "inet6",
795 		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr",
796 		    prefixlen);
797 		pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
798 		addlog(" -> ");
799 		pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
800 		addlog("\n");
801 	}
802 
803 	return (0);
804 }
805 #endif /* INET6 */
806 
807 int
808 pf_postprocess_addr(struct pf_state *cur)
809 {
810 	struct pf_rule		*nr;
811 	struct pf_state_key	*sks;
812 	struct pf_pool		 rpool;
813 	struct pf_addr		 lookup_addr;
814 	int			 slbcount;
815 
816 	nr = cur->natrule.ptr;
817 
818 	if (nr == NULL)
819 		return (0);
820 
821 	/* decrease counter */
822 
823 	sks = cur ? cur->key[PF_SK_STACK] : NULL;
824 
825 	/* check for outgoing or ingoing balancing */
826 	if (nr->rt == PF_ROUTETO)
827 		lookup_addr = cur->rt_addr;
828 	else if (sks != NULL)
829 		lookup_addr = sks->addr[1];
830 	else {
831 		if (pf_status.debug >= LOG_DEBUG) {
832 			log(LOG_DEBUG, "pf: pf_unlink_state: "
833 			    "unable to optain address");
834 		}
835 		return (1);
836 	}
837 
838 	/* check for appropriate pool */
839 	if (nr->rdr.addr.type != PF_ADDR_NONE)
840 		rpool = nr->rdr;
841 	else if (nr->nat.addr.type != PF_ADDR_NONE)
842 		rpool = nr->nat;
843 	else if (nr->route.addr.type != PF_ADDR_NONE)
844 		rpool = nr->route;
845 
846 	if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
847 		return (0);
848 
849 	if (rpool.addr.type == PF_ADDR_TABLE) {
850 		if ((slbcount = pfr_states_decrease(
851 		    rpool.addr.p.tbl,
852 		    &lookup_addr, sks->af)) == -1) {
853 			if (pf_status.debug >= LOG_DEBUG) {
854 				log(LOG_DEBUG, "pf: pf_unlink_state: "
855 				    "selected address ");
856 				pf_print_host(&lookup_addr,
857 				    sks->port[0], sks->af);
858 				addlog(". Failed to "
859 				    "decrease count!\n");
860 			}
861 			return (1);
862 		}
863 	} else if (rpool.addr.type == PF_ADDR_DYNIFTL) {
864 		if ((slbcount = pfr_states_decrease(
865 		    rpool.addr.p.dyn->pfid_kt,
866 		    &lookup_addr, sks->af)) == -1) {
867 			if (pf_status.debug >= LOG_DEBUG) {
868 				log(LOG_DEBUG,
869 				    "pf: pf_unlink_state: "
870 				    "selected address ");
871 				pf_print_host(&lookup_addr,
872 				    sks->port[0], sks->af);
873 				addlog(". Failed to "
874 				    "decrease count!\n");
875 			}
876 			return (1);
877 		}
878 	}
879 	if (slbcount > -1) {
880 		if (pf_status.debug >= LOG_NOTICE) {
881 			log(LOG_NOTICE,
882 			    "pf: pf_unlink_state: selected address ");
883 			pf_print_host(&lookup_addr, sks->port[0],
884 			    sks->af);
885 			addlog(" decreased state count to %u\n",
886 			    slbcount);
887 		}
888 	}
889 	return (0);
890 }
891