xref: /openbsd-src/sys/net/pf_lb.c (revision 0b7734b3d77bb9b21afec6f4621cae6c805dbd45)
1 /*	$OpenBSD: pf_lb.c,v 1.55 2016/07/19 12:51:19 henning Exp $ */
2 
3 /*
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  */
37 
38 #include "bpfilter.h"
39 #include "pflog.h"
40 #include "pfsync.h"
41 #include "pflow.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/mbuf.h>
46 #include <sys/filio.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/kernel.h>
50 #include <sys/time.h>
51 #include <sys/pool.h>
52 #include <sys/rwlock.h>
53 #include <sys/syslog.h>
54 #include <sys/stdint.h>
55 
56 #include <crypto/siphash.h>
57 
58 #include <net/if.h>
59 #include <net/bpf.h>
60 #include <net/route.h>
61 
62 #include <netinet/in.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip_var.h>
65 #include <netinet/tcp.h>
66 #include <netinet/tcp_seq.h>
67 #include <netinet/udp.h>
68 #include <netinet/ip_icmp.h>
69 #include <netinet/tcp_timer.h>
70 #include <netinet/udp_var.h>
71 #include <netinet/icmp_var.h>
72 #include <netinet/if_ether.h>
73 #include <netinet/in_pcb.h>
74 
75 #include <net/pfvar.h>
76 
77 #if NPFLOG > 0
78 #include <net/if_pflog.h>
79 #endif	/* NPFLOG > 0 */
80 
81 #if NPFLOW > 0
82 #include <net/if_pflow.h>
83 #endif	/* NPFLOW > 0 */
84 
85 #if NPFSYNC > 0
86 #include <net/if_pfsync.h>
87 #endif /* NPFSYNC > 0 */
88 
89 #ifdef INET6
90 #include <netinet/ip6.h>
91 #include <netinet/icmp6.h>
92 #endif /* INET6 */
93 
94 u_int64_t		 pf_hash(struct pf_addr *, struct pf_addr *,
95 			    struct pf_poolhashkey *, sa_family_t);
96 int			 pf_get_sport(struct pf_pdesc *, struct pf_rule *,
97 			    struct pf_addr *, u_int16_t *, u_int16_t,
98 			    u_int16_t, struct pf_src_node **);
99 int			 pf_get_transaddr_af(struct pf_rule *,
100 			    struct pf_pdesc *, struct pf_src_node **);
101 int			 pf_map_addr_sticky(sa_family_t, struct pf_rule *,
102 			    struct pf_addr *, struct pf_addr *,
103 			    struct pf_src_node **, struct pf_pool *,
104 			    enum pf_sn_types);
105 
106 u_int64_t
107 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
108     struct pf_poolhashkey *key, sa_family_t af)
109 {
110 	uint64_t res = 0;
111 #ifdef INET6
112 	union {
113 		uint64_t hash64;
114 		uint32_t hash32[2];
115 	} h;
116 #endif	/* INET6 */
117 
118 	switch (af) {
119 	case AF_INET:
120 		res = SipHash24((SIPHASH_KEY *)key,
121 		    &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
122 		hash->addr32[0] = res;
123 		break;
124 #ifdef INET6
125 	case AF_INET6:
126 		res = SipHash24((SIPHASH_KEY *)key, &inaddr->addr32[0],
127 		    4 * sizeof(inaddr->addr32[0]));
128 		h.hash64 = res;
129 		hash->addr32[0] = h.hash32[0];
130 		hash->addr32[1] = h.hash32[1];
131 		/*
132 		 * siphash isn't big enough, but flipping it around is
133 		 * good enough here.
134 		 */
135 		hash->addr32[2] = ~h.hash32[1];
136 		hash->addr32[3] = ~h.hash32[0];
137 		break;
138 #endif /* INET6 */
139 	default:
140 		unhandled_af(af);
141 	}
142 	return (res);
143 }
144 
145 int
146 pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r,
147     struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
148     struct pf_src_node **sn)
149 {
150 	struct pf_state_key_cmp	key;
151 	struct pf_addr		init_addr;
152 	u_int16_t		cut;
153 	int			dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN;
154 	int			sidx = pd->sidx;
155 	int			didx = pd->didx;
156 
157 	bzero(&init_addr, sizeof(init_addr));
158 	if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat,
159 	    PF_SN_NAT))
160 		return (1);
161 
162 	if (pd->proto == IPPROTO_ICMP) {
163 		if (pd->ndport == htons(ICMP_ECHO)) {
164 			low = 1;
165 			high = 65535;
166 		} else
167 			return (0);	/* Don't try to modify non-echo ICMP */
168 	}
169 #ifdef INET6
170 	if (pd->proto == IPPROTO_ICMPV6) {
171 		if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) {
172 			low = 1;
173 			high = 65535;
174 		} else
175 			return (0);	/* Don't try to modify non-echo ICMP */
176 	}
177 #endif /* INET6 */
178 
179 	do {
180 		key.af = pd->naf;
181 		key.proto = pd->proto;
182 		key.rdomain = pd->rdomain;
183 		PF_ACPY(&key.addr[didx], &pd->ndaddr, key.af);
184 		PF_ACPY(&key.addr[sidx], naddr, key.af);
185 		key.port[didx] = pd->ndport;
186 
187 		/*
188 		 * port search; start random, step;
189 		 * similar 2 portloop in in_pcbbind
190 		 */
191 		if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
192 		    pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6)) {
193 			/* XXX bug: icmp states dont use the id on both
194 			 * XXX sides (traceroute -I through nat) */
195 			key.port[sidx] = pd->nsport;
196 			if (pf_find_state_all(&key, dir, NULL) == NULL) {
197 				*nport = pd->nsport;
198 				return (0);
199 			}
200 		} else if (low == 0 && high == 0) {
201 			key.port[sidx] = pd->nsport;
202 			if (pf_find_state_all(&key, dir, NULL) == NULL) {
203 				*nport = pd->nsport;
204 				return (0);
205 			}
206 		} else if (low == high) {
207 			key.port[sidx] = htons(low);
208 			if (pf_find_state_all(&key, dir, NULL) == NULL) {
209 				*nport = htons(low);
210 				return (0);
211 			}
212 		} else {
213 			u_int16_t tmp;
214 
215 			if (low > high) {
216 				tmp = low;
217 				low = high;
218 				high = tmp;
219 			}
220 			/* low < high */
221 			cut = arc4random_uniform(1 + high - low) + low;
222 			/* low <= cut <= high */
223 			for (tmp = cut; tmp <= high; ++(tmp)) {
224 				key.port[sidx] = htons(tmp);
225 				if (pf_find_state_all(&key, dir, NULL) ==
226 				    NULL && !in_baddynamic(tmp, pd->proto)) {
227 					*nport = htons(tmp);
228 					return (0);
229 				}
230 			}
231 			for (tmp = cut - 1; tmp >= low; --(tmp)) {
232 				key.port[sidx] = htons(tmp);
233 				if (pf_find_state_all(&key, dir, NULL) ==
234 				    NULL && !in_baddynamic(tmp, pd->proto)) {
235 					*nport = htons(tmp);
236 					return (0);
237 				}
238 			}
239 		}
240 
241 		switch (r->nat.opts & PF_POOL_TYPEMASK) {
242 		case PF_POOL_RANDOM:
243 		case PF_POOL_ROUNDROBIN:
244 		case PF_POOL_LEASTSTATES:
245 			/*
246 			 * pick a different source address since we're out
247 			 * of free port choices for the current one.
248 			 */
249 			if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr,
250 			    &init_addr, sn, &r->nat, PF_SN_NAT))
251 				return (1);
252 			break;
253 		case PF_POOL_NONE:
254 		case PF_POOL_SRCHASH:
255 		case PF_POOL_BITMASK:
256 		default:
257 			return (1);
258 		}
259 	} while (! PF_AEQ(&init_addr, naddr, pd->naf) );
260 	return (1);					/* none available */
261 }
262 
263 int
264 pf_map_addr_sticky(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
265     struct pf_addr *naddr, struct pf_src_node **sns, struct pf_pool *rpool,
266     enum pf_sn_types type)
267 {
268 	struct pf_addr		*raddr, *rmask, *cached;
269 	struct pf_state		*s;
270 	struct pf_src_node	 k;
271 	int			 valid;
272 
273 	k.af = af;
274 	k.type = type;
275 	PF_ACPY(&k.addr, saddr, af);
276 	k.rule.ptr = r;
277 	pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
278 	sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
279 	if (sns[type] == NULL)
280 		return (-1);
281 
282 	/* check if the cached entry is still valid */
283 	cached = &(sns[type])->raddr;
284 	valid = 0;
285 	if (PF_AZERO(cached, af)) {
286 		valid = 1;
287 	} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
288 		if (pfr_kentry_byaddr(rpool->addr.p.dyn->pfid_kt, cached,
289 		    af, 0))
290 			valid = 1;
291 	} else if (rpool->addr.type == PF_ADDR_TABLE) {
292 		if (pfr_kentry_byaddr(rpool->addr.p.tbl, cached, af, 0))
293 			valid = 1;
294 	} else if (rpool->addr.type != PF_ADDR_NOROUTE) {
295 		raddr = &rpool->addr.v.a.addr;
296 		rmask = &rpool->addr.v.a.mask;
297 		valid = pf_match_addr(0, raddr, rmask, cached, af);
298 	}
299 	if (!valid) {
300 		if (pf_status.debug >= LOG_DEBUG) {
301 			log(LOG_DEBUG, "pf: pf_map_addr: "
302 			    "stale src tracking (%u) ", type);
303 			pf_print_host(&k.addr, 0, af);
304 			addlog(" to ");
305 			pf_print_host(cached, 0, af);
306 			addlog("\n");
307 		}
308 		if (sns[type]->states != 0) {
309 			/* XXX expensive */
310 			RB_FOREACH(s, pf_state_tree_id,
311 			   &tree_id)
312 				pf_state_rm_src_node(s,
313 				    sns[type]);
314 		}
315 		sns[type]->expire = 1;
316 		pf_remove_src_node(sns[type]);
317 		sns[type] = NULL;
318 		return (-1);
319 	}
320 	if (!PF_AZERO(cached, af))
321 		PF_ACPY(naddr, cached, af);
322 	if (pf_status.debug >= LOG_DEBUG) {
323 		log(LOG_DEBUG, "pf: pf_map_addr: "
324 		    "src tracking (%u) maps ", type);
325 		pf_print_host(&k.addr, 0, af);
326 		addlog(" to ");
327 		pf_print_host(naddr, 0, af);
328 		addlog("\n");
329 	}
330 	return (0);
331 }
332 
333 int
334 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
335     struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns,
336     struct pf_pool *rpool, enum pf_sn_types type)
337 {
338 	unsigned char		 hash[16];
339 	struct pf_addr		 faddr;
340 	struct pf_addr		*raddr = &rpool->addr.v.a.addr;
341 	struct pf_addr		*rmask = &rpool->addr.v.a.mask;
342 	u_int64_t		 states;
343 	u_int16_t		 weight;
344 	u_int64_t		 load;
345 	u_int64_t		 cload;
346 	u_int64_t		 hashidx;
347 	int			 cnt;
348 
349 	if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
350 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE &&
351 	    pf_map_addr_sticky(af, r, saddr, naddr, sns, rpool, type) == 0)
352 		return (0);
353 
354 	if (rpool->addr.type == PF_ADDR_NOROUTE)
355 		return (1);
356 	if (rpool->addr.type == PF_ADDR_DYNIFTL) {
357 		switch (af) {
358 		case AF_INET:
359 			if (rpool->addr.p.dyn->pfid_acnt4 < 1 &&
360 			    !PF_POOL_DYNTYPE(rpool->opts))
361 				return (1);
362 			raddr = &rpool->addr.p.dyn->pfid_addr4;
363 			rmask = &rpool->addr.p.dyn->pfid_mask4;
364 			break;
365 #ifdef INET6
366 		case AF_INET6:
367 			if (rpool->addr.p.dyn->pfid_acnt6 < 1 &&
368 			    !PF_POOL_DYNTYPE(rpool->opts))
369 				return (1);
370 			raddr = &rpool->addr.p.dyn->pfid_addr6;
371 			rmask = &rpool->addr.p.dyn->pfid_mask6;
372 			break;
373 #endif /* INET6 */
374 		default:
375 			unhandled_af(af);
376 		}
377 	} else if (rpool->addr.type == PF_ADDR_TABLE) {
378 		if (!PF_POOL_DYNTYPE(rpool->opts))
379 			return (1); /* unsupported */
380 	} else {
381 		raddr = &rpool->addr.v.a.addr;
382 		rmask = &rpool->addr.v.a.mask;
383 	}
384 
385 	switch (rpool->opts & PF_POOL_TYPEMASK) {
386 	case PF_POOL_NONE:
387 		PF_ACPY(naddr, raddr, af);
388 		break;
389 	case PF_POOL_BITMASK:
390 		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
391 		break;
392 	case PF_POOL_RANDOM:
393 		if (rpool->addr.type == PF_ADDR_TABLE) {
394 			cnt = rpool->addr.p.tbl->pfrkt_cnt;
395 			if (cnt == 0)
396 				rpool->tblidx = 0;
397 			else
398 				rpool->tblidx = (int)arc4random_uniform(cnt);
399 			memset(&rpool->counter, 0, sizeof(rpool->counter));
400 			if (pfr_pool_get(rpool, &raddr, &rmask, af))
401 				return (1);
402 			PF_ACPY(naddr, &rpool->counter, af);
403 		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
404 			cnt = rpool->addr.p.dyn->pfid_kt->pfrkt_cnt;
405 			if (cnt == 0)
406 				rpool->tblidx = 0;
407 			else
408 				rpool->tblidx = (int)arc4random_uniform(cnt);
409 			memset(&rpool->counter, 0, sizeof(rpool->counter));
410 			if (pfr_pool_get(rpool, &raddr, &rmask, af))
411 				return (1);
412 			PF_ACPY(naddr, &rpool->counter, af);
413 		} else if (init_addr != NULL && PF_AZERO(init_addr, af)) {
414 			switch (af) {
415 			case AF_INET:
416 				rpool->counter.addr32[0] = arc4random();
417 				break;
418 #ifdef INET6
419 			case AF_INET6:
420 				if (rmask->addr32[3] != 0xffffffff)
421 					rpool->counter.addr32[3] = arc4random();
422 				else
423 					break;
424 				if (rmask->addr32[2] != 0xffffffff)
425 					rpool->counter.addr32[2] = arc4random();
426 				else
427 					break;
428 				if (rmask->addr32[1] != 0xffffffff)
429 					rpool->counter.addr32[1] = arc4random();
430 				else
431 					break;
432 				if (rmask->addr32[0] != 0xffffffff)
433 					rpool->counter.addr32[0] = arc4random();
434 				break;
435 #endif /* INET6 */
436 			default:
437 				unhandled_af(af);
438 			}
439 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
440 			PF_ACPY(init_addr, naddr, af);
441 
442 		} else {
443 			PF_AINC(&rpool->counter, af);
444 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
445 		}
446 		break;
447 	case PF_POOL_SRCHASH:
448 		hashidx =
449 		    pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
450 		if (rpool->addr.type == PF_ADDR_TABLE) {
451 			cnt = rpool->addr.p.tbl->pfrkt_cnt;
452 			if (cnt == 0)
453 				rpool->tblidx = 0;
454 			else
455 				rpool->tblidx = (int)(hashidx % cnt);
456 			memset(&rpool->counter, 0, sizeof(rpool->counter));
457 			if (pfr_pool_get(rpool, &raddr, &rmask, af))
458 				return (1);
459 			PF_ACPY(naddr, &rpool->counter, af);
460 		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
461 			cnt = rpool->addr.p.dyn->pfid_kt->pfrkt_cnt;
462 			if (cnt == 0)
463 				rpool->tblidx = 0;
464 			else
465 				rpool->tblidx = (int)(hashidx % cnt);
466 			memset(&rpool->counter, 0, sizeof(rpool->counter));
467 			if (pfr_pool_get(rpool, &raddr, &rmask, af))
468 				return (1);
469 			PF_ACPY(naddr, &rpool->counter, af);
470 		} else {
471 			PF_POOLMASK(naddr, raddr, rmask,
472 			    (struct pf_addr *)&hash, af);
473 		}
474 		break;
475 	case PF_POOL_ROUNDROBIN:
476 		if (rpool->addr.type == PF_ADDR_TABLE ||
477 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
478 			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
479 				/*
480 				 * reset counter in case its value
481 				 * has been removed from the pool.
482 				 */
483 				bzero(&rpool->counter, sizeof(rpool->counter));
484 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
485 					return (1);
486 			}
487 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
488 			return (1);
489 
490 		/* iterate over table if it contains entries which are weighted */
491 		if ((rpool->addr.type == PF_ADDR_TABLE &&
492 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
493 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
494 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) {
495 			do {
496 				if (rpool->addr.type == PF_ADDR_TABLE ||
497 				    rpool->addr.type == PF_ADDR_DYNIFTL) {
498 					if (pfr_pool_get(rpool,
499 					    &raddr, &rmask, af))
500 						return (1);
501 				} else {
502 					log(LOG_ERR, "pf: pf_map_addr: "
503 					    "weighted RR failure");
504 					return (1);
505 				}
506 				if (rpool->weight >= rpool->curweight)
507 					break;
508 				PF_AINC(&rpool->counter, af);
509 			} while (1);
510 
511 			weight = rpool->weight;
512 		}
513 
514 		PF_ACPY(naddr, &rpool->counter, af);
515 		if (init_addr != NULL && PF_AZERO(init_addr, af))
516 			PF_ACPY(init_addr, naddr, af);
517 		PF_AINC(&rpool->counter, af);
518 		break;
519 	case PF_POOL_LEASTSTATES:
520 		/* retrieve an address first */
521 		if (rpool->addr.type == PF_ADDR_TABLE ||
522 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
523 			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
524 				/* see PF_POOL_ROUNDROBIN */
525 				bzero(&rpool->counter, sizeof(rpool->counter));
526 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
527 					return (1);
528 			}
529 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
530 			return (1);
531 
532 		states = rpool->states;
533 		weight = rpool->weight;
534 
535 		if ((rpool->addr.type == PF_ADDR_TABLE &&
536 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
537 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
538 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
539 			load = ((UINT16_MAX * rpool->states) / rpool->weight);
540 		else
541 			load = states;
542 
543 		PF_ACPY(&faddr, &rpool->counter, af);
544 
545 		PF_ACPY(naddr, &rpool->counter, af);
546 		if (init_addr != NULL && PF_AZERO(init_addr, af))
547 			PF_ACPY(init_addr, naddr, af);
548 
549 		/*
550 		 * iterate *once* over whole table and find destination with
551 		 * least connection
552 		 */
553 		do  {
554 			PF_AINC(&rpool->counter, af);
555 			if (rpool->addr.type == PF_ADDR_TABLE ||
556 			    rpool->addr.type == PF_ADDR_DYNIFTL) {
557 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
558 					return (1);
559 			} else if (pf_match_addr(0, raddr, rmask,
560 			    &rpool->counter, af))
561 				return (1);
562 
563 			if ((rpool->addr.type == PF_ADDR_TABLE &&
564 			    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
565 			    (rpool->addr.type == PF_ADDR_DYNIFTL &&
566 			    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
567 				cload = ((UINT16_MAX * rpool->states)
568 					/ rpool->weight);
569 			else
570 				cload = rpool->states;
571 
572 			/* find lc minimum */
573 			if (cload < load) {
574 				states = rpool->states;
575 				weight = rpool->weight;
576 				load = cload;
577 
578 				PF_ACPY(naddr, &rpool->counter, af);
579 				if (init_addr != NULL &&
580 				    PF_AZERO(init_addr, af))
581 				    PF_ACPY(init_addr, naddr, af);
582 			}
583 		} while (pf_match_addr(1, &faddr, rmask, &rpool->counter, af) &&
584 		    (states > 0));
585 
586 		if (rpool->addr.type == PF_ADDR_TABLE) {
587 			if (pfr_states_increase(rpool->addr.p.tbl,
588 			    naddr, af) == -1) {
589 				if (pf_status.debug >= LOG_DEBUG) {
590 					log(LOG_DEBUG,"pf: pf_map_addr: "
591 					    "selected address ");
592 					pf_print_host(naddr, 0, af);
593 					addlog(". Failed to increase count!\n");
594 				}
595 				return (1);
596 			}
597 		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
598 			if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt,
599 			    naddr, af) == -1) {
600 				if (pf_status.debug >= LOG_DEBUG) {
601 					log(LOG_DEBUG, "pf: pf_map_addr: "
602 					    "selected address ");
603 					pf_print_host(naddr, 0, af);
604 					addlog(". Failed to increase count!\n");
605 				}
606 				return (1);
607 			}
608 		}
609 		break;
610 	}
611 
612 	if (rpool->opts & PF_POOL_STICKYADDR) {
613 		if (sns[type] != NULL) {
614 			pf_remove_src_node(sns[type]);
615 			sns[type] = NULL;
616 		}
617 		if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr))
618 			return (1);
619 	}
620 
621 	if (pf_status.debug >= LOG_NOTICE &&
622 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
623 		log(LOG_NOTICE, "pf: pf_map_addr: selected address ");
624 		pf_print_host(naddr, 0, af);
625 		if ((rpool->opts & PF_POOL_TYPEMASK) ==
626 		    PF_POOL_LEASTSTATES)
627 			addlog(" with state count %llu", states);
628 		if ((rpool->addr.type == PF_ADDR_TABLE &&
629 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
630 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
631 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
632 			addlog(" with weight %u", weight);
633 		addlog("\n");
634 	}
635 
636 	return (0);
637 }
638 
639 int
640 pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd,
641     struct pf_src_node **sns, struct pf_rule **nr)
642 {
643 	struct pf_addr	naddr;
644 	u_int16_t	nport;
645 
646 #ifdef INET6
647 	if (pd->af != pd->naf)
648 		return (pf_get_transaddr_af(r, pd, sns));
649 #endif /* INET6 */
650 
651 	if (r->nat.addr.type != PF_ADDR_NONE) {
652 		/* XXX is this right? what if rtable is changed at the same
653 		 * XXX time? where do I need to figure out the sport? */
654 		nport = 0;
655 		if (pf_get_sport(pd, r, &naddr, &nport,
656 		    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
657 			DPFPRINTF(LOG_NOTICE,
658 			    "pf: NAT proxy port allocation (%u-%u) failed",
659 			    r->nat.proxy_port[0],
660 			    r->nat.proxy_port[1]);
661 			return (-1);
662 		}
663 		*nr = r;
664 		PF_ACPY(&pd->nsaddr, &naddr, pd->af);
665 		pd->nsport = nport;
666 	}
667 	if (r->rdr.addr.type != PF_ADDR_NONE) {
668 		if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns,
669 		    &r->rdr, PF_SN_RDR))
670 			return (-1);
671 		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
672 			PF_POOLMASK(&naddr, &naddr,  &r->rdr.addr.v.a.mask,
673 			    &pd->ndaddr, pd->af);
674 
675 		nport = 0;
676 		if (r->rdr.proxy_port[1]) {
677 			u_int32_t	tmp_nport;
678 
679 			tmp_nport = ((ntohs(pd->ndport) -
680 			    ntohs(r->dst.port[0])) %
681 			    (r->rdr.proxy_port[1] -
682 			    r->rdr.proxy_port[0] + 1)) +
683 			    r->rdr.proxy_port[0];
684 
685 			/* wrap around if necessary */
686 			if (tmp_nport > 65535)
687 				tmp_nport -= 65535;
688 			nport = htons((u_int16_t)tmp_nport);
689 		} else if (r->rdr.proxy_port[0])
690 			nport = htons(r->rdr.proxy_port[0]);
691 		*nr = r;
692 		PF_ACPY(&pd->ndaddr, &naddr, pd->af);
693 		if (nport)
694 			pd->ndport = nport;
695 	}
696 
697 	return (0);
698 }
699 
#ifdef INET6
/*
 * Address-family translating variant of pf_get_transaddr() (af-to).
 *
 * The new source address/port come from the rule's nat pool via
 * pf_get_sport(); a missing nat pool is a panic.  For ICMP <-> ICMPv6
 * the echo request/reply type carried in the port field is converted to
 * its counterpart in the new family.  The new destination is built with
 * inet_nat46()/inet_nat64() from the rdr pool if there is one, otherwise
 * from the rule's destination or nat prefix.  On success pd->nsaddr,
 * pd->ndaddr, pd->nsport and possibly pd->ndport are updated.
 *
 * Returns 0 on success and -1 when no source port or rdr address could
 * be obtained.
 */
int
pf_get_transaddr_af(struct pf_rule *r, struct pf_pdesc *pd,
    struct pf_src_node **sns)
{
	struct pf_addr	new_dst, new_src, rdr_addr;
	u_int16_t	new_sport;
	u_int16_t	echo_old = 0, echo_new = 0;
	u_int16_t	reply_old = 0, reply_new = 0;
	int		xlate_icmp = 0;
	int		prefixlen = 96;

	if (pf_status.debug >= LOG_NOTICE) {
		log(LOG_NOTICE, "pf: af-to %s %s, ",
		    pd->naf == AF_INET ? "inet" : "inet6",
		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr");
		pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
		addlog(" -> ");
		pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
		addlog("\n");
	}

	if (r->nat.addr.type == PF_ADDR_NONE)
		panic("pf_get_transaddr_af: no nat pool for source address");

	/* get source address and port */
	new_sport = 0;
	if (pf_get_sport(pd, r, &new_src, &new_sport,
	    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
		DPFPRINTF(LOG_NOTICE,
		    "pf: af-to NAT proxy port allocation (%u-%u) failed",
		    r->nat.proxy_port[0],
		    r->nat.proxy_port[1]);
		return (-1);
	}
	pd->nsport = new_sport;

	/* decide which ICMP echo types have to be swapped, if any */
	if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) {
		xlate_icmp = 1;
		echo_old = ICMP6_ECHO_REQUEST;
		echo_new = ICMP_ECHO;
		reply_old = ICMP6_ECHO_REPLY;
		reply_new = ICMP_ECHOREPLY;
	} else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
		xlate_icmp = 1;
		echo_old = ICMP_ECHO;
		echo_new = ICMP6_ECHO_REQUEST;
		reply_old = ICMP_ECHOREPLY;
		reply_new = ICMP6_ECHO_REPLY;
	}

	if (xlate_icmp) {
		/*
		 * the type lives in the destination port for inbound
		 * packets and in the source port otherwise
		 */
		u_int16_t icmptype;

		if (pd->dir == PF_IN)
			icmptype = ntohs(pd->ndport);
		else
			icmptype = ntohs(pd->nsport);

		if (icmptype == echo_old)
			icmptype = echo_new;
		else if (icmptype == reply_old)
			icmptype = reply_new;

		if (pd->dir == PF_IN)
			pd->ndport = htons(icmptype);
		else
			pd->nsport = htons(icmptype);
	}

	/* get the destination address and port */
	if (r->rdr.addr.type != PF_ADDR_NONE) {
		if (pf_map_addr(pd->naf, r, &new_src, &rdr_addr, NULL, sns,
		    &r->rdr, PF_SN_RDR))
			return (-1);
		if (r->rdr.proxy_port[0])
			pd->ndport = htons(r->rdr.proxy_port[0]);

		if (pd->naf == AF_INET) {
			/* The prefix is the IPv4 rdr address */
			prefixlen = in_mask2len((struct in_addr *)
			    &r->rdr.addr.v.a.mask);
			inet_nat46(pd->naf, &pd->ndaddr,
			    &new_dst, &rdr_addr, prefixlen);
		} else {
			/* The prefix is the IPv6 rdr address */
			prefixlen =
			    in6_mask2len((struct in6_addr *)
			    &r->rdr.addr.v.a.mask, NULL);
			inet_nat64(pd->naf, &pd->ndaddr,
			    &new_dst, &rdr_addr, prefixlen);
		}
	} else if (pd->naf == AF_INET) {
		/* The prefix is the IPv6 dst address */
		prefixlen =
		    in6_mask2len((struct in6_addr *)
		    &r->dst.addr.v.a.mask, NULL);
		if (prefixlen < 32)
			prefixlen = 96;
		inet_nat64(pd->naf, &pd->ndaddr,
		    &new_dst, &pd->ndaddr, prefixlen);
	} else {
		/*
		 * The prefix is the IPv6 nat address
		 * (the one just obtained from the nat pool)
		 */
		prefixlen = in6_mask2len((struct in6_addr *)
		    &r->nat.addr.v.a.mask, NULL);
		if (prefixlen > 96)
			prefixlen = 96;
		inet_nat64(pd->naf, &pd->ndaddr,
		    &new_dst, &new_src, prefixlen);
	}

	/* publish the translated addresses */
	PF_ACPY(&pd->nsaddr, &new_src, pd->naf);
	PF_ACPY(&pd->ndaddr, &new_dst, pd->naf);

	if (pf_status.debug >= LOG_NOTICE) {
		log(LOG_NOTICE, "pf: af-to %s %s done, prefixlen %d, ",
		    pd->naf == AF_INET ? "inet" : "inet6",
		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr",
		    prefixlen);
		pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
		addlog(" -> ");
		pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
		addlog("\n");
	}

	return (0);
}
#endif /* INET6 */
831 
832 int
833 pf_postprocess_addr(struct pf_state *cur)
834 {
835 	struct pf_rule		*nr;
836 	struct pf_state_key	*sks;
837 	struct pf_pool		 rpool;
838 	struct pf_addr		 lookup_addr;
839 	int			 slbcount = -1;
840 
841 	nr = cur->natrule.ptr;
842 
843 	if (nr == NULL)
844 		return (0);
845 
846 	/* decrease counter */
847 
848 	sks = cur ? cur->key[PF_SK_STACK] : NULL;
849 
850 	/* check for outgoing or ingoing balancing */
851 	if (nr->rt == PF_ROUTETO)
852 		lookup_addr = cur->rt_addr;
853 	else if (sks != NULL)
854 		lookup_addr = sks->addr[1];
855 	else {
856 		if (pf_status.debug >= LOG_DEBUG) {
857 			log(LOG_DEBUG, "pf: %s: unable to obtain address",
858 			    __func__);
859 		}
860 		return (1);
861 	}
862 
863 	/* check for appropriate pool */
864 	if (nr->rdr.addr.type != PF_ADDR_NONE)
865 		rpool = nr->rdr;
866 	else if (nr->nat.addr.type != PF_ADDR_NONE)
867 		rpool = nr->nat;
868 	else if (nr->route.addr.type != PF_ADDR_NONE)
869 		rpool = nr->route;
870 	else
871 		return (0);
872 
873 	if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
874 		return (0);
875 
876 	if (rpool.addr.type == PF_ADDR_TABLE) {
877 		if ((slbcount = pfr_states_decrease(
878 		    rpool.addr.p.tbl,
879 		    &lookup_addr, sks->af)) == -1) {
880 			if (pf_status.debug >= LOG_DEBUG) {
881 				log(LOG_DEBUG, "pf: %s: selected address ",
882 				    __func__);
883 				pf_print_host(&lookup_addr,
884 				    sks->port[0], sks->af);
885 				addlog(". Failed to "
886 				    "decrease count!\n");
887 			}
888 			return (1);
889 		}
890 	} else if (rpool.addr.type == PF_ADDR_DYNIFTL) {
891 		if ((slbcount = pfr_states_decrease(
892 		    rpool.addr.p.dyn->pfid_kt,
893 		    &lookup_addr, sks->af)) == -1) {
894 			if (pf_status.debug >= LOG_DEBUG) {
895 				log(LOG_DEBUG, "pf: %s: selected address ",
896 				    __func__);
897 				pf_print_host(&lookup_addr,
898 				    sks->port[0], sks->af);
899 				addlog(". Failed to "
900 				    "decrease count!\n");
901 			}
902 			return (1);
903 		}
904 	}
905 	if (slbcount > -1) {
906 		if (pf_status.debug >= LOG_NOTICE) {
907 			log(LOG_NOTICE, "pf: %s: selected address ", __func__);
908 			pf_print_host(&lookup_addr, sks->port[0],
909 			    sks->af);
910 			addlog(" decreased state count to %u\n",
911 			    slbcount);
912 		}
913 	}
914 	return (0);
915 }
916