1 /* $OpenBSD: pf_lb.c,v 1.73 2023/01/04 10:31:55 dlg Exp $ */ 2 3 /* 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2008 Henning Brauer 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 35 * 36 */ 37 38 #include "bpfilter.h" 39 #include "pflog.h" 40 #include "pfsync.h" 41 #include "pflow.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/mbuf.h> 46 #include <sys/filio.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/kernel.h> 50 #include <sys/time.h> 51 #include <sys/pool.h> 52 #include <sys/rwlock.h> 53 #include <sys/syslog.h> 54 #include <sys/stdint.h> 55 56 #include <crypto/siphash.h> 57 58 #include <net/if.h> 59 #include <net/bpf.h> 60 #include <net/route.h> 61 62 #include <netinet/in.h> 63 #include <netinet/ip.h> 64 #include <netinet/in_pcb.h> 65 #include <netinet/ip_var.h> 66 #include <netinet/ip_icmp.h> 67 #include <netinet/icmp_var.h> 68 #include <netinet/tcp.h> 69 #include <netinet/tcp_seq.h> 70 #include <netinet/tcp_timer.h> 71 #include <netinet/udp.h> 72 #include <netinet/udp_var.h> 73 #include <netinet/if_ether.h> 74 75 #ifdef INET6 76 #include <netinet/ip6.h> 77 #include <netinet/icmp6.h> 78 #endif /* INET6 */ 79 80 #include <net/pfvar.h> 81 #include <net/pfvar_priv.h> 82 83 #if NPFLOG > 0 84 #include <net/if_pflog.h> 85 #endif /* NPFLOG > 0 */ 86 87 #if NPFLOW > 0 88 #include <net/if_pflow.h> 89 #endif /* NPFLOW > 0 */ 90 91 #if NPFSYNC > 0 92 #include <net/if_pfsync.h> 93 #endif /* NPFSYNC > 0 */ 94 95 u_int64_t pf_hash(struct pf_addr *, struct pf_addr *, 96 struct pf_poolhashkey *, sa_family_t); 97 int pf_get_sport(struct pf_pdesc *, struct pf_rule *, 98 struct pf_addr *, u_int16_t *, u_int16_t, 99 u_int16_t, struct pf_src_node **); 100 int pf_map_addr_states_increase(sa_family_t, 101 struct pf_pool *, struct pf_addr *); 102 int pf_get_transaddr_af(struct pf_rule *, 103 struct pf_pdesc *, struct pf_src_node **); 104 int pf_map_addr_sticky(sa_family_t, struct pf_rule *, 105 struct pf_addr *, struct pf_addr *, 106 struct pf_src_node **, struct pf_pool *, 107 enum pf_sn_types); 108 109 u_int64_t 110 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, 111 struct pf_poolhashkey *key, sa_family_t af) 112 { 113 uint64_t res = 0; 114 #ifdef INET6 115 union { 116 uint64_t hash64; 117 uint32_t hash32[2]; 118 } h; 119 #endif /* INET6 */ 120 121 switch (af) { 122 case AF_INET: 123 res = SipHash24((SIPHASH_KEY *)key, 124 &inaddr->addr32[0], sizeof(inaddr->addr32[0])); 125 hash->addr32[0] = res; 126 break; 127 #ifdef INET6 128 case AF_INET6: 129 res = SipHash24((SIPHASH_KEY *)key, &inaddr->addr32[0], 130 4 * sizeof(inaddr->addr32[0])); 131 h.hash64 = res; 132 hash->addr32[0] = h.hash32[0]; 133 hash->addr32[1] = h.hash32[1]; 134 /* 135 * siphash isn't big enough, but flipping it around is 136 * good enough here. 137 */ 138 hash->addr32[2] = ~h.hash32[1]; 139 hash->addr32[3] = ~h.hash32[0]; 140 break; 141 #endif /* INET6 */ 142 default: 143 unhandled_af(af); 144 } 145 return (res); 146 } 147 148 int 149 pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r, 150 struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, 151 struct pf_src_node **sn) 152 { 153 struct pf_state_key_cmp key; 154 struct pf_addr init_addr; 155 u_int16_t cut; 156 int dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN; 157 int sidx = pd->sidx; 158 int didx = pd->didx; 159 160 memset(&init_addr, 0, sizeof(init_addr)); 161 if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat, 162 PF_SN_NAT)) 163 return (1); 164 165 if (pd->proto == IPPROTO_ICMP) { 166 if (pd->ndport == htons(ICMP_ECHO)) { 167 low = 1; 168 high = 65535; 169 } else 170 return (0); /* Don't try to modify non-echo ICMP */ 171 } 172 #ifdef INET6 173 if (pd->proto == IPPROTO_ICMPV6) { 174 if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) { 175 low = 1; 176 high = 65535; 177 } else 178 return (0); /* Don't try to modify non-echo ICMP */ 179 } 180 #endif /* INET6 */ 181 182 do { 183 key.af = pd->naf; 184 key.proto = pd->proto; 185 key.rdomain = pd->rdomain; 186 pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af); 187 pf_addrcpy(&key.addr[sidx], naddr, key.af); 188 key.port[didx] = pd->ndport; 189 190 /* 191 * port search; start random, step; 192 * similar 2 portloop in in_pcbbind 193 */ 194 if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP || 195 pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6)) { 196 /* XXX bug: icmp states dont use the id on both 197 * XXX sides (traceroute -I through nat) */ 198 key.port[sidx] = pd->nsport; 199 if (pf_find_state_all(&key, dir, NULL) == NULL) { 200 *nport = pd->nsport; 201 return (0); 202 } 203 } else if (low == 0 && high == 0) { 204 key.port[sidx] = pd->nsport; 205 if (pf_find_state_all(&key, dir, NULL) == NULL) { 206 *nport = pd->nsport; 207 return (0); 208 } 209 } else if (low == high) { 210 key.port[sidx] = htons(low); 211 if (pf_find_state_all(&key, dir, NULL) == NULL) { 212 *nport = htons(low); 213 return (0); 214 } 215 } else { 216 u_int32_t tmp; 217 218 if (low > high) { 219 tmp = low; 220 low = high; 221 high = tmp; 222 } 223 /* low < high */ 224 cut = arc4random_uniform(1 + high - low) + low; 225 /* low <= cut <= high */ 226 for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) { 227 key.port[sidx] = htons(tmp); 228 if (pf_find_state_all(&key, dir, NULL) == 229 NULL && !in_baddynamic(tmp, pd->proto)) { 230 *nport = htons(tmp); 231 return (0); 232 } 233 } 234 tmp = cut; 235 for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) { 236 key.port[sidx] = htons(tmp); 237 if (pf_find_state_all(&key, dir, NULL) == 238 NULL && !in_baddynamic(tmp, pd->proto)) { 239 *nport = htons(tmp); 240 return (0); 241 } 242 } 243 } 244 245 switch (r->nat.opts & PF_POOL_TYPEMASK) { 246 case PF_POOL_RANDOM: 247 case PF_POOL_ROUNDROBIN: 248 case PF_POOL_LEASTSTATES: 249 /* 250 * pick a different source address since we're out 251 * of free port choices for the current one. 252 */ 253 if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, 254 &init_addr, sn, &r->nat, PF_SN_NAT)) 255 return (1); 256 break; 257 case PF_POOL_NONE: 258 case PF_POOL_SRCHASH: 259 case PF_POOL_BITMASK: 260 default: 261 return (1); 262 } 263 } while (! PF_AEQ(&init_addr, naddr, pd->naf) ); 264 return (1); /* none available */ 265 } 266 267 int 268 pf_map_addr_sticky(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, 269 struct pf_addr *naddr, struct pf_src_node **sns, struct pf_pool *rpool, 270 enum pf_sn_types type) 271 { 272 struct pf_addr *raddr, *rmask, *cached; 273 struct pf_state *s; 274 struct pf_src_node k; 275 int valid; 276 277 k.af = af; 278 k.type = type; 279 pf_addrcpy(&k.addr, saddr, af); 280 k.rule.ptr = r; 281 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; 282 sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k); 283 if (sns[type] == NULL) 284 return (-1); 285 286 /* check if the cached entry is still valid */ 287 cached = &(sns[type])->raddr; 288 valid = 0; 289 if (PF_AZERO(cached, af)) { 290 valid = 1; 291 } else if (rpool->addr.type == PF_ADDR_DYNIFTL) { 292 if (pfr_kentry_byaddr(rpool->addr.p.dyn->pfid_kt, cached, 293 af, 0)) 294 valid = 1; 295 } else if (rpool->addr.type == PF_ADDR_TABLE) { 296 if (pfr_kentry_byaddr(rpool->addr.p.tbl, cached, af, 0)) 297 valid = 1; 298 } else if (rpool->addr.type != PF_ADDR_NOROUTE) { 299 raddr = &rpool->addr.v.a.addr; 300 rmask = &rpool->addr.v.a.mask; 301 valid = pf_match_addr(0, raddr, rmask, cached, af); 302 } 303 if (!valid) { 304 if (pf_status.debug >= LOG_DEBUG) { 305 log(LOG_DEBUG, "pf: pf_map_addr: " 306 "stale src tracking (%u) ", type); 307 pf_print_host(&k.addr, 0, af); 308 addlog(" to "); 309 pf_print_host(cached, 0, af); 310 addlog("\n"); 311 } 312 if (sns[type]->states != 0) { 313 /* XXX expensive */ 314 RBT_FOREACH(s, pf_state_tree_id, &tree_id) 315 pf_state_rm_src_node(s, sns[type]); 316 } 317 sns[type]->expire = 1; 318 pf_remove_src_node(sns[type]); 319 sns[type] = NULL; 320 return (-1); 321 } 322 323 324 if (!PF_AZERO(cached, af)) { 325 pf_addrcpy(naddr, cached, af); 326 if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES && 327 pf_map_addr_states_increase(af, rpool, cached) == -1) 328 return (-1); 329 } 330 if (pf_status.debug >= LOG_DEBUG) { 331 log(LOG_DEBUG, "pf: pf_map_addr: " 332 "src tracking (%u) maps ", type); 333 pf_print_host(&k.addr, 0, af); 334 addlog(" to "); 335 pf_print_host(naddr, 0, af); 336 addlog("\n"); 337 } 338 339 if (sns[type]->kif != NULL) 340 rpool->kif = sns[type]->kif; 341 342 return (0); 343 } 344 345 uint32_t 346 pf_rand_addr(uint32_t mask) 347 { 348 uint32_t addr; 349 350 mask = ~ntohl(mask); 351 addr = arc4random_uniform(mask + 1); 352 353 return (htonl(addr)); 354 } 355 356 int 357 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, 358 struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns, 359 struct pf_pool *rpool, enum pf_sn_types type) 360 { 361 struct pf_addr hash; 362 struct pf_addr faddr; 363 struct pf_addr *raddr = &rpool->addr.v.a.addr; 364 struct pf_addr *rmask = &rpool->addr.v.a.mask; 365 struct pfr_ktable *kt; 366 struct pfi_kif *kif; 367 u_int64_t states; 368 u_int16_t weight; 369 u_int64_t load; 370 u_int64_t cload; 371 u_int64_t hashidx; 372 int cnt; 373 374 if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR && 375 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE && 376 pf_map_addr_sticky(af, r, saddr, naddr, sns, rpool, type) == 0) 377 return (0); 378 379 if (rpool->addr.type == PF_ADDR_NOROUTE) 380 return (1); 381 if (rpool->addr.type == PF_ADDR_DYNIFTL) { 382 switch (af) { 383 case AF_INET: 384 if (rpool->addr.p.dyn->pfid_acnt4 < 1 && 385 !PF_POOL_DYNTYPE(rpool->opts)) 386 return (1); 387 raddr = &rpool->addr.p.dyn->pfid_addr4; 388 rmask = &rpool->addr.p.dyn->pfid_mask4; 389 break; 390 #ifdef INET6 391 case AF_INET6: 392 if (rpool->addr.p.dyn->pfid_acnt6 < 1 && 393 !PF_POOL_DYNTYPE(rpool->opts)) 394 return (1); 395 raddr = &rpool->addr.p.dyn->pfid_addr6; 396 rmask = &rpool->addr.p.dyn->pfid_mask6; 397 break; 398 #endif /* INET6 */ 399 default: 400 unhandled_af(af); 401 } 402 } else if (rpool->addr.type == PF_ADDR_TABLE) { 403 if (!PF_POOL_DYNTYPE(rpool->opts)) 404 return (1); /* unsupported */ 405 } else { 406 raddr = &rpool->addr.v.a.addr; 407 rmask = &rpool->addr.v.a.mask; 408 } 409 410 switch (rpool->opts & PF_POOL_TYPEMASK) { 411 case PF_POOL_NONE: 412 pf_addrcpy(naddr, raddr, af); 413 break; 414 case PF_POOL_BITMASK: 415 pf_poolmask(naddr, raddr, rmask, saddr, af); 416 break; 417 case PF_POOL_RANDOM: 418 if (rpool->addr.type == PF_ADDR_TABLE || 419 rpool->addr.type == PF_ADDR_DYNIFTL) { 420 if (rpool->addr.type == PF_ADDR_TABLE) 421 kt = rpool->addr.p.tbl; 422 else 423 kt = rpool->addr.p.dyn->pfid_kt; 424 kt = pfr_ktable_select_active(kt); 425 if (kt == NULL) 426 return (1); 427 428 cnt = kt->pfrkt_cnt; 429 if (cnt == 0) 430 rpool->tblidx = 0; 431 else 432 rpool->tblidx = (int)arc4random_uniform(cnt); 433 memset(&rpool->counter, 0, sizeof(rpool->counter)); 434 if (pfr_pool_get(rpool, &raddr, &rmask, af)) 435 return (1); 436 pf_addrcpy(naddr, &rpool->counter, af); 437 } else if (init_addr != NULL && PF_AZERO(init_addr, af)) { 438 switch (af) { 439 case AF_INET: 440 rpool->counter.addr32[0] = pf_rand_addr( 441 rmask->addr32[0]); 442 break; 443 #ifdef INET6 444 case AF_INET6: 445 if (rmask->addr32[3] != 0xffffffff) 446 rpool->counter.addr32[3] = pf_rand_addr( 447 rmask->addr32[3]); 448 else 449 break; 450 if (rmask->addr32[2] != 0xffffffff) 451 rpool->counter.addr32[2] = pf_rand_addr( 452 rmask->addr32[2]); 453 else 454 break; 455 if (rmask->addr32[1] != 0xffffffff) 456 rpool->counter.addr32[1] = pf_rand_addr( 457 rmask->addr32[1]); 458 else 459 break; 460 if (rmask->addr32[0] != 0xffffffff) 461 rpool->counter.addr32[0] = pf_rand_addr( 462 rmask->addr32[0]); 463 break; 464 #endif /* INET6 */ 465 default: 466 unhandled_af(af); 467 } 468 pf_poolmask(naddr, raddr, rmask, &rpool->counter, af); 469 pf_addrcpy(init_addr, naddr, af); 470 471 } else { 472 pf_addr_inc(&rpool->counter, af); 473 pf_poolmask(naddr, raddr, rmask, &rpool->counter, af); 474 } 475 break; 476 case PF_POOL_SRCHASH: 477 hashidx = pf_hash(saddr, &hash, &rpool->key, af); 478 479 if (rpool->addr.type == PF_ADDR_TABLE || 480 rpool->addr.type == PF_ADDR_DYNIFTL) { 481 if (rpool->addr.type == PF_ADDR_TABLE) 482 kt = rpool->addr.p.tbl; 483 else 484 kt = rpool->addr.p.dyn->pfid_kt; 485 kt = pfr_ktable_select_active(kt); 486 if (kt == NULL) 487 return (1); 488 489 cnt = kt->pfrkt_cnt; 490 if (cnt == 0) 491 rpool->tblidx = 0; 492 else 493 rpool->tblidx = (int)(hashidx % cnt); 494 memset(&rpool->counter, 0, sizeof(rpool->counter)); 495 if (pfr_pool_get(rpool, &raddr, &rmask, af)) 496 return (1); 497 pf_addrcpy(naddr, &rpool->counter, af); 498 } else { 499 pf_poolmask(naddr, raddr, rmask, &hash, af); 500 } 501 break; 502 case PF_POOL_ROUNDROBIN: 503 if (rpool->addr.type == PF_ADDR_TABLE || 504 rpool->addr.type == PF_ADDR_DYNIFTL) { 505 if (pfr_pool_get(rpool, &raddr, &rmask, af)) { 506 /* 507 * reset counter in case its value 508 * has been removed from the pool. 509 */ 510 memset(&rpool->counter, 0, 511 sizeof(rpool->counter)); 512 if (pfr_pool_get(rpool, &raddr, &rmask, af)) 513 return (1); 514 } 515 } else if (PF_AZERO(&rpool->counter, af)) { 516 /* 517 * fall back to POOL_NONE if there is a single host 518 * address in pool. 519 */ 520 if (af == AF_INET && 521 rmask->addr32[0] == INADDR_BROADCAST) { 522 pf_addrcpy(naddr, raddr, af); 523 break; 524 } 525 #ifdef INET6 526 if (af == AF_INET6 && 527 IN6_ARE_ADDR_EQUAL(&rmask->v6, &in6mask128)) { 528 pf_addrcpy(naddr, raddr, af); 529 break; 530 } 531 #endif 532 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) 533 return (1); 534 535 /* iterate over table if it contains entries which are weighted */ 536 if ((rpool->addr.type == PF_ADDR_TABLE && 537 rpool->addr.p.tbl->pfrkt_refcntcost > 0) || 538 (rpool->addr.type == PF_ADDR_DYNIFTL && 539 rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) { 540 do { 541 if (rpool->addr.type == PF_ADDR_TABLE || 542 rpool->addr.type == PF_ADDR_DYNIFTL) { 543 if (pfr_pool_get(rpool, 544 &raddr, &rmask, af)) 545 return (1); 546 } else { 547 log(LOG_ERR, "pf: pf_map_addr: " 548 "weighted RR failure"); 549 return (1); 550 } 551 if (rpool->weight >= rpool->curweight) 552 break; 553 pf_addr_inc(&rpool->counter, af); 554 } while (1); 555 556 weight = rpool->weight; 557 } 558 559 pf_poolmask(naddr, raddr, rmask, &rpool->counter, af); 560 if (init_addr != NULL && PF_AZERO(init_addr, af)) 561 pf_addrcpy(init_addr, &rpool->counter, af); 562 pf_addr_inc(&rpool->counter, af); 563 break; 564 case PF_POOL_LEASTSTATES: 565 /* retrieve an address first */ 566 if (rpool->addr.type == PF_ADDR_TABLE || 567 rpool->addr.type == PF_ADDR_DYNIFTL) { 568 if (pfr_pool_get(rpool, &raddr, &rmask, af)) { 569 /* see PF_POOL_ROUNDROBIN */ 570 memset(&rpool->counter, 0, 571 sizeof(rpool->counter)); 572 if (pfr_pool_get(rpool, &raddr, &rmask, af)) 573 return (1); 574 } 575 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) 576 return (1); 577 578 states = rpool->states; 579 weight = rpool->weight; 580 kif = rpool->kif; 581 582 if ((rpool->addr.type == PF_ADDR_TABLE && 583 rpool->addr.p.tbl->pfrkt_refcntcost > 0) || 584 (rpool->addr.type == PF_ADDR_DYNIFTL && 585 rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) 586 load = ((UINT16_MAX * rpool->states) / rpool->weight); 587 else 588 load = states; 589 590 pf_addrcpy(&faddr, &rpool->counter, af); 591 592 pf_addrcpy(naddr, &rpool->counter, af); 593 if (init_addr != NULL && PF_AZERO(init_addr, af)) 594 pf_addrcpy(init_addr, naddr, af); 595 596 /* 597 * iterate *once* over whole table and find destination with 598 * least connection 599 */ 600 do { 601 pf_addr_inc(&rpool->counter, af); 602 if (rpool->addr.type == PF_ADDR_TABLE || 603 rpool->addr.type == PF_ADDR_DYNIFTL) { 604 if (pfr_pool_get(rpool, &raddr, &rmask, af)) 605 return (1); 606 } else if (pf_match_addr(0, raddr, rmask, 607 &rpool->counter, af)) 608 return (1); 609 610 if ((rpool->addr.type == PF_ADDR_TABLE && 611 rpool->addr.p.tbl->pfrkt_refcntcost > 0) || 612 (rpool->addr.type == PF_ADDR_DYNIFTL && 613 rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) 614 cload = ((UINT16_MAX * rpool->states) 615 / rpool->weight); 616 else 617 cload = rpool->states; 618 619 /* find lc minimum */ 620 if (cload < load) { 621 states = rpool->states; 622 weight = rpool->weight; 623 kif = rpool->kif; 624 load = cload; 625 626 pf_addrcpy(naddr, &rpool->counter, af); 627 if (init_addr != NULL && 628 PF_AZERO(init_addr, af)) 629 pf_addrcpy(init_addr, naddr, af); 630 } 631 } while (pf_match_addr(1, &faddr, rmask, &rpool->counter, af) && 632 (states > 0)); 633 634 if (pf_map_addr_states_increase(af, rpool, naddr) == -1) 635 return (1); 636 /* revert the kif which was set by pfr_pool_get() */ 637 rpool->kif = kif; 638 break; 639 } 640 641 if (rpool->opts & PF_POOL_STICKYADDR) { 642 if (sns[type] != NULL) { 643 pf_remove_src_node(sns[type]); 644 sns[type] = NULL; 645 } 646 if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr, 647 rpool->kif)) 648 return (1); 649 } 650 651 if (pf_status.debug >= LOG_INFO && 652 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 653 log(LOG_INFO, "pf: pf_map_addr: selected address "); 654 pf_print_host(naddr, 0, af); 655 if ((rpool->opts & PF_POOL_TYPEMASK) == 656 PF_POOL_LEASTSTATES) 657 addlog(" with state count %llu", states); 658 if ((rpool->addr.type == PF_ADDR_TABLE && 659 rpool->addr.p.tbl->pfrkt_refcntcost > 0) || 660 (rpool->addr.type == PF_ADDR_DYNIFTL && 661 rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) 662 addlog(" with weight %u", weight); 663 addlog("\n"); 664 } 665 666 return (0); 667 } 668 669 int 670 pf_map_addr_states_increase(sa_family_t af, struct pf_pool *rpool, 671 struct pf_addr *naddr) 672 { 673 if (rpool->addr.type == PF_ADDR_TABLE) { 674 if (pfr_states_increase(rpool->addr.p.tbl, 675 naddr, af) == -1) { 676 if (pf_status.debug >= LOG_DEBUG) { 677 log(LOG_DEBUG, 678 "pf: pf_map_addr_states_increase: " 679 "selected address "); 680 pf_print_host(naddr, 0, af); 681 addlog(". Failed to increase count!\n"); 682 } 683 return (-1); 684 } 685 } else if (rpool->addr.type == PF_ADDR_DYNIFTL) { 686 if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt, 687 naddr, af) == -1) { 688 if (pf_status.debug >= LOG_DEBUG) { 689 log(LOG_DEBUG, 690 "pf: pf_map_addr_states_increase: " 691 "selected address "); 692 pf_print_host(naddr, 0, af); 693 addlog(". Failed to increase count!\n"); 694 } 695 return (-1); 696 } 697 } 698 return (0); 699 } 700 701 int 702 pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd, 703 struct pf_src_node **sns, struct pf_rule **nr) 704 { 705 struct pf_addr naddr; 706 u_int16_t nport; 707 708 #ifdef INET6 709 if (pd->af != pd->naf) 710 return (pf_get_transaddr_af(r, pd, sns)); 711 #endif /* INET6 */ 712 713 if (r->nat.addr.type != PF_ADDR_NONE) { 714 /* XXX is this right? what if rtable is changed at the same 715 * XXX time? where do I need to figure out the sport? */ 716 nport = 0; 717 if (pf_get_sport(pd, r, &naddr, &nport, 718 r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) { 719 DPFPRINTF(LOG_NOTICE, 720 "pf: NAT proxy port allocation (%u-%u) failed", 721 r->nat.proxy_port[0], 722 r->nat.proxy_port[1]); 723 return (-1); 724 } 725 *nr = r; 726 pf_addrcpy(&pd->nsaddr, &naddr, pd->af); 727 pd->nsport = nport; 728 } 729 if (r->rdr.addr.type != PF_ADDR_NONE) { 730 if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns, 731 &r->rdr, PF_SN_RDR)) 732 return (-1); 733 if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) 734 pf_poolmask(&naddr, &naddr, &r->rdr.addr.v.a.mask, 735 &pd->ndaddr, pd->af); 736 737 nport = 0; 738 if (r->rdr.proxy_port[1]) { 739 u_int32_t tmp_nport; 740 u_int16_t div; 741 742 div = r->rdr.proxy_port[1] - r->rdr.proxy_port[0] + 1; 743 div = (div == 0) ? 1 : div; 744 745 tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) % div) + 746 r->rdr.proxy_port[0]; 747 748 /* wrap around if necessary */ 749 if (tmp_nport > 65535) 750 tmp_nport -= 65535; 751 nport = htons((u_int16_t)tmp_nport); 752 } else if (r->rdr.proxy_port[0]) 753 nport = htons(r->rdr.proxy_port[0]); 754 *nr = r; 755 pf_addrcpy(&pd->ndaddr, &naddr, pd->af); 756 if (nport) 757 pd->ndport = nport; 758 } 759 760 return (0); 761 } 762 763 #ifdef INET6 764 int 765 pf_get_transaddr_af(struct pf_rule *r, struct pf_pdesc *pd, 766 struct pf_src_node **sns) 767 { 768 struct pf_addr ndaddr, nsaddr, naddr; 769 u_int16_t nport; 770 int prefixlen = 96; 771 772 if (pf_status.debug >= LOG_INFO) { 773 log(LOG_INFO, "pf: af-to %s %s, ", 774 pd->naf == AF_INET ? "inet" : "inet6", 775 r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr"); 776 pf_print_host(&pd->nsaddr, pd->nsport, pd->af); 777 addlog(" -> "); 778 pf_print_host(&pd->ndaddr, pd->ndport, pd->af); 779 addlog("\n"); 780 } 781 782 if (r->nat.addr.type == PF_ADDR_NONE) 783 panic("pf_get_transaddr_af: no nat pool for source address"); 784 785 /* get source address and port */ 786 nport = 0; 787 if (pf_get_sport(pd, r, &nsaddr, &nport, 788 r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) { 789 DPFPRINTF(LOG_NOTICE, 790 "pf: af-to NAT proxy port allocation (%u-%u) failed", 791 r->nat.proxy_port[0], 792 r->nat.proxy_port[1]); 793 return (-1); 794 } 795 pd->nsport = nport; 796 797 if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) { 798 if (pd->dir == PF_IN) { 799 pd->ndport = ntohs(pd->ndport); 800 if (pd->ndport == ICMP6_ECHO_REQUEST) 801 pd->ndport = ICMP_ECHO; 802 else if (pd->ndport == ICMP6_ECHO_REPLY) 803 pd->ndport = ICMP_ECHOREPLY; 804 pd->ndport = htons(pd->ndport); 805 } else { 806 pd->nsport = ntohs(pd->nsport); 807 if (pd->nsport == ICMP6_ECHO_REQUEST) 808 pd->nsport = ICMP_ECHO; 809 else if (pd->nsport == ICMP6_ECHO_REPLY) 810 pd->nsport = ICMP_ECHOREPLY; 811 pd->nsport = htons(pd->nsport); 812 } 813 } else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) { 814 if (pd->dir == PF_IN) { 815 pd->ndport = ntohs(pd->ndport); 816 if (pd->ndport == ICMP_ECHO) 817 pd->ndport = ICMP6_ECHO_REQUEST; 818 else if (pd->ndport == ICMP_ECHOREPLY) 819 pd->ndport = ICMP6_ECHO_REPLY; 820 pd->ndport = htons(pd->ndport); 821 } else { 822 pd->nsport = ntohs(pd->nsport); 823 if (pd->nsport == ICMP_ECHO) 824 pd->nsport = ICMP6_ECHO_REQUEST; 825 else if (pd->nsport == ICMP_ECHOREPLY) 826 pd->nsport = ICMP6_ECHO_REPLY; 827 pd->nsport = htons(pd->nsport); 828 } 829 } 830 831 /* get the destination address and port */ 832 if (r->rdr.addr.type != PF_ADDR_NONE) { 833 if (pf_map_addr(pd->naf, r, &nsaddr, &naddr, NULL, sns, 834 &r->rdr, PF_SN_RDR)) 835 return (-1); 836 if (r->rdr.proxy_port[0]) 837 pd->ndport = htons(r->rdr.proxy_port[0]); 838 839 if (pd->naf == AF_INET) { 840 /* The prefix is the IPv4 rdr address */ 841 prefixlen = in_mask2len((struct in_addr *) 842 &r->rdr.addr.v.a.mask); 843 inet_nat46(pd->naf, &pd->ndaddr, 844 &ndaddr, &naddr, prefixlen); 845 } else { 846 /* The prefix is the IPv6 rdr address */ 847 prefixlen = 848 in6_mask2len((struct in6_addr *) 849 &r->rdr.addr.v.a.mask, NULL); 850 inet_nat64(pd->naf, &pd->ndaddr, 851 &ndaddr, &naddr, prefixlen); 852 } 853 } else { 854 if (pd->naf == AF_INET) { 855 /* The prefix is the IPv6 dst address */ 856 prefixlen = 857 in6_mask2len((struct in6_addr *) 858 &r->dst.addr.v.a.mask, NULL); 859 if (prefixlen < 32) 860 prefixlen = 96; 861 inet_nat64(pd->naf, &pd->ndaddr, 862 &ndaddr, &pd->ndaddr, prefixlen); 863 } else { 864 /* 865 * The prefix is the IPv6 nat address 866 * (that was stored in pd->nsaddr) 867 */ 868 prefixlen = in6_mask2len((struct in6_addr *) 869 &r->nat.addr.v.a.mask, NULL); 870 if (prefixlen > 96) 871 prefixlen = 96; 872 inet_nat64(pd->naf, &pd->ndaddr, 873 &ndaddr, &nsaddr, prefixlen); 874 } 875 } 876 877 pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf); 878 pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf); 879 880 if (pf_status.debug >= LOG_INFO) { 881 log(LOG_INFO, "pf: af-to %s %s done, prefixlen %d, ", 882 pd->naf == AF_INET ? "inet" : "inet6", 883 r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr", 884 prefixlen); 885 pf_print_host(&pd->nsaddr, pd->nsport, pd->naf); 886 addlog(" -> "); 887 pf_print_host(&pd->ndaddr, pd->ndport, pd->naf); 888 addlog("\n"); 889 } 890 891 return (0); 892 } 893 #endif /* INET6 */ 894 895 int 896 pf_postprocess_addr(struct pf_state *cur) 897 { 898 struct pf_rule *nr; 899 struct pf_state_key *sks; 900 struct pf_pool rpool; 901 struct pf_addr lookup_addr; 902 int slbcount = -1; 903 904 nr = cur->natrule.ptr; 905 906 if (nr == NULL) 907 return (0); 908 909 /* decrease counter */ 910 911 sks = cur->key[PF_SK_STACK]; 912 913 /* check for outgoing or ingoing balancing */ 914 if (nr->rt == PF_ROUTETO) 915 lookup_addr = cur->rt_addr; 916 else if (sks != NULL) 917 lookup_addr = sks->addr[1]; 918 else { 919 if (pf_status.debug >= LOG_DEBUG) { 920 log(LOG_DEBUG, "pf: %s: unable to obtain address", 921 __func__); 922 } 923 return (1); 924 } 925 926 /* check for appropriate pool */ 927 if (nr->rdr.addr.type != PF_ADDR_NONE) 928 rpool = nr->rdr; 929 else if (nr->nat.addr.type != PF_ADDR_NONE) 930 rpool = nr->nat; 931 else if (nr->route.addr.type != PF_ADDR_NONE) 932 rpool = nr->route; 933 else 934 return (0); 935 936 if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES)) 937 return (0); 938 939 if (rpool.addr.type == PF_ADDR_TABLE) { 940 if ((slbcount = pfr_states_decrease( 941 rpool.addr.p.tbl, 942 &lookup_addr, sks->af)) == -1) { 943 if (pf_status.debug >= LOG_DEBUG) { 944 log(LOG_DEBUG, "pf: %s: selected address ", 945 __func__); 946 pf_print_host(&lookup_addr, 947 sks->port[0], sks->af); 948 addlog(". Failed to " 949 "decrease count!\n"); 950 } 951 return (1); 952 } 953 } else if (rpool.addr.type == PF_ADDR_DYNIFTL) { 954 if ((slbcount = pfr_states_decrease( 955 rpool.addr.p.dyn->pfid_kt, 956 &lookup_addr, sks->af)) == -1) { 957 if (pf_status.debug >= LOG_DEBUG) { 958 log(LOG_DEBUG, "pf: %s: selected address ", 959 __func__); 960 pf_print_host(&lookup_addr, 961 sks->port[0], sks->af); 962 addlog(". Failed to " 963 "decrease count!\n"); 964 } 965 return (1); 966 } 967 } 968 if (slbcount > -1) { 969 if (pf_status.debug >= LOG_INFO) { 970 log(LOG_INFO, "pf: %s: selected address ", __func__); 971 pf_print_host(&lookup_addr, sks->port[0], 972 sks->af); 973 addlog(" decreased state count to %u\n", 974 slbcount); 975 } 976 } 977 return (0); 978 } 979