1 /* $OpenBSD: pf_lb.c,v 1.8 2009/11/03 10:59:04 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2008 Henning Brauer 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 35 * 36 */ 37 38 #include "bpfilter.h" 39 #include "pflog.h" 40 #include "pfsync.h" 41 #include "pflow.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/mbuf.h> 46 #include <sys/filio.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/kernel.h> 50 #include <sys/time.h> 51 #include <sys/pool.h> 52 #include <sys/proc.h> 53 #include <sys/rwlock.h> 54 55 #include <crypto/md5.h> 56 57 #include <net/if.h> 58 #include <net/if_types.h> 59 #include <net/bpf.h> 60 #include <net/route.h> 61 #include <net/radix_mpath.h> 62 63 #include <netinet/in.h> 64 #include <netinet/in_var.h> 65 #include <netinet/in_systm.h> 66 #include <netinet/ip.h> 67 #include <netinet/ip_var.h> 68 #include <netinet/tcp.h> 69 #include <netinet/tcp_seq.h> 70 #include <netinet/udp.h> 71 #include <netinet/ip_icmp.h> 72 #include <netinet/in_pcb.h> 73 #include <netinet/tcp_timer.h> 74 #include <netinet/tcp_var.h> 75 #include <netinet/udp_var.h> 76 #include <netinet/icmp_var.h> 77 #include <netinet/if_ether.h> 78 79 #include <dev/rndvar.h> 80 #include <net/pfvar.h> 81 #include <net/if_pflog.h> 82 #include <net/if_pflow.h> 83 84 #if NPFSYNC > 0 85 #include <net/if_pfsync.h> 86 #endif /* NPFSYNC > 0 */ 87 88 #ifdef INET6 89 #include <netinet/ip6.h> 90 #include <netinet/in_pcb.h> 91 #include <netinet/icmp6.h> 92 #include <netinet6/nd6.h> 93 #endif /* INET6 */ 94 95 96 #define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x 97 98 /* 99 * Global variables 100 */ 101 102 void pf_hash(struct pf_addr *, struct pf_addr *, 103 struct pf_poolhashkey *, sa_family_t); 104 int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, 105 struct pf_addr *, struct pf_addr *, u_int16_t, 106 struct pf_addr *, u_int16_t *, u_int16_t, u_int16_t, 107 struct pf_src_node **, int); 108 109 #define mix(a,b,c) \ 110 do { \ 111 a -= b; a -= c; a ^= (c >> 13); \ 112 b -= c; b -= a; b ^= (a << 8); \ 113 c -= a; c -= b; c ^= (b >> 13); \ 114 a -= b; a -= c; a ^= (c >> 12); \ 115 b -= c; b -= a; b ^= (a << 16); \ 116 c -= a; c -= b; c ^= (b >> 5); \ 117 a -= b; a -= c; a ^= (c >> 3); \ 118 b -= c; b -= a; b ^= (a << 10); \ 119 c -= a; c -= b; c ^= (b >> 15); \ 120 } while (0) 121 122 /* 123 * hash function based on bridge_hash in if_bridge.c 124 */ 125 void 126 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, 127 struct pf_poolhashkey *key, sa_family_t af) 128 { 129 u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0]; 130 131 switch (af) { 132 #ifdef INET 133 case AF_INET: 134 a += inaddr->addr32[0]; 135 b += key->key32[1]; 136 mix(a, b, c); 137 hash->addr32[0] = c + key->key32[2]; 138 break; 139 #endif /* INET */ 140 #ifdef INET6 141 case AF_INET6: 142 a += inaddr->addr32[0]; 143 b += inaddr->addr32[2]; 144 mix(a, b, c); 145 hash->addr32[0] = c; 146 a += inaddr->addr32[1]; 147 b += inaddr->addr32[3]; 148 c += key->key32[1]; 149 mix(a, b, c); 150 hash->addr32[1] = c; 151 a += inaddr->addr32[2]; 152 b += inaddr->addr32[1]; 153 c += key->key32[2]; 154 mix(a, b, c); 155 hash->addr32[2] = c; 156 a += inaddr->addr32[3]; 157 b += inaddr->addr32[0]; 158 c += key->key32[3]; 159 mix(a, b, c); 160 hash->addr32[3] = c; 161 break; 162 #endif /* INET6 */ 163 } 164 } 165 166 int 167 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, 168 struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport, 169 struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, 170 struct pf_src_node **sn, int rdomain) 171 { 172 struct pf_state_key_cmp key; 173 struct pf_addr init_addr; 174 u_int16_t cut; 175 176 bzero(&init_addr, sizeof(init_addr)); 177 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn, &r->nat)) 178 return (1); 179 180 if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) { 181 if (dport == htons(ICMP6_ECHO_REQUEST) || 182 dport == htons(ICMP_ECHO)) { 183 low = 1; 184 high = 65535; 185 } else 186 return (0); /* Don't try to modify non-echo ICMP */ 187 } 188 189 do { 190 key.af = af; 191 key.proto = proto; 192 key.rdomain = rdomain; 193 PF_ACPY(&key.addr[1], daddr, key.af); 194 PF_ACPY(&key.addr[0], naddr, key.af); 195 key.port[1] = dport; 196 197 /* 198 * port search; start random, step; 199 * similar 2 portloop in in_pcbbind 200 */ 201 if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP || 202 proto == IPPROTO_ICMP)) { 203 /* XXX bug icmp states dont use the id on both sides */ 204 key.port[0] = dport; 205 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) 206 return (0); 207 } else if (low == 0 && high == 0) { 208 key.port[0] = *nport; 209 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) 210 return (0); 211 } else if (low == high) { 212 key.port[0] = htons(low); 213 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { 214 *nport = htons(low); 215 return (0); 216 } 217 } else { 218 u_int16_t tmp; 219 220 if (low > high) { 221 tmp = low; 222 low = high; 223 high = tmp; 224 } 225 /* low < high */ 226 cut = arc4random_uniform(1 + high - low) + low; 227 /* low <= cut <= high */ 228 for (tmp = cut; tmp <= high; ++(tmp)) { 229 key.port[0] = htons(tmp); 230 if (pf_find_state_all(&key, PF_IN, NULL) == 231 NULL && !in_baddynamic(tmp, proto)) { 232 *nport = htons(tmp); 233 return (0); 234 } 235 } 236 for (tmp = cut - 1; tmp >= low; --(tmp)) { 237 key.port[0] = htons(tmp); 238 if (pf_find_state_all(&key, PF_IN, NULL) == 239 NULL && !in_baddynamic(tmp, proto)) { 240 *nport = htons(tmp); 241 return (0); 242 } 243 } 244 } 245 246 switch (r->nat.opts & PF_POOL_TYPEMASK) { 247 case PF_POOL_RANDOM: 248 case PF_POOL_ROUNDROBIN: 249 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn, 250 &r->nat)) 251 return (1); 252 break; 253 case PF_POOL_NONE: 254 case PF_POOL_SRCHASH: 255 case PF_POOL_BITMASK: 256 default: 257 return (1); 258 } 259 } while (! PF_AEQ(&init_addr, naddr, af) ); 260 return (1); /* none available */ 261 } 262 263 int 264 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, 265 struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn, 266 struct pf_pool *rpool) 267 { 268 unsigned char hash[16]; 269 struct pf_addr *raddr = &rpool->cur->addr.v.a.addr; 270 struct pf_addr *rmask = &rpool->cur->addr.v.a.mask; 271 struct pf_pooladdr *acur = rpool->cur; 272 struct pf_src_node k; 273 274 if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR && 275 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 276 k.af = af; 277 PF_ACPY(&k.addr, saddr, af); 278 if (r->rule_flag & PFRULE_RULESRCTRACK || 279 rpool->opts & PF_POOL_STICKYADDR) 280 k.rule.ptr = r; 281 else 282 k.rule.ptr = NULL; 283 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; 284 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); 285 if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { 286 PF_ACPY(naddr, &(*sn)->raddr, af); 287 if (pf_status.debug >= PF_DEBUG_MISC) { 288 printf("pf_map_addr: src tracking maps "); 289 pf_print_host(&k.addr, 0, af); 290 printf(" to "); 291 pf_print_host(naddr, 0, af); 292 printf("\n"); 293 } 294 return (0); 295 } 296 } 297 298 if (rpool->cur->addr.type == PF_ADDR_NOROUTE) 299 return (1); 300 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 301 switch (af) { 302 #ifdef INET 303 case AF_INET: 304 if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && 305 (rpool->opts & PF_POOL_TYPEMASK) != 306 PF_POOL_ROUNDROBIN) 307 return (1); 308 raddr = &rpool->cur->addr.p.dyn->pfid_addr4; 309 rmask = &rpool->cur->addr.p.dyn->pfid_mask4; 310 break; 311 #endif /* INET */ 312 #ifdef INET6 313 case AF_INET6: 314 if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && 315 (rpool->opts & PF_POOL_TYPEMASK) != 316 PF_POOL_ROUNDROBIN) 317 return (1); 318 raddr = &rpool->cur->addr.p.dyn->pfid_addr6; 319 rmask = &rpool->cur->addr.p.dyn->pfid_mask6; 320 break; 321 #endif /* INET6 */ 322 } 323 } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { 324 if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) 325 return (1); /* unsupported */ 326 } else { 327 raddr = &rpool->cur->addr.v.a.addr; 328 rmask = &rpool->cur->addr.v.a.mask; 329 } 330 331 switch (rpool->opts & PF_POOL_TYPEMASK) { 332 case PF_POOL_NONE: 333 PF_ACPY(naddr, raddr, af); 334 break; 335 case PF_POOL_BITMASK: 336 PF_POOLMASK(naddr, raddr, rmask, saddr, af); 337 break; 338 case PF_POOL_RANDOM: 339 if (init_addr != NULL && PF_AZERO(init_addr, af)) { 340 switch (af) { 341 #ifdef INET 342 case AF_INET: 343 rpool->counter.addr32[0] = htonl(arc4random()); 344 break; 345 #endif /* INET */ 346 #ifdef INET6 347 case AF_INET6: 348 if (rmask->addr32[3] != 0xffffffff) 349 rpool->counter.addr32[3] = 350 htonl(arc4random()); 351 else 352 break; 353 if (rmask->addr32[2] != 0xffffffff) 354 rpool->counter.addr32[2] = 355 htonl(arc4random()); 356 else 357 break; 358 if (rmask->addr32[1] != 0xffffffff) 359 rpool->counter.addr32[1] = 360 htonl(arc4random()); 361 else 362 break; 363 if (rmask->addr32[0] != 0xffffffff) 364 rpool->counter.addr32[0] = 365 htonl(arc4random()); 366 break; 367 #endif /* INET6 */ 368 } 369 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); 370 PF_ACPY(init_addr, naddr, af); 371 372 } else { 373 PF_AINC(&rpool->counter, af); 374 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); 375 } 376 break; 377 case PF_POOL_SRCHASH: 378 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); 379 PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); 380 break; 381 case PF_POOL_ROUNDROBIN: 382 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 383 if (!pfr_pool_get(rpool->cur->addr.p.tbl, 384 &rpool->tblidx, &rpool->counter, 385 &raddr, &rmask, af)) 386 goto get_addr; 387 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 388 if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 389 &rpool->tblidx, &rpool->counter, 390 &raddr, &rmask, af)) 391 goto get_addr; 392 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) 393 goto get_addr; 394 395 try_next: 396 if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) 397 rpool->cur = TAILQ_FIRST(&rpool->list); 398 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 399 rpool->tblidx = -1; 400 if (pfr_pool_get(rpool->cur->addr.p.tbl, 401 &rpool->tblidx, &rpool->counter, 402 &raddr, &rmask, af)) { 403 /* table contains no address of type 'af' */ 404 if (rpool->cur != acur) 405 goto try_next; 406 return (1); 407 } 408 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 409 rpool->tblidx = -1; 410 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 411 &rpool->tblidx, &rpool->counter, 412 &raddr, &rmask, af)) { 413 /* table contains no address of type 'af' */ 414 if (rpool->cur != acur) 415 goto try_next; 416 return (1); 417 } 418 } else { 419 raddr = &rpool->cur->addr.v.a.addr; 420 rmask = &rpool->cur->addr.v.a.mask; 421 PF_ACPY(&rpool->counter, raddr, af); 422 } 423 424 get_addr: 425 PF_ACPY(naddr, &rpool->counter, af); 426 if (init_addr != NULL && PF_AZERO(init_addr, af)) 427 PF_ACPY(init_addr, naddr, af); 428 PF_AINC(&rpool->counter, af); 429 break; 430 } 431 if (*sn != NULL) 432 PF_ACPY(&(*sn)->raddr, naddr, af); 433 434 if (pf_status.debug >= PF_DEBUG_NOISY && 435 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 436 printf("pf_map_addr: selected address "); 437 pf_print_host(naddr, 0, af); 438 printf("\n"); 439 } 440 441 return (0); 442 } 443 444 int 445 pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd, struct pf_addr *saddr, 446 u_int16_t *sport, struct pf_addr *daddr, u_int16_t *dport) 447 { 448 struct pf_addr naddr; 449 u_int16_t nport = 0; 450 451 struct pf_src_node srcnode, *sn = &srcnode; 452 453 if (!TAILQ_EMPTY(&r->nat.list)) { 454 /* XXX is this right? what if rtable is changed at the same 455 * XXX time? where do I need to figure out the sport? */ 456 if (pf_get_sport(pd->af, pd->proto, r, saddr, 457 daddr, *dport, &naddr, &nport, r->nat.proxy_port[0], 458 r->nat.proxy_port[1], &sn, pd->rdomain)) { 459 DPFPRINTF(PF_DEBUG_MISC, 460 ("pf: NAT proxy port allocation " 461 "(%u-%u) failed\n", 462 r->nat.proxy_port[0], 463 r->nat.proxy_port[1])); 464 return (-1); 465 } 466 PF_ACPY(saddr, &naddr, pd->af); 467 if (nport) 468 *sport = nport; 469 } 470 if (!TAILQ_EMPTY(&r->rdr.list)) { 471 if (pf_map_addr(pd->af, r, saddr, &naddr, NULL, &sn, &r->rdr)) 472 return (-1); 473 if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) 474 PF_POOLMASK(&naddr, &naddr, &r->rdr.cur->addr.v.a.mask, 475 daddr, pd->af); 476 477 if (r->rdr.proxy_port[1]) { 478 u_int32_t tmp_nport; 479 480 tmp_nport = ((ntohs(*dport) - 481 ntohs(r->dst.port[0])) % 482 (r->rdr.proxy_port[1] - 483 r->rdr.proxy_port[0] + 1)) + 484 r->rdr.proxy_port[0]; 485 486 /* wrap around if necessary */ 487 if (tmp_nport > 65535) 488 tmp_nport -= 65535; 489 nport = htons((u_int16_t)tmp_nport); 490 } else if (r->rdr.proxy_port[0]) 491 nport = htons(r->rdr.proxy_port[0]); 492 493 PF_ACPY(daddr, &naddr, pd->af); 494 if (nport) 495 *dport = nport; 496 } 497 498 return (0); 499 } 500 501