1 /*- 2 * Copyright (c) 2014-2020 Mindaugas Rasiukevicius <rmind at noxt eu> 3 * Copyright (c) 2010-2013 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This material is based upon work partially supported by The 7 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * NPF network address port translation (NAPT) and other forms of NAT. 33 * Described in RFC 2663, RFC 3022, etc. 34 * 35 * Overview 36 * 37 * There are a few mechanisms: NAT policy, port map and translation. 38 * The NAT module has a separate ruleset where rules always have an 39 * associated NAT policy. 40 * 41 * Translation types 42 * 43 * There are two types of translation: outbound (NPF_NATOUT) and 44 * inbound (NPF_NATIN). It should not be confused with connection 45 * direction. See npf_nat_which() for the description of how the 46 * addresses are rewritten. The bi-directional NAT is a combined 47 * outbound and inbound translation, therefore is constructed as 48 * two policies. 49 * 50 * NAT policies and port maps 51 * 52 * The NAT (translation) policy is applied when packet matches the 53 * rule. Apart from the filter criteria, the NAT policy always has 54 * a translation IP address or a table. If port translation is set, 55 * then NAT mechanism relies on port map mechanism. 56 * 57 * Connections, translation entries and their life-cycle 58 * 59 * NAT relies on the connection tracking module. Each translated 60 * connection has an associated translation entry (npf_nat_t) which 61 * contains information used for backwards stream translation, i.e. 62 * the original IP address with port and translation port, allocated 63 * from the port map. Each NAT entry is associated with the policy, 64 * which contains translation IP address. Allocated port is returned 65 * to the port map and NAT entry is destroyed when connection expires. 66 */ 67 68 #ifdef _KERNEL 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.52 2022/04/09 23:38:33 riastradh Exp $"); 71 72 #include <sys/param.h> 73 #include <sys/types.h> 74 75 #include <sys/atomic.h> 76 #include <sys/condvar.h> 77 #include <sys/kmem.h> 78 #include <sys/mutex.h> 79 #include <sys/pool.h> 80 #include <sys/proc.h> 81 #endif 82 83 #include "npf_impl.h" 84 #include "npf_conn.h" 85 86 /* 87 * NAT policy structure. 88 */ 89 struct npf_natpolicy { 90 npf_t * n_npfctx; 91 kmutex_t n_lock; 92 LIST_HEAD(, npf_nat) n_nat_list; 93 unsigned n_refcnt; 94 uint64_t n_id; 95 96 /* 97 * Translation type, flags, address or table and the port. 98 * Additionally, there may be translation algorithm and any 99 * auxiliary data, e.g. NPTv6 adjustment value. 100 * 101 * NPF_NP_CMP_START mark starts here. 102 */ 103 unsigned n_type; 104 unsigned n_flags; 105 unsigned n_alen; 106 107 npf_addr_t n_taddr; 108 npf_netmask_t n_tmask; 109 in_port_t n_tport; 110 unsigned n_tid; 111 112 unsigned n_algo; 113 union { 114 unsigned n_rr_idx; 115 uint16_t n_npt66_adj; 116 }; 117 }; 118 119 /* 120 * Private flags - must be in the NPF_NAT_PRIVMASK range. 121 */ 122 #define NPF_NAT_USETABLE (0x01000000 & NPF_NAT_PRIVMASK) 123 124 #define NPF_NP_CMP_START offsetof(npf_natpolicy_t, n_type) 125 #define NPF_NP_CMP_SIZE (sizeof(npf_natpolicy_t) - NPF_NP_CMP_START) 126 127 /* 128 * NAT entry for a connection. 129 */ 130 struct npf_nat { 131 /* Associated NAT policy. */ 132 npf_natpolicy_t * nt_natpolicy; 133 134 uint16_t nt_ifid; 135 uint16_t nt_alen; 136 137 /* 138 * Translation address as well as the original address which is 139 * used for backwards translation. The same for ports. 140 */ 141 npf_addr_t nt_taddr; 142 npf_addr_t nt_oaddr; 143 144 in_port_t nt_oport; 145 in_port_t nt_tport; 146 147 /* ALG (if any) associated with this NAT entry. */ 148 npf_alg_t * nt_alg; 149 uintptr_t nt_alg_arg; 150 151 LIST_ENTRY(npf_nat) nt_entry; 152 npf_conn_t * nt_conn; 153 }; 154 155 static pool_cache_t nat_cache __read_mostly; 156 157 /* 158 * npf_nat_sys{init,fini}: initialize/destroy NAT subsystem structures. 159 */ 160 161 void 162 npf_nat_sysinit(void) 163 { 164 nat_cache = pool_cache_init(sizeof(npf_nat_t), 0, 165 0, 0, "npfnatpl", NULL, IPL_NET, NULL, NULL, NULL); 166 KASSERT(nat_cache != NULL); 167 } 168 169 void 170 npf_nat_sysfini(void) 171 { 172 /* All NAT policies should already be destroyed. */ 173 pool_cache_destroy(nat_cache); 174 } 175 176 /* 177 * npf_natpolicy_create: create a new NAT policy. 178 */ 179 npf_natpolicy_t * 180 npf_natpolicy_create(npf_t *npf, const nvlist_t *nat, npf_ruleset_t *rset) 181 { 182 npf_natpolicy_t *np; 183 const void *addr; 184 size_t len; 185 186 np = kmem_zalloc(sizeof(npf_natpolicy_t), KM_SLEEP); 187 atomic_store_relaxed(&np->n_refcnt, 1); 188 np->n_npfctx = npf; 189 190 /* The translation type, flags and policy ID. */ 191 np->n_type = dnvlist_get_number(nat, "type", 0); 192 np->n_flags = dnvlist_get_number(nat, "flags", 0) & ~NPF_NAT_PRIVMASK; 193 np->n_id = dnvlist_get_number(nat, "nat-policy", 0); 194 195 /* Should be exclusively either inbound or outbound NAT. */ 196 if (((np->n_type == NPF_NATIN) ^ (np->n_type == NPF_NATOUT)) == 0) { 197 goto err; 198 } 199 mutex_init(&np->n_lock, MUTEX_DEFAULT, IPL_SOFTNET); 200 LIST_INIT(&np->n_nat_list); 201 202 /* 203 * Translation IP, mask and port (if applicable). If using the 204 * the table, specified by the ID, then the nat-addr/nat-mask will 205 * be used as a filter for the addresses selected from table. 206 */ 207 if (nvlist_exists_number(nat, "nat-table-id")) { 208 if (np->n_flags & NPF_NAT_STATIC) { 209 goto err; 210 } 211 np->n_tid = nvlist_get_number(nat, "nat-table-id"); 212 np->n_tmask = NPF_NO_NETMASK; 213 np->n_flags |= NPF_NAT_USETABLE; 214 } else { 215 addr = dnvlist_get_binary(nat, "nat-addr", &len, NULL, 0); 216 if (!addr || len == 0 || len > sizeof(npf_addr_t)) { 217 goto err; 218 } 219 memcpy(&np->n_taddr, addr, len); 220 np->n_alen = len; 221 np->n_tmask = dnvlist_get_number(nat, "nat-mask", NPF_NO_NETMASK); 222 if (npf_netmask_check(np->n_alen, np->n_tmask)) { 223 goto err; 224 } 225 } 226 np->n_tport = dnvlist_get_number(nat, "nat-port", 0); 227 228 /* 229 * NAT algorithm. 230 */ 231 np->n_algo = dnvlist_get_number(nat, "nat-algo", 0); 232 switch (np->n_algo) { 233 case NPF_ALGO_NPT66: 234 np->n_npt66_adj = dnvlist_get_number(nat, "npt66-adj", 0); 235 break; 236 case NPF_ALGO_NETMAP: 237 break; 238 case NPF_ALGO_IPHASH: 239 case NPF_ALGO_RR: 240 default: 241 if (np->n_tmask != NPF_NO_NETMASK) { 242 goto err; 243 } 244 break; 245 } 246 return np; 247 err: 248 mutex_destroy(&np->n_lock); 249 kmem_free(np, sizeof(npf_natpolicy_t)); 250 return NULL; 251 } 252 253 int 254 npf_natpolicy_export(const npf_natpolicy_t *np, nvlist_t *nat) 255 { 256 nvlist_add_number(nat, "nat-policy", np->n_id); 257 nvlist_add_number(nat, "type", np->n_type); 258 nvlist_add_number(nat, "flags", np->n_flags); 259 260 if (np->n_flags & NPF_NAT_USETABLE) { 261 nvlist_add_number(nat, "nat-table-id", np->n_tid); 262 } else { 263 nvlist_add_binary(nat, "nat-addr", &np->n_taddr, np->n_alen); 264 nvlist_add_number(nat, "nat-mask", np->n_tmask); 265 } 266 nvlist_add_number(nat, "nat-port", np->n_tport); 267 nvlist_add_number(nat, "nat-algo", np->n_algo); 268 269 switch (np->n_algo) { 270 case NPF_ALGO_NPT66: 271 nvlist_add_number(nat, "npt66-adj", np->n_npt66_adj); 272 break; 273 } 274 return 0; 275 } 276 277 static void 278 npf_natpolicy_release(npf_natpolicy_t *np) 279 { 280 KASSERT(atomic_load_relaxed(&np->n_refcnt) > 0); 281 282 #ifndef __HAVE_ATOMIC_AS_MEMBAR 283 membar_release(); 284 #endif 285 if (atomic_dec_uint_nv(&np->n_refcnt) != 0) { 286 return; 287 } 288 #ifndef __HAVE_ATOMIC_AS_MEMBAR 289 membar_acquire(); 290 #endif 291 KASSERT(LIST_EMPTY(&np->n_nat_list)); 292 mutex_destroy(&np->n_lock); 293 kmem_free(np, sizeof(npf_natpolicy_t)); 294 } 295 296 /* 297 * npf_natpolicy_destroy: free the NAT policy. 298 * 299 * => Called from npf_rule_free() during the reload via npf_ruleset_destroy(). 300 * => At this point, NAT policy cannot acquire new references. 301 */ 302 void 303 npf_natpolicy_destroy(npf_natpolicy_t *np) 304 { 305 /* 306 * Drain the references. If there are active NAT connections, 307 * then expire them and kick the worker. 308 */ 309 if (atomic_load_relaxed(&np->n_refcnt) > 1) { 310 npf_nat_t *nt; 311 312 mutex_enter(&np->n_lock); 313 LIST_FOREACH(nt, &np->n_nat_list, nt_entry) { 314 npf_conn_t *con = nt->nt_conn; 315 KASSERT(con != NULL); 316 npf_conn_expire(con); 317 } 318 mutex_exit(&np->n_lock); 319 npf_worker_signal(np->n_npfctx); 320 } 321 KASSERT(atomic_load_relaxed(&np->n_refcnt) >= 1); 322 323 /* 324 * Drop the initial reference, but it might not be the last one. 325 * If so, the last reference will be triggered via: 326 * 327 * npf_conn_destroy() -> npf_nat_destroy() -> npf_natpolicy_release() 328 */ 329 npf_natpolicy_release(np); 330 } 331 332 void 333 npf_nat_freealg(npf_natpolicy_t *np, npf_alg_t *alg) 334 { 335 npf_nat_t *nt; 336 337 mutex_enter(&np->n_lock); 338 LIST_FOREACH(nt, &np->n_nat_list, nt_entry) { 339 if (nt->nt_alg == alg) { 340 npf_alg_destroy(np->n_npfctx, alg, nt, nt->nt_conn); 341 nt->nt_alg = NULL; 342 } 343 } 344 mutex_exit(&np->n_lock); 345 } 346 347 /* 348 * npf_natpolicy_cmp: compare two NAT policies. 349 * 350 * => Return 0 on match, and non-zero otherwise. 351 */ 352 bool 353 npf_natpolicy_cmp(npf_natpolicy_t *np, npf_natpolicy_t *mnp) 354 { 355 const void *np_raw, *mnp_raw; 356 357 /* 358 * Compare the relevant NAT policy information (in its raw form) 359 * that is enough as a matching criteria. 360 */ 361 KASSERT(np && mnp && np != mnp); 362 np_raw = (const uint8_t *)np + NPF_NP_CMP_START; 363 mnp_raw = (const uint8_t *)mnp + NPF_NP_CMP_START; 364 return memcmp(np_raw, mnp_raw, NPF_NP_CMP_SIZE) == 0; 365 } 366 367 void 368 npf_nat_setid(npf_natpolicy_t *np, uint64_t id) 369 { 370 np->n_id = id; 371 } 372 373 uint64_t 374 npf_nat_getid(const npf_natpolicy_t *np) 375 { 376 return np->n_id; 377 } 378 379 /* 380 * npf_nat_which: tell which address (source or destination) should be 381 * rewritten given the combination of the NAT type and flow direction. 382 * 383 * => Returns NPF_SRC or NPF_DST constant. 384 */ 385 static inline unsigned 386 npf_nat_which(const unsigned type, const npf_flow_t flow) 387 { 388 unsigned which; 389 390 /* The logic below relies on these values being 0 or 1. */ 391 CTASSERT(NPF_SRC == 0 && NPF_DST == 1); 392 CTASSERT(NPF_FLOW_FORW == NPF_SRC && NPF_FLOW_BACK == NPF_DST); 393 394 KASSERT(type == NPF_NATIN || type == NPF_NATOUT); 395 KASSERT(flow == NPF_FLOW_FORW || flow == NPF_FLOW_BACK); 396 397 /* 398 * Outbound NAT rewrites: 399 * 400 * - Source (NPF_SRC) on "forwards" stream. 401 * - Destination (NPF_DST) on "backwards" stream. 402 * 403 * Inbound NAT is other way round. 404 */ 405 which = (type == NPF_NATOUT) ? flow : !flow; 406 KASSERT(which == NPF_SRC || which == NPF_DST); 407 return which; 408 } 409 410 /* 411 * npf_nat_inspect: inspect packet against NAT ruleset and return a policy. 412 * 413 * => Acquire a reference on the policy, if found. 414 * => NAT lookup is protected by EBR. 415 */ 416 static npf_natpolicy_t * 417 npf_nat_inspect(npf_cache_t *npc, const unsigned di) 418 { 419 npf_t *npf = npc->npc_ctx; 420 int slock = npf_config_read_enter(npf); 421 npf_ruleset_t *rlset = npf_config_natset(npf); 422 npf_natpolicy_t *np; 423 npf_rule_t *rl; 424 425 rl = npf_ruleset_inspect(npc, rlset, di, NPF_LAYER_3); 426 if (rl == NULL) { 427 npf_config_read_exit(npf, slock); 428 return NULL; 429 } 430 np = npf_rule_getnat(rl); 431 atomic_inc_uint(&np->n_refcnt); 432 npf_config_read_exit(npf, slock); 433 return np; 434 } 435 436 static void 437 npf_nat_algo_netmap(const npf_cache_t *npc, const npf_natpolicy_t *np, 438 const unsigned which, npf_addr_t *addr) 439 { 440 const npf_addr_t *orig_addr = npc->npc_ips[which]; 441 442 /* 443 * NETMAP: 444 * 445 * addr = net-addr | (orig-addr & ~mask) 446 */ 447 npf_addr_mask(&np->n_taddr, np->n_tmask, npc->npc_alen, addr); 448 npf_addr_bitor(orig_addr, np->n_tmask, npc->npc_alen, addr); 449 } 450 451 static inline npf_addr_t * 452 npf_nat_getaddr(npf_cache_t *npc, npf_natpolicy_t *np, const unsigned alen) 453 { 454 npf_tableset_t *ts = npf_config_tableset(np->n_npfctx); 455 npf_table_t *t = npf_tableset_getbyid(ts, np->n_tid); 456 unsigned idx; 457 458 /* 459 * Dynamically select the translation IP address. 460 */ 461 switch (np->n_algo) { 462 case NPF_ALGO_RR: 463 idx = atomic_inc_uint_nv(&np->n_rr_idx); 464 break; 465 case NPF_ALGO_IPHASH: 466 default: 467 idx = npf_addr_mix(alen, 468 npc->npc_ips[NPF_SRC], 469 npc->npc_ips[NPF_DST]); 470 break; 471 } 472 return npf_table_getsome(t, alen, idx); 473 } 474 475 /* 476 * npf_nat_create: create a new NAT translation entry. 477 * 478 * => The caller must pass the NAT policy with a reference acquired for us. 479 */ 480 static npf_nat_t * 481 npf_nat_create(npf_cache_t *npc, npf_natpolicy_t *np, npf_conn_t *con) 482 { 483 const unsigned proto = npc->npc_proto; 484 const unsigned alen = npc->npc_alen; 485 const nbuf_t *nbuf = npc->npc_nbuf; 486 npf_t *npf = npc->npc_ctx; 487 npf_addr_t *taddr; 488 npf_nat_t *nt; 489 490 KASSERT(npf_iscached(npc, NPC_IP46)); 491 KASSERT(npf_iscached(npc, NPC_LAYER4)); 492 493 /* Construct a new NAT entry and associate it with the connection. */ 494 nt = pool_cache_get(nat_cache, PR_NOWAIT); 495 if (__predict_false(!nt)) { 496 return NULL; 497 } 498 npf_stats_inc(npf, NPF_STAT_NAT_CREATE); 499 nt->nt_natpolicy = np; 500 nt->nt_conn = con; 501 nt->nt_alg = NULL; 502 503 /* 504 * Save the interface ID. 505 * 506 * Note: this can be different from the given connection if it 507 * was established on a different interface, using the global state 508 * mode (state.key.interface = 0). 509 */ 510 KASSERT(nbuf->nb_ifid != 0); 511 nt->nt_ifid = nbuf->nb_ifid; 512 513 /* 514 * Select the translation address. 515 */ 516 if (np->n_flags & NPF_NAT_USETABLE) { 517 int slock = npf_config_read_enter(npf); 518 taddr = npf_nat_getaddr(npc, np, alen); 519 if (__predict_false(!taddr)) { 520 npf_config_read_exit(npf, slock); 521 pool_cache_put(nat_cache, nt); 522 return NULL; 523 } 524 memcpy(&nt->nt_taddr, taddr, alen); 525 npf_config_read_exit(npf, slock); 526 527 } else if (np->n_algo == NPF_ALGO_NETMAP) { 528 const unsigned which = npf_nat_which(np->n_type, NPF_FLOW_FORW); 529 npf_nat_algo_netmap(npc, np, which, &nt->nt_taddr); 530 taddr = &nt->nt_taddr; 531 } else { 532 /* Static IP address. */ 533 taddr = &np->n_taddr; 534 memcpy(&nt->nt_taddr, taddr, alen); 535 } 536 nt->nt_alen = alen; 537 538 /* Save the original address which may be rewritten. */ 539 if (np->n_type == NPF_NATOUT) { 540 /* Outbound NAT: source (think internal) address. */ 541 memcpy(&nt->nt_oaddr, npc->npc_ips[NPF_SRC], alen); 542 } else { 543 /* Inbound NAT: destination (think external) address. */ 544 KASSERT(np->n_type == NPF_NATIN); 545 memcpy(&nt->nt_oaddr, npc->npc_ips[NPF_DST], alen); 546 } 547 548 /* 549 * Port translation, if required, and if it is TCP/UDP. 550 */ 551 if ((np->n_flags & NPF_NAT_PORTS) == 0 || 552 (proto != IPPROTO_TCP && proto != IPPROTO_UDP)) { 553 nt->nt_oport = 0; 554 nt->nt_tport = 0; 555 goto out; 556 } 557 558 /* Save the relevant TCP/UDP port. */ 559 if (proto == IPPROTO_TCP) { 560 const struct tcphdr *th = npc->npc_l4.tcp; 561 nt->nt_oport = (np->n_type == NPF_NATOUT) ? 562 th->th_sport : th->th_dport; 563 } else { 564 const struct udphdr *uh = npc->npc_l4.udp; 565 nt->nt_oport = (np->n_type == NPF_NATOUT) ? 566 uh->uh_sport : uh->uh_dport; 567 } 568 569 /* Get a new port for translation. */ 570 if ((np->n_flags & NPF_NAT_PORTMAP) != 0) { 571 npf_portmap_t *pm = np->n_npfctx->portmap; 572 nt->nt_tport = npf_portmap_get(pm, alen, taddr); 573 } else { 574 nt->nt_tport = np->n_tport; 575 } 576 out: 577 mutex_enter(&np->n_lock); 578 LIST_INSERT_HEAD(&np->n_nat_list, nt, nt_entry); 579 /* Note: we also consume the reference on policy. */ 580 mutex_exit(&np->n_lock); 581 return nt; 582 } 583 584 /* 585 * npf_dnat_translate: perform translation given the state data. 586 */ 587 static inline int 588 npf_dnat_translate(npf_cache_t *npc, npf_nat_t *nt, npf_flow_t flow) 589 { 590 const npf_natpolicy_t *np = nt->nt_natpolicy; 591 const unsigned which = npf_nat_which(np->n_type, flow); 592 const npf_addr_t *addr; 593 in_port_t port; 594 595 KASSERT(npf_iscached(npc, NPC_IP46)); 596 KASSERT(npf_iscached(npc, NPC_LAYER4)); 597 598 if (flow == NPF_FLOW_FORW) { 599 /* "Forwards" stream: use translation address/port. */ 600 addr = &nt->nt_taddr; 601 port = nt->nt_tport; 602 } else { 603 /* "Backwards" stream: use original address/port. */ 604 addr = &nt->nt_oaddr; 605 port = nt->nt_oport; 606 } 607 KASSERT((np->n_flags & NPF_NAT_PORTS) != 0 || port == 0); 608 609 /* Execute ALG translation first. */ 610 if ((npc->npc_info & NPC_ALG_EXEC) == 0) { 611 npc->npc_info |= NPC_ALG_EXEC; 612 npf_alg_exec(npc, nt, flow); 613 npf_recache(npc); 614 } 615 KASSERT(!nbuf_flag_p(npc->npc_nbuf, NBUF_DATAREF_RESET)); 616 617 /* Finally, perform the translation. */ 618 return npf_napt_rwr(npc, which, addr, port); 619 } 620 621 /* 622 * npf_snat_translate: perform translation given the algorithm. 623 */ 624 static inline int 625 npf_snat_translate(npf_cache_t *npc, const npf_natpolicy_t *np, npf_flow_t flow) 626 { 627 const unsigned which = npf_nat_which(np->n_type, flow); 628 const npf_addr_t *taddr; 629 npf_addr_t addr; 630 631 KASSERT(np->n_flags & NPF_NAT_STATIC); 632 633 switch (np->n_algo) { 634 case NPF_ALGO_NETMAP: 635 npf_nat_algo_netmap(npc, np, which, &addr); 636 taddr = &addr; 637 break; 638 case NPF_ALGO_NPT66: 639 return npf_npt66_rwr(npc, which, &np->n_taddr, 640 np->n_tmask, np->n_npt66_adj); 641 default: 642 taddr = &np->n_taddr; 643 break; 644 } 645 return npf_napt_rwr(npc, which, taddr, np->n_tport); 646 } 647 648 /* 649 * Associate NAT policy with an existing connection state. 650 */ 651 npf_nat_t * 652 npf_nat_share_policy(npf_cache_t *npc, npf_conn_t *con, npf_nat_t *src_nt) 653 { 654 npf_natpolicy_t *np = src_nt->nt_natpolicy; 655 npf_nat_t *nt; 656 int ret; 657 658 /* Create a new NAT entry. */ 659 nt = npf_nat_create(npc, np, con); 660 if (__predict_false(nt == NULL)) { 661 return NULL; 662 } 663 atomic_inc_uint(&np->n_refcnt); 664 665 /* Associate the NAT translation entry with the connection. */ 666 ret = npf_conn_setnat(npc, con, nt, np->n_type); 667 if (__predict_false(ret)) { 668 /* Will release the reference. */ 669 npf_nat_destroy(con, nt); 670 return NULL; 671 } 672 return nt; 673 } 674 675 /* 676 * npf_nat_lookup: lookup the (dynamic) NAT state and return its entry, 677 * 678 * => Checks that the packet is on the interface where NAT policy is applied. 679 * => Determines the flow direction in the context of the NAT policy. 680 */ 681 static npf_nat_t * 682 npf_nat_lookup(const npf_cache_t *npc, npf_conn_t *con, 683 const unsigned di, npf_flow_t *flow) 684 { 685 const nbuf_t *nbuf = npc->npc_nbuf; 686 const npf_natpolicy_t *np; 687 npf_nat_t *nt; 688 689 if ((nt = npf_conn_getnat(con)) == NULL) { 690 return NULL; 691 } 692 if (nt->nt_ifid != nbuf->nb_ifid) { 693 return NULL; 694 } 695 696 np = nt->nt_natpolicy; 697 KASSERT(atomic_load_relaxed(&np->n_refcnt) > 0); 698 699 /* 700 * We rely on NPF_NAT{IN,OUT} being equal to PFIL_{IN,OUT}. 701 */ 702 CTASSERT(NPF_NATIN == PFIL_IN && NPF_NATOUT == PFIL_OUT); 703 *flow = (np->n_type == di) ? NPF_FLOW_FORW : NPF_FLOW_BACK; 704 return nt; 705 } 706 707 /* 708 * npf_do_nat: 709 * 710 * - Inspect packet for a NAT policy, unless a connection with a NAT 711 * association already exists. In such case, determine whether it 712 * is a "forwards" or "backwards" stream. 713 * 714 * - Perform translation: rewrite source or destination fields, 715 * depending on translation type and direction. 716 * 717 * - Associate a NAT policy with a connection (may establish a new). 718 */ 719 int 720 npf_do_nat(npf_cache_t *npc, npf_conn_t *con, const unsigned di) 721 { 722 nbuf_t *nbuf = npc->npc_nbuf; 723 npf_conn_t *ncon = NULL; 724 npf_natpolicy_t *np; 725 npf_flow_t flow; 726 npf_nat_t *nt; 727 int error; 728 729 /* All relevant data should be already cached. */ 730 if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) { 731 return 0; 732 } 733 KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)); 734 735 /* 736 * Return the NAT entry associated with the connection, if any. 737 * Determines whether the stream is "forwards" or "backwards". 738 * Note: no need to lock, since reference on connection is held. 739 */ 740 if (con && (nt = npf_nat_lookup(npc, con, di, &flow)) != NULL) { 741 np = nt->nt_natpolicy; 742 goto translate; 743 } 744 745 /* 746 * Inspect the packet for a NAT policy, if there is no connection. 747 * Note: acquires a reference if found. 748 */ 749 np = npf_nat_inspect(npc, di); 750 if (np == NULL) { 751 /* If packet does not match - done. */ 752 return 0; 753 } 754 flow = NPF_FLOW_FORW; 755 756 /* Static NAT - just perform the translation. */ 757 if (np->n_flags & NPF_NAT_STATIC) { 758 if (nbuf_cksum_barrier(nbuf, di)) { 759 npf_recache(npc); 760 } 761 error = npf_snat_translate(npc, np, flow); 762 npf_natpolicy_release(np); 763 return error; 764 } 765 766 /* 767 * If there is no local connection (no "stateful" rule - unusual, 768 * but possible configuration), establish one before translation. 769 * Note that it is not a "pass" connection, therefore passing of 770 * "backwards" stream depends on other, stateless filtering rules. 771 */ 772 if (con == NULL) { 773 ncon = npf_conn_establish(npc, di, true); 774 if (ncon == NULL) { 775 npf_natpolicy_release(np); 776 return ENOMEM; 777 } 778 con = ncon; 779 } 780 781 /* 782 * Create a new NAT entry and associate with the connection. 783 * We will consume the reference on success (release on error). 784 */ 785 nt = npf_nat_create(npc, np, con); 786 if (nt == NULL) { 787 npf_natpolicy_release(np); 788 error = ENOMEM; 789 goto out; 790 } 791 792 /* Determine whether any ALG matches. */ 793 if (npf_alg_match(npc, nt, di)) { 794 KASSERT(nt->nt_alg != NULL); 795 } 796 797 /* Associate the NAT translation entry with the connection. */ 798 error = npf_conn_setnat(npc, con, nt, np->n_type); 799 if (error) { 800 /* Will release the reference. */ 801 npf_nat_destroy(con, nt); 802 goto out; 803 } 804 805 translate: 806 /* May need to process the delayed checksums first (XXX: NetBSD). */ 807 if (nbuf_cksum_barrier(nbuf, di)) { 808 npf_recache(npc); 809 } 810 811 /* Perform the translation. */ 812 error = npf_dnat_translate(npc, nt, flow); 813 out: 814 if (__predict_false(ncon)) { 815 if (error) { 816 /* It was created for NAT - just expire. */ 817 npf_conn_expire(ncon); 818 } 819 npf_conn_release(ncon); 820 } 821 return error; 822 } 823 824 /* 825 * npf_nat_gettrans: return translation IP address and port. 826 */ 827 void 828 npf_nat_gettrans(npf_nat_t *nt, npf_addr_t **addr, in_port_t *port) 829 { 830 *addr = &nt->nt_taddr; 831 *port = nt->nt_tport; 832 } 833 834 /* 835 * npf_nat_getorig: return original IP address and port from translation entry. 836 */ 837 void 838 npf_nat_getorig(npf_nat_t *nt, npf_addr_t **addr, in_port_t *port) 839 { 840 *addr = &nt->nt_oaddr; 841 *port = nt->nt_oport; 842 } 843 844 /* 845 * npf_nat_setalg: associate an ALG with the NAT entry. 846 */ 847 void 848 npf_nat_setalg(npf_nat_t *nt, npf_alg_t *alg, uintptr_t arg) 849 { 850 nt->nt_alg = alg; 851 nt->nt_alg_arg = arg; 852 } 853 854 npf_alg_t * 855 npf_nat_getalg(const npf_nat_t *nt) 856 { 857 return nt->nt_alg; 858 } 859 860 uintptr_t 861 npf_nat_getalgarg(const npf_nat_t *nt) 862 { 863 return nt->nt_alg_arg; 864 } 865 866 /* 867 * npf_nat_destroy: destroy NAT structure (performed on connection expiration). 868 */ 869 void 870 npf_nat_destroy(npf_conn_t *con, npf_nat_t *nt) 871 { 872 npf_natpolicy_t *np = nt->nt_natpolicy; 873 npf_t *npf = np->n_npfctx; 874 npf_alg_t *alg; 875 876 /* Execute the ALG destroy callback, if any. */ 877 if ((alg = npf_nat_getalg(nt)) != NULL) { 878 npf_alg_destroy(npf, alg, nt, con); 879 nt->nt_alg = NULL; 880 } 881 882 /* Return taken port to the portmap. */ 883 if ((np->n_flags & NPF_NAT_PORTMAP) != 0 && nt->nt_tport) { 884 npf_portmap_t *pm = npf->portmap; 885 npf_portmap_put(pm, nt->nt_alen, &nt->nt_taddr, nt->nt_tport); 886 } 887 npf_stats_inc(np->n_npfctx, NPF_STAT_NAT_DESTROY); 888 889 /* 890 * Remove the connection from the list and drop the reference on 891 * the NAT policy. Note: this might trigger its destruction. 892 */ 893 mutex_enter(&np->n_lock); 894 LIST_REMOVE(nt, nt_entry); 895 mutex_exit(&np->n_lock); 896 npf_natpolicy_release(np); 897 898 pool_cache_put(nat_cache, nt); 899 } 900 901 /* 902 * npf_nat_export: serialize the NAT entry with a NAT policy ID. 903 */ 904 void 905 npf_nat_export(npf_t *npf, const npf_nat_t *nt, nvlist_t *con_nv) 906 { 907 npf_natpolicy_t *np = nt->nt_natpolicy; 908 unsigned alen = nt->nt_alen; 909 nvlist_t *nat_nv; 910 911 nat_nv = nvlist_create(0); 912 if (nt->nt_ifid) { 913 char ifname[IFNAMSIZ]; 914 npf_ifmap_copyname(npf, nt->nt_ifid, ifname, sizeof(ifname)); 915 nvlist_add_string(nat_nv, "ifname", ifname); 916 } 917 nvlist_add_number(nat_nv, "alen", alen); 918 919 nvlist_add_binary(nat_nv, "oaddr", &nt->nt_oaddr, alen); 920 nvlist_add_number(nat_nv, "oport", nt->nt_oport); 921 922 nvlist_add_binary(nat_nv, "taddr", &nt->nt_taddr, alen); 923 nvlist_add_number(nat_nv, "tport", nt->nt_tport); 924 925 nvlist_add_number(nat_nv, "nat-policy", np->n_id); 926 nvlist_move_nvlist(con_nv, "nat", nat_nv); 927 } 928 929 /* 930 * npf_nat_import: find the NAT policy and unserialize the NAT entry. 931 */ 932 npf_nat_t * 933 npf_nat_import(npf_t *npf, const nvlist_t *nat, 934 npf_ruleset_t *natlist, npf_conn_t *con) 935 { 936 npf_natpolicy_t *np; 937 npf_nat_t *nt; 938 const char *ifname; 939 const void *taddr, *oaddr; 940 size_t alen, len; 941 uint64_t np_id; 942 943 np_id = dnvlist_get_number(nat, "nat-policy", UINT64_MAX); 944 if ((np = npf_ruleset_findnat(natlist, np_id)) == NULL) { 945 return NULL; 946 } 947 nt = pool_cache_get(nat_cache, PR_WAITOK); 948 memset(nt, 0, sizeof(npf_nat_t)); 949 950 ifname = dnvlist_get_string(nat, "ifname", NULL); 951 if (ifname && (nt->nt_ifid = npf_ifmap_register(npf, ifname)) == 0) { 952 goto err; 953 } 954 955 alen = dnvlist_get_number(nat, "alen", 0); 956 if (alen == 0 || alen > sizeof(npf_addr_t)) { 957 goto err; 958 } 959 960 taddr = dnvlist_get_binary(nat, "taddr", &len, NULL, 0); 961 if (!taddr || len != alen) { 962 goto err; 963 } 964 memcpy(&nt->nt_taddr, taddr, sizeof(npf_addr_t)); 965 966 oaddr = dnvlist_get_binary(nat, "oaddr", &len, NULL, 0); 967 if (!oaddr || len != alen) { 968 goto err; 969 } 970 memcpy(&nt->nt_oaddr, oaddr, sizeof(npf_addr_t)); 971 972 nt->nt_oport = dnvlist_get_number(nat, "oport", 0); 973 nt->nt_tport = dnvlist_get_number(nat, "tport", 0); 974 975 /* Take a specific port from port-map. */ 976 if ((np->n_flags & NPF_NAT_PORTMAP) != 0 && nt->nt_tport) { 977 npf_portmap_t *pm = npf->portmap; 978 979 if (!npf_portmap_take(pm, nt->nt_alen, 980 &nt->nt_taddr, nt->nt_tport)) { 981 goto err; 982 } 983 } 984 npf_stats_inc(npf, NPF_STAT_NAT_CREATE); 985 986 /* 987 * Associate, take a reference and insert. Unlocked/non-atomic 988 * since the policy is not yet globally visible. 989 */ 990 nt->nt_natpolicy = np; 991 nt->nt_conn = con; 992 atomic_store_relaxed(&np->n_refcnt, 993 atomic_load_relaxed(&np->n_refcnt) + 1); 994 LIST_INSERT_HEAD(&np->n_nat_list, nt, nt_entry); 995 return nt; 996 err: 997 pool_cache_put(nat_cache, nt); 998 return NULL; 999 } 1000 1001 #if defined(DDB) || defined(_NPF_TESTING) 1002 1003 void 1004 npf_nat_dump(const npf_nat_t *nt) 1005 { 1006 const npf_natpolicy_t *np; 1007 struct in_addr ip; 1008 1009 np = nt->nt_natpolicy; 1010 memcpy(&ip, &nt->nt_taddr, sizeof(ip)); 1011 printf("\tNATP(%p): type %u flags 0x%x taddr %s tport %d\n", np, 1012 np->n_type, np->n_flags, inet_ntoa(ip), ntohs(np->n_tport)); 1013 memcpy(&ip, &nt->nt_oaddr, sizeof(ip)); 1014 printf("\tNAT: original address %s oport %d tport %d\n", 1015 inet_ntoa(ip), ntohs(nt->nt_oport), ntohs(nt->nt_tport)); 1016 if (nt->nt_alg) { 1017 printf("\tNAT ALG = %p, ARG = %p\n", 1018 nt->nt_alg, (void *)nt->nt_alg_arg); 1019 } 1020 } 1021 1022 #endif 1023