1 /*- 2 * Copyright (c) 2014-2020 Mindaugas Rasiukevicius <rmind at noxt eu> 3 * Copyright (c) 2010-2013 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This material is based upon work partially supported by The 7 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * NPF network address port translation (NAPT) and other forms of NAT. 33 * Described in RFC 2663, RFC 3022, etc. 34 * 35 * Overview 36 * 37 * There are a few mechanisms: NAT policy, port map and translation. 38 * The NAT module has a separate ruleset where rules always have an 39 * associated NAT policy. 40 * 41 * Translation types 42 * 43 * There are two types of translation: outbound (NPF_NATOUT) and 44 * inbound (NPF_NATIN). It should not be confused with connection 45 * direction. See npf_nat_which() for the description of how the 46 * addresses are rewritten. The bi-directional NAT is a combined 47 * outbound and inbound translation, therefore is constructed as 48 * two policies. 49 * 50 * NAT policies and port maps 51 * 52 * The NAT (translation) policy is applied when packet matches the 53 * rule. Apart from the filter criteria, the NAT policy always has 54 * a translation IP address or a table. If port translation is set, 55 * then NAT mechanism relies on port map mechanism. 56 * 57 * Connections, translation entries and their life-cycle 58 * 59 * NAT relies on the connection tracking module. Each translated 60 * connection has an associated translation entry (npf_nat_t) which 61 * contains information used for backwards stream translation, i.e. 62 * the original IP address with port and translation port, allocated 63 * from the port map. Each NAT entry is associated with the policy, 64 * which contains translation IP address. Allocated port is returned 65 * to the port map and NAT entry is destroyed when connection expires. 66 */ 67 68 #ifdef _KERNEL 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.53 2023/02/24 11:03:01 riastradh Exp $"); 71 72 #include <sys/param.h> 73 #include <sys/types.h> 74 75 #include <sys/atomic.h> 76 #include <sys/condvar.h> 77 #include <sys/kmem.h> 78 #include <sys/mutex.h> 79 #include <sys/pool.h> 80 #include <sys/proc.h> 81 #endif 82 83 #include "npf_impl.h" 84 #include "npf_conn.h" 85 86 /* 87 * NAT policy structure. 88 */ 89 struct npf_natpolicy { 90 npf_t * n_npfctx; 91 kmutex_t n_lock; 92 LIST_HEAD(, npf_nat) n_nat_list; 93 unsigned n_refcnt; 94 uint64_t n_id; 95 96 /* 97 * Translation type, flags, address or table and the port. 98 * Additionally, there may be translation algorithm and any 99 * auxiliary data, e.g. NPTv6 adjustment value. 100 * 101 * NPF_NP_CMP_START mark starts here. 102 */ 103 unsigned n_type; 104 unsigned n_flags; 105 unsigned n_alen; 106 107 npf_addr_t n_taddr; 108 npf_netmask_t n_tmask; 109 in_port_t n_tport; 110 unsigned n_tid; 111 112 unsigned n_algo; 113 union { 114 unsigned n_rr_idx; 115 uint16_t n_npt66_adj; 116 }; 117 }; 118 119 /* 120 * Private flags - must be in the NPF_NAT_PRIVMASK range. 121 */ 122 #define NPF_NAT_USETABLE (0x01000000 & NPF_NAT_PRIVMASK) 123 124 #define NPF_NP_CMP_START offsetof(npf_natpolicy_t, n_type) 125 #define NPF_NP_CMP_SIZE (sizeof(npf_natpolicy_t) - NPF_NP_CMP_START) 126 127 /* 128 * NAT entry for a connection. 129 */ 130 struct npf_nat { 131 /* Associated NAT policy. */ 132 npf_natpolicy_t * nt_natpolicy; 133 134 uint16_t nt_ifid; 135 uint16_t nt_alen; 136 137 /* 138 * Translation address as well as the original address which is 139 * used for backwards translation. The same for ports. 140 */ 141 npf_addr_t nt_taddr; 142 npf_addr_t nt_oaddr; 143 144 in_port_t nt_oport; 145 in_port_t nt_tport; 146 147 /* ALG (if any) associated with this NAT entry. */ 148 npf_alg_t * nt_alg; 149 uintptr_t nt_alg_arg; 150 151 LIST_ENTRY(npf_nat) nt_entry; 152 npf_conn_t * nt_conn; 153 }; 154 155 static pool_cache_t nat_cache __read_mostly; 156 157 /* 158 * npf_nat_sys{init,fini}: initialize/destroy NAT subsystem structures. 159 */ 160 161 void 162 npf_nat_sysinit(void) 163 { 164 nat_cache = pool_cache_init(sizeof(npf_nat_t), 0, 165 0, 0, "npfnatpl", NULL, IPL_NET, NULL, NULL, NULL); 166 KASSERT(nat_cache != NULL); 167 } 168 169 void 170 npf_nat_sysfini(void) 171 { 172 /* All NAT policies should already be destroyed. */ 173 pool_cache_destroy(nat_cache); 174 } 175 176 /* 177 * npf_natpolicy_create: create a new NAT policy. 178 */ 179 npf_natpolicy_t * 180 npf_natpolicy_create(npf_t *npf, const nvlist_t *nat, npf_ruleset_t *rset) 181 { 182 npf_natpolicy_t *np; 183 const void *addr; 184 size_t len; 185 186 np = kmem_zalloc(sizeof(npf_natpolicy_t), KM_SLEEP); 187 atomic_store_relaxed(&np->n_refcnt, 1); 188 np->n_npfctx = npf; 189 190 /* The translation type, flags and policy ID. */ 191 np->n_type = dnvlist_get_number(nat, "type", 0); 192 np->n_flags = dnvlist_get_number(nat, "flags", 0) & ~NPF_NAT_PRIVMASK; 193 np->n_id = dnvlist_get_number(nat, "nat-policy", 0); 194 195 /* Should be exclusively either inbound or outbound NAT. */ 196 if (((np->n_type == NPF_NATIN) ^ (np->n_type == NPF_NATOUT)) == 0) { 197 goto err; 198 } 199 mutex_init(&np->n_lock, MUTEX_DEFAULT, IPL_SOFTNET); 200 LIST_INIT(&np->n_nat_list); 201 202 /* 203 * Translation IP, mask and port (if applicable). If using the 204 * the table, specified by the ID, then the nat-addr/nat-mask will 205 * be used as a filter for the addresses selected from table. 206 */ 207 if (nvlist_exists_number(nat, "nat-table-id")) { 208 if (np->n_flags & NPF_NAT_STATIC) { 209 goto err; 210 } 211 np->n_tid = nvlist_get_number(nat, "nat-table-id"); 212 np->n_tmask = NPF_NO_NETMASK; 213 np->n_flags |= NPF_NAT_USETABLE; 214 } else { 215 addr = dnvlist_get_binary(nat, "nat-addr", &len, NULL, 0); 216 if (!addr || len == 0 || len > sizeof(npf_addr_t)) { 217 goto err; 218 } 219 memcpy(&np->n_taddr, addr, len); 220 np->n_alen = len; 221 np->n_tmask = dnvlist_get_number(nat, "nat-mask", NPF_NO_NETMASK); 222 if (npf_netmask_check(np->n_alen, np->n_tmask)) { 223 goto err; 224 } 225 } 226 np->n_tport = dnvlist_get_number(nat, "nat-port", 0); 227 228 /* 229 * NAT algorithm. 230 */ 231 np->n_algo = dnvlist_get_number(nat, "nat-algo", 0); 232 switch (np->n_algo) { 233 case NPF_ALGO_NPT66: 234 np->n_npt66_adj = dnvlist_get_number(nat, "npt66-adj", 0); 235 break; 236 case NPF_ALGO_NETMAP: 237 break; 238 case NPF_ALGO_IPHASH: 239 case NPF_ALGO_RR: 240 default: 241 if (np->n_tmask != NPF_NO_NETMASK) { 242 goto err; 243 } 244 break; 245 } 246 return np; 247 err: 248 mutex_destroy(&np->n_lock); 249 kmem_free(np, sizeof(npf_natpolicy_t)); 250 return NULL; 251 } 252 253 int 254 npf_natpolicy_export(const npf_natpolicy_t *np, nvlist_t *nat) 255 { 256 nvlist_add_number(nat, "nat-policy", np->n_id); 257 nvlist_add_number(nat, "type", np->n_type); 258 nvlist_add_number(nat, "flags", np->n_flags); 259 260 if (np->n_flags & NPF_NAT_USETABLE) { 261 nvlist_add_number(nat, "nat-table-id", np->n_tid); 262 } else { 263 nvlist_add_binary(nat, "nat-addr", &np->n_taddr, np->n_alen); 264 nvlist_add_number(nat, "nat-mask", np->n_tmask); 265 } 266 nvlist_add_number(nat, "nat-port", np->n_tport); 267 nvlist_add_number(nat, "nat-algo", np->n_algo); 268 269 switch (np->n_algo) { 270 case NPF_ALGO_NPT66: 271 nvlist_add_number(nat, "npt66-adj", np->n_npt66_adj); 272 break; 273 } 274 return 0; 275 } 276 277 static void 278 npf_natpolicy_release(npf_natpolicy_t *np) 279 { 280 KASSERT(atomic_load_relaxed(&np->n_refcnt) > 0); 281 282 membar_release(); 283 if (atomic_dec_uint_nv(&np->n_refcnt) != 0) { 284 return; 285 } 286 membar_acquire(); 287 KASSERT(LIST_EMPTY(&np->n_nat_list)); 288 mutex_destroy(&np->n_lock); 289 kmem_free(np, sizeof(npf_natpolicy_t)); 290 } 291 292 /* 293 * npf_natpolicy_destroy: free the NAT policy. 294 * 295 * => Called from npf_rule_free() during the reload via npf_ruleset_destroy(). 296 * => At this point, NAT policy cannot acquire new references. 297 */ 298 void 299 npf_natpolicy_destroy(npf_natpolicy_t *np) 300 { 301 /* 302 * Drain the references. If there are active NAT connections, 303 * then expire them and kick the worker. 304 */ 305 if (atomic_load_relaxed(&np->n_refcnt) > 1) { 306 npf_nat_t *nt; 307 308 mutex_enter(&np->n_lock); 309 LIST_FOREACH(nt, &np->n_nat_list, nt_entry) { 310 npf_conn_t *con = nt->nt_conn; 311 KASSERT(con != NULL); 312 npf_conn_expire(con); 313 } 314 mutex_exit(&np->n_lock); 315 npf_worker_signal(np->n_npfctx); 316 } 317 KASSERT(atomic_load_relaxed(&np->n_refcnt) >= 1); 318 319 /* 320 * Drop the initial reference, but it might not be the last one. 321 * If so, the last reference will be triggered via: 322 * 323 * npf_conn_destroy() -> npf_nat_destroy() -> npf_natpolicy_release() 324 */ 325 npf_natpolicy_release(np); 326 } 327 328 void 329 npf_nat_freealg(npf_natpolicy_t *np, npf_alg_t *alg) 330 { 331 npf_nat_t *nt; 332 333 mutex_enter(&np->n_lock); 334 LIST_FOREACH(nt, &np->n_nat_list, nt_entry) { 335 if (nt->nt_alg == alg) { 336 npf_alg_destroy(np->n_npfctx, alg, nt, nt->nt_conn); 337 nt->nt_alg = NULL; 338 } 339 } 340 mutex_exit(&np->n_lock); 341 } 342 343 /* 344 * npf_natpolicy_cmp: compare two NAT policies. 345 * 346 * => Return 0 on match, and non-zero otherwise. 347 */ 348 bool 349 npf_natpolicy_cmp(npf_natpolicy_t *np, npf_natpolicy_t *mnp) 350 { 351 const void *np_raw, *mnp_raw; 352 353 /* 354 * Compare the relevant NAT policy information (in its raw form) 355 * that is enough as a matching criteria. 356 */ 357 KASSERT(np && mnp && np != mnp); 358 np_raw = (const uint8_t *)np + NPF_NP_CMP_START; 359 mnp_raw = (const uint8_t *)mnp + NPF_NP_CMP_START; 360 return memcmp(np_raw, mnp_raw, NPF_NP_CMP_SIZE) == 0; 361 } 362 363 void 364 npf_nat_setid(npf_natpolicy_t *np, uint64_t id) 365 { 366 np->n_id = id; 367 } 368 369 uint64_t 370 npf_nat_getid(const npf_natpolicy_t *np) 371 { 372 return np->n_id; 373 } 374 375 /* 376 * npf_nat_which: tell which address (source or destination) should be 377 * rewritten given the combination of the NAT type and flow direction. 378 * 379 * => Returns NPF_SRC or NPF_DST constant. 380 */ 381 static inline unsigned 382 npf_nat_which(const unsigned type, const npf_flow_t flow) 383 { 384 unsigned which; 385 386 /* The logic below relies on these values being 0 or 1. */ 387 CTASSERT(NPF_SRC == 0 && NPF_DST == 1); 388 CTASSERT(NPF_FLOW_FORW == NPF_SRC && NPF_FLOW_BACK == NPF_DST); 389 390 KASSERT(type == NPF_NATIN || type == NPF_NATOUT); 391 KASSERT(flow == NPF_FLOW_FORW || flow == NPF_FLOW_BACK); 392 393 /* 394 * Outbound NAT rewrites: 395 * 396 * - Source (NPF_SRC) on "forwards" stream. 397 * - Destination (NPF_DST) on "backwards" stream. 398 * 399 * Inbound NAT is other way round. 400 */ 401 which = (type == NPF_NATOUT) ? flow : !flow; 402 KASSERT(which == NPF_SRC || which == NPF_DST); 403 return which; 404 } 405 406 /* 407 * npf_nat_inspect: inspect packet against NAT ruleset and return a policy. 408 * 409 * => Acquire a reference on the policy, if found. 410 * => NAT lookup is protected by EBR. 411 */ 412 static npf_natpolicy_t * 413 npf_nat_inspect(npf_cache_t *npc, const unsigned di) 414 { 415 npf_t *npf = npc->npc_ctx; 416 int slock = npf_config_read_enter(npf); 417 npf_ruleset_t *rlset = npf_config_natset(npf); 418 npf_natpolicy_t *np; 419 npf_rule_t *rl; 420 421 rl = npf_ruleset_inspect(npc, rlset, di, NPF_LAYER_3); 422 if (rl == NULL) { 423 npf_config_read_exit(npf, slock); 424 return NULL; 425 } 426 np = npf_rule_getnat(rl); 427 atomic_inc_uint(&np->n_refcnt); 428 npf_config_read_exit(npf, slock); 429 return np; 430 } 431 432 static void 433 npf_nat_algo_netmap(const npf_cache_t *npc, const npf_natpolicy_t *np, 434 const unsigned which, npf_addr_t *addr) 435 { 436 const npf_addr_t *orig_addr = npc->npc_ips[which]; 437 438 /* 439 * NETMAP: 440 * 441 * addr = net-addr | (orig-addr & ~mask) 442 */ 443 npf_addr_mask(&np->n_taddr, np->n_tmask, npc->npc_alen, addr); 444 npf_addr_bitor(orig_addr, np->n_tmask, npc->npc_alen, addr); 445 } 446 447 static inline npf_addr_t * 448 npf_nat_getaddr(npf_cache_t *npc, npf_natpolicy_t *np, const unsigned alen) 449 { 450 npf_tableset_t *ts = npf_config_tableset(np->n_npfctx); 451 npf_table_t *t = npf_tableset_getbyid(ts, np->n_tid); 452 unsigned idx; 453 454 /* 455 * Dynamically select the translation IP address. 456 */ 457 switch (np->n_algo) { 458 case NPF_ALGO_RR: 459 idx = atomic_inc_uint_nv(&np->n_rr_idx); 460 break; 461 case NPF_ALGO_IPHASH: 462 default: 463 idx = npf_addr_mix(alen, 464 npc->npc_ips[NPF_SRC], 465 npc->npc_ips[NPF_DST]); 466 break; 467 } 468 return npf_table_getsome(t, alen, idx); 469 } 470 471 /* 472 * npf_nat_create: create a new NAT translation entry. 473 * 474 * => The caller must pass the NAT policy with a reference acquired for us. 475 */ 476 static npf_nat_t * 477 npf_nat_create(npf_cache_t *npc, npf_natpolicy_t *np, npf_conn_t *con) 478 { 479 const unsigned proto = npc->npc_proto; 480 const unsigned alen = npc->npc_alen; 481 const nbuf_t *nbuf = npc->npc_nbuf; 482 npf_t *npf = npc->npc_ctx; 483 npf_addr_t *taddr; 484 npf_nat_t *nt; 485 486 KASSERT(npf_iscached(npc, NPC_IP46)); 487 KASSERT(npf_iscached(npc, NPC_LAYER4)); 488 489 /* Construct a new NAT entry and associate it with the connection. */ 490 nt = pool_cache_get(nat_cache, PR_NOWAIT); 491 if (__predict_false(!nt)) { 492 return NULL; 493 } 494 npf_stats_inc(npf, NPF_STAT_NAT_CREATE); 495 nt->nt_natpolicy = np; 496 nt->nt_conn = con; 497 nt->nt_alg = NULL; 498 499 /* 500 * Save the interface ID. 501 * 502 * Note: this can be different from the given connection if it 503 * was established on a different interface, using the global state 504 * mode (state.key.interface = 0). 505 */ 506 KASSERT(nbuf->nb_ifid != 0); 507 nt->nt_ifid = nbuf->nb_ifid; 508 509 /* 510 * Select the translation address. 511 */ 512 if (np->n_flags & NPF_NAT_USETABLE) { 513 int slock = npf_config_read_enter(npf); 514 taddr = npf_nat_getaddr(npc, np, alen); 515 if (__predict_false(!taddr)) { 516 npf_config_read_exit(npf, slock); 517 pool_cache_put(nat_cache, nt); 518 return NULL; 519 } 520 memcpy(&nt->nt_taddr, taddr, alen); 521 npf_config_read_exit(npf, slock); 522 523 } else if (np->n_algo == NPF_ALGO_NETMAP) { 524 const unsigned which = npf_nat_which(np->n_type, NPF_FLOW_FORW); 525 npf_nat_algo_netmap(npc, np, which, &nt->nt_taddr); 526 taddr = &nt->nt_taddr; 527 } else { 528 /* Static IP address. */ 529 taddr = &np->n_taddr; 530 memcpy(&nt->nt_taddr, taddr, alen); 531 } 532 nt->nt_alen = alen; 533 534 /* Save the original address which may be rewritten. */ 535 if (np->n_type == NPF_NATOUT) { 536 /* Outbound NAT: source (think internal) address. */ 537 memcpy(&nt->nt_oaddr, npc->npc_ips[NPF_SRC], alen); 538 } else { 539 /* Inbound NAT: destination (think external) address. */ 540 KASSERT(np->n_type == NPF_NATIN); 541 memcpy(&nt->nt_oaddr, npc->npc_ips[NPF_DST], alen); 542 } 543 544 /* 545 * Port translation, if required, and if it is TCP/UDP. 546 */ 547 if ((np->n_flags & NPF_NAT_PORTS) == 0 || 548 (proto != IPPROTO_TCP && proto != IPPROTO_UDP)) { 549 nt->nt_oport = 0; 550 nt->nt_tport = 0; 551 goto out; 552 } 553 554 /* Save the relevant TCP/UDP port. */ 555 if (proto == IPPROTO_TCP) { 556 const struct tcphdr *th = npc->npc_l4.tcp; 557 nt->nt_oport = (np->n_type == NPF_NATOUT) ? 558 th->th_sport : th->th_dport; 559 } else { 560 const struct udphdr *uh = npc->npc_l4.udp; 561 nt->nt_oport = (np->n_type == NPF_NATOUT) ? 562 uh->uh_sport : uh->uh_dport; 563 } 564 565 /* Get a new port for translation. */ 566 if ((np->n_flags & NPF_NAT_PORTMAP) != 0) { 567 npf_portmap_t *pm = np->n_npfctx->portmap; 568 nt->nt_tport = npf_portmap_get(pm, alen, taddr); 569 } else { 570 nt->nt_tport = np->n_tport; 571 } 572 out: 573 mutex_enter(&np->n_lock); 574 LIST_INSERT_HEAD(&np->n_nat_list, nt, nt_entry); 575 /* Note: we also consume the reference on policy. */ 576 mutex_exit(&np->n_lock); 577 return nt; 578 } 579 580 /* 581 * npf_dnat_translate: perform translation given the state data. 582 */ 583 static inline int 584 npf_dnat_translate(npf_cache_t *npc, npf_nat_t *nt, npf_flow_t flow) 585 { 586 const npf_natpolicy_t *np = nt->nt_natpolicy; 587 const unsigned which = npf_nat_which(np->n_type, flow); 588 const npf_addr_t *addr; 589 in_port_t port; 590 591 KASSERT(npf_iscached(npc, NPC_IP46)); 592 KASSERT(npf_iscached(npc, NPC_LAYER4)); 593 594 if (flow == NPF_FLOW_FORW) { 595 /* "Forwards" stream: use translation address/port. */ 596 addr = &nt->nt_taddr; 597 port = nt->nt_tport; 598 } else { 599 /* "Backwards" stream: use original address/port. */ 600 addr = &nt->nt_oaddr; 601 port = nt->nt_oport; 602 } 603 KASSERT((np->n_flags & NPF_NAT_PORTS) != 0 || port == 0); 604 605 /* Execute ALG translation first. */ 606 if ((npc->npc_info & NPC_ALG_EXEC) == 0) { 607 npc->npc_info |= NPC_ALG_EXEC; 608 npf_alg_exec(npc, nt, flow); 609 npf_recache(npc); 610 } 611 KASSERT(!nbuf_flag_p(npc->npc_nbuf, NBUF_DATAREF_RESET)); 612 613 /* Finally, perform the translation. */ 614 return npf_napt_rwr(npc, which, addr, port); 615 } 616 617 /* 618 * npf_snat_translate: perform translation given the algorithm. 619 */ 620 static inline int 621 npf_snat_translate(npf_cache_t *npc, const npf_natpolicy_t *np, npf_flow_t flow) 622 { 623 const unsigned which = npf_nat_which(np->n_type, flow); 624 const npf_addr_t *taddr; 625 npf_addr_t addr; 626 627 KASSERT(np->n_flags & NPF_NAT_STATIC); 628 629 switch (np->n_algo) { 630 case NPF_ALGO_NETMAP: 631 npf_nat_algo_netmap(npc, np, which, &addr); 632 taddr = &addr; 633 break; 634 case NPF_ALGO_NPT66: 635 return npf_npt66_rwr(npc, which, &np->n_taddr, 636 np->n_tmask, np->n_npt66_adj); 637 default: 638 taddr = &np->n_taddr; 639 break; 640 } 641 return npf_napt_rwr(npc, which, taddr, np->n_tport); 642 } 643 644 /* 645 * Associate NAT policy with an existing connection state. 646 */ 647 npf_nat_t * 648 npf_nat_share_policy(npf_cache_t *npc, npf_conn_t *con, npf_nat_t *src_nt) 649 { 650 npf_natpolicy_t *np = src_nt->nt_natpolicy; 651 npf_nat_t *nt; 652 int ret; 653 654 /* Create a new NAT entry. */ 655 nt = npf_nat_create(npc, np, con); 656 if (__predict_false(nt == NULL)) { 657 return NULL; 658 } 659 atomic_inc_uint(&np->n_refcnt); 660 661 /* Associate the NAT translation entry with the connection. */ 662 ret = npf_conn_setnat(npc, con, nt, np->n_type); 663 if (__predict_false(ret)) { 664 /* Will release the reference. */ 665 npf_nat_destroy(con, nt); 666 return NULL; 667 } 668 return nt; 669 } 670 671 /* 672 * npf_nat_lookup: lookup the (dynamic) NAT state and return its entry, 673 * 674 * => Checks that the packet is on the interface where NAT policy is applied. 675 * => Determines the flow direction in the context of the NAT policy. 676 */ 677 static npf_nat_t * 678 npf_nat_lookup(const npf_cache_t *npc, npf_conn_t *con, 679 const unsigned di, npf_flow_t *flow) 680 { 681 const nbuf_t *nbuf = npc->npc_nbuf; 682 const npf_natpolicy_t *np; 683 npf_nat_t *nt; 684 685 if ((nt = npf_conn_getnat(con)) == NULL) { 686 return NULL; 687 } 688 if (nt->nt_ifid != nbuf->nb_ifid) { 689 return NULL; 690 } 691 692 np = nt->nt_natpolicy; 693 KASSERT(atomic_load_relaxed(&np->n_refcnt) > 0); 694 695 /* 696 * We rely on NPF_NAT{IN,OUT} being equal to PFIL_{IN,OUT}. 697 */ 698 CTASSERT(NPF_NATIN == PFIL_IN && NPF_NATOUT == PFIL_OUT); 699 *flow = (np->n_type == di) ? NPF_FLOW_FORW : NPF_FLOW_BACK; 700 return nt; 701 } 702 703 /* 704 * npf_do_nat: 705 * 706 * - Inspect packet for a NAT policy, unless a connection with a NAT 707 * association already exists. In such case, determine whether it 708 * is a "forwards" or "backwards" stream. 709 * 710 * - Perform translation: rewrite source or destination fields, 711 * depending on translation type and direction. 712 * 713 * - Associate a NAT policy with a connection (may establish a new). 714 */ 715 int 716 npf_do_nat(npf_cache_t *npc, npf_conn_t *con, const unsigned di) 717 { 718 nbuf_t *nbuf = npc->npc_nbuf; 719 npf_conn_t *ncon = NULL; 720 npf_natpolicy_t *np; 721 npf_flow_t flow; 722 npf_nat_t *nt; 723 int error; 724 725 /* All relevant data should be already cached. */ 726 if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) { 727 return 0; 728 } 729 KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)); 730 731 /* 732 * Return the NAT entry associated with the connection, if any. 733 * Determines whether the stream is "forwards" or "backwards". 734 * Note: no need to lock, since reference on connection is held. 735 */ 736 if (con && (nt = npf_nat_lookup(npc, con, di, &flow)) != NULL) { 737 np = nt->nt_natpolicy; 738 goto translate; 739 } 740 741 /* 742 * Inspect the packet for a NAT policy, if there is no connection. 743 * Note: acquires a reference if found. 744 */ 745 np = npf_nat_inspect(npc, di); 746 if (np == NULL) { 747 /* If packet does not match - done. */ 748 return 0; 749 } 750 flow = NPF_FLOW_FORW; 751 752 /* Static NAT - just perform the translation. */ 753 if (np->n_flags & NPF_NAT_STATIC) { 754 if (nbuf_cksum_barrier(nbuf, di)) { 755 npf_recache(npc); 756 } 757 error = npf_snat_translate(npc, np, flow); 758 npf_natpolicy_release(np); 759 return error; 760 } 761 762 /* 763 * If there is no local connection (no "stateful" rule - unusual, 764 * but possible configuration), establish one before translation. 765 * Note that it is not a "pass" connection, therefore passing of 766 * "backwards" stream depends on other, stateless filtering rules. 767 */ 768 if (con == NULL) { 769 ncon = npf_conn_establish(npc, di, true); 770 if (ncon == NULL) { 771 npf_natpolicy_release(np); 772 return ENOMEM; 773 } 774 con = ncon; 775 } 776 777 /* 778 * Create a new NAT entry and associate with the connection. 779 * We will consume the reference on success (release on error). 780 */ 781 nt = npf_nat_create(npc, np, con); 782 if (nt == NULL) { 783 npf_natpolicy_release(np); 784 error = ENOMEM; 785 goto out; 786 } 787 788 /* Determine whether any ALG matches. */ 789 if (npf_alg_match(npc, nt, di)) { 790 KASSERT(nt->nt_alg != NULL); 791 } 792 793 /* Associate the NAT translation entry with the connection. */ 794 error = npf_conn_setnat(npc, con, nt, np->n_type); 795 if (error) { 796 /* Will release the reference. */ 797 npf_nat_destroy(con, nt); 798 goto out; 799 } 800 801 translate: 802 /* May need to process the delayed checksums first (XXX: NetBSD). */ 803 if (nbuf_cksum_barrier(nbuf, di)) { 804 npf_recache(npc); 805 } 806 807 /* Perform the translation. */ 808 error = npf_dnat_translate(npc, nt, flow); 809 out: 810 if (__predict_false(ncon)) { 811 if (error) { 812 /* It was created for NAT - just expire. */ 813 npf_conn_expire(ncon); 814 } 815 npf_conn_release(ncon); 816 } 817 return error; 818 } 819 820 /* 821 * npf_nat_gettrans: return translation IP address and port. 822 */ 823 void 824 npf_nat_gettrans(npf_nat_t *nt, npf_addr_t **addr, in_port_t *port) 825 { 826 *addr = &nt->nt_taddr; 827 *port = nt->nt_tport; 828 } 829 830 /* 831 * npf_nat_getorig: return original IP address and port from translation entry. 832 */ 833 void 834 npf_nat_getorig(npf_nat_t *nt, npf_addr_t **addr, in_port_t *port) 835 { 836 *addr = &nt->nt_oaddr; 837 *port = nt->nt_oport; 838 } 839 840 /* 841 * npf_nat_setalg: associate an ALG with the NAT entry. 842 */ 843 void 844 npf_nat_setalg(npf_nat_t *nt, npf_alg_t *alg, uintptr_t arg) 845 { 846 nt->nt_alg = alg; 847 nt->nt_alg_arg = arg; 848 } 849 850 npf_alg_t * 851 npf_nat_getalg(const npf_nat_t *nt) 852 { 853 return nt->nt_alg; 854 } 855 856 uintptr_t 857 npf_nat_getalgarg(const npf_nat_t *nt) 858 { 859 return nt->nt_alg_arg; 860 } 861 862 /* 863 * npf_nat_destroy: destroy NAT structure (performed on connection expiration). 864 */ 865 void 866 npf_nat_destroy(npf_conn_t *con, npf_nat_t *nt) 867 { 868 npf_natpolicy_t *np = nt->nt_natpolicy; 869 npf_t *npf = np->n_npfctx; 870 npf_alg_t *alg; 871 872 /* Execute the ALG destroy callback, if any. */ 873 if ((alg = npf_nat_getalg(nt)) != NULL) { 874 npf_alg_destroy(npf, alg, nt, con); 875 nt->nt_alg = NULL; 876 } 877 878 /* Return taken port to the portmap. */ 879 if ((np->n_flags & NPF_NAT_PORTMAP) != 0 && nt->nt_tport) { 880 npf_portmap_t *pm = npf->portmap; 881 npf_portmap_put(pm, nt->nt_alen, &nt->nt_taddr, nt->nt_tport); 882 } 883 npf_stats_inc(np->n_npfctx, NPF_STAT_NAT_DESTROY); 884 885 /* 886 * Remove the connection from the list and drop the reference on 887 * the NAT policy. Note: this might trigger its destruction. 888 */ 889 mutex_enter(&np->n_lock); 890 LIST_REMOVE(nt, nt_entry); 891 mutex_exit(&np->n_lock); 892 npf_natpolicy_release(np); 893 894 pool_cache_put(nat_cache, nt); 895 } 896 897 /* 898 * npf_nat_export: serialize the NAT entry with a NAT policy ID. 899 */ 900 void 901 npf_nat_export(npf_t *npf, const npf_nat_t *nt, nvlist_t *con_nv) 902 { 903 npf_natpolicy_t *np = nt->nt_natpolicy; 904 unsigned alen = nt->nt_alen; 905 nvlist_t *nat_nv; 906 907 nat_nv = nvlist_create(0); 908 if (nt->nt_ifid) { 909 char ifname[IFNAMSIZ]; 910 npf_ifmap_copyname(npf, nt->nt_ifid, ifname, sizeof(ifname)); 911 nvlist_add_string(nat_nv, "ifname", ifname); 912 } 913 nvlist_add_number(nat_nv, "alen", alen); 914 915 nvlist_add_binary(nat_nv, "oaddr", &nt->nt_oaddr, alen); 916 nvlist_add_number(nat_nv, "oport", nt->nt_oport); 917 918 nvlist_add_binary(nat_nv, "taddr", &nt->nt_taddr, alen); 919 nvlist_add_number(nat_nv, "tport", nt->nt_tport); 920 921 nvlist_add_number(nat_nv, "nat-policy", np->n_id); 922 nvlist_move_nvlist(con_nv, "nat", nat_nv); 923 } 924 925 /* 926 * npf_nat_import: find the NAT policy and unserialize the NAT entry. 927 */ 928 npf_nat_t * 929 npf_nat_import(npf_t *npf, const nvlist_t *nat, 930 npf_ruleset_t *natlist, npf_conn_t *con) 931 { 932 npf_natpolicy_t *np; 933 npf_nat_t *nt; 934 const char *ifname; 935 const void *taddr, *oaddr; 936 size_t alen, len; 937 uint64_t np_id; 938 939 np_id = dnvlist_get_number(nat, "nat-policy", UINT64_MAX); 940 if ((np = npf_ruleset_findnat(natlist, np_id)) == NULL) { 941 return NULL; 942 } 943 nt = pool_cache_get(nat_cache, PR_WAITOK); 944 memset(nt, 0, sizeof(npf_nat_t)); 945 946 ifname = dnvlist_get_string(nat, "ifname", NULL); 947 if (ifname && (nt->nt_ifid = npf_ifmap_register(npf, ifname)) == 0) { 948 goto err; 949 } 950 951 alen = dnvlist_get_number(nat, "alen", 0); 952 if (alen == 0 || alen > sizeof(npf_addr_t)) { 953 goto err; 954 } 955 956 taddr = dnvlist_get_binary(nat, "taddr", &len, NULL, 0); 957 if (!taddr || len != alen) { 958 goto err; 959 } 960 memcpy(&nt->nt_taddr, taddr, sizeof(npf_addr_t)); 961 962 oaddr = dnvlist_get_binary(nat, "oaddr", &len, NULL, 0); 963 if (!oaddr || len != alen) { 964 goto err; 965 } 966 memcpy(&nt->nt_oaddr, oaddr, sizeof(npf_addr_t)); 967 968 nt->nt_oport = dnvlist_get_number(nat, "oport", 0); 969 nt->nt_tport = dnvlist_get_number(nat, "tport", 0); 970 971 /* Take a specific port from port-map. */ 972 if ((np->n_flags & NPF_NAT_PORTMAP) != 0 && nt->nt_tport) { 973 npf_portmap_t *pm = npf->portmap; 974 975 if (!npf_portmap_take(pm, nt->nt_alen, 976 &nt->nt_taddr, nt->nt_tport)) { 977 goto err; 978 } 979 } 980 npf_stats_inc(npf, NPF_STAT_NAT_CREATE); 981 982 /* 983 * Associate, take a reference and insert. Unlocked/non-atomic 984 * since the policy is not yet globally visible. 985 */ 986 nt->nt_natpolicy = np; 987 nt->nt_conn = con; 988 atomic_store_relaxed(&np->n_refcnt, 989 atomic_load_relaxed(&np->n_refcnt) + 1); 990 LIST_INSERT_HEAD(&np->n_nat_list, nt, nt_entry); 991 return nt; 992 err: 993 pool_cache_put(nat_cache, nt); 994 return NULL; 995 } 996 997 #if defined(DDB) || defined(_NPF_TESTING) 998 999 void 1000 npf_nat_dump(const npf_nat_t *nt) 1001 { 1002 const npf_natpolicy_t *np; 1003 struct in_addr ip; 1004 1005 np = nt->nt_natpolicy; 1006 memcpy(&ip, &nt->nt_taddr, sizeof(ip)); 1007 printf("\tNATP(%p): type %u flags 0x%x taddr %s tport %d\n", np, 1008 np->n_type, np->n_flags, inet_ntoa(ip), ntohs(np->n_tport)); 1009 memcpy(&ip, &nt->nt_oaddr, sizeof(ip)); 1010 printf("\tNAT: original address %s oport %d tport %d\n", 1011 inet_ntoa(ip), ntohs(nt->nt_oport), ntohs(nt->nt_tport)); 1012 if (nt->nt_alg) { 1013 printf("\tNAT ALG = %p, ARG = %p\n", 1014 nt->nt_alg, (void *)nt->nt_alg_arg); 1015 } 1016 } 1017 1018 #endif 1019