/*-
 * Copyright (c) 2014-2020 Mindaugas Rasiukevicius <rmind at noxt eu>
 * Copyright (c) 2010-2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NPF network address port translation (NAPT) and other forms of NAT.
 * Described in RFC 2663, RFC 3022, etc.
 *
 * Overview
 *
 *	There are a few mechanisms: NAT policy, port map and translation.
 *	The NAT module has a separate ruleset where rules always have an
 *	associated NAT policy.
40 * 41 * Translation types 42 * 43 * There are two types of translation: outbound (NPF_NATOUT) and 44 * inbound (NPF_NATIN). It should not be confused with connection 45 * direction. See npf_nat_which() for the description of how the 46 * addresses are rewritten. The bi-directional NAT is a combined 47 * outbound and inbound translation, therefore is constructed as 48 * two policies. 49 * 50 * NAT policies and port maps 51 * 52 * The NAT (translation) policy is applied when packet matches the 53 * rule. Apart from the filter criteria, the NAT policy always has 54 * a translation IP address or a table. If port translation is set, 55 * then NAT mechanism relies on port map mechanism. 56 * 57 * Connections, translation entries and their life-cycle 58 * 59 * NAT relies on the connection tracking module. Each translated 60 * connection has an associated translation entry (npf_nat_t) which 61 * contains information used for backwards stream translation, i.e. 62 * the original IP address with port and translation port, allocated 63 * from the port map. Each NAT entry is associated with the policy, 64 * which contains translation IP address. Allocated port is returned 65 * to the port map and NAT entry is destroyed when connection expires. 66 */ 67 68 #ifdef _KERNEL 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.50 2020/05/30 14:16:56 rmind Exp $"); 71 72 #include <sys/param.h> 73 #include <sys/types.h> 74 75 #include <sys/atomic.h> 76 #include <sys/condvar.h> 77 #include <sys/kmem.h> 78 #include <sys/mutex.h> 79 #include <sys/pool.h> 80 #include <sys/proc.h> 81 #endif 82 83 #include "npf_impl.h" 84 #include "npf_conn.h" 85 86 /* 87 * NAT policy structure. 88 */ 89 struct npf_natpolicy { 90 npf_t * n_npfctx; 91 kmutex_t n_lock; 92 LIST_HEAD(, npf_nat) n_nat_list; 93 unsigned n_refcnt; 94 uint64_t n_id; 95 96 /* 97 * Translation type, flags, address or table and the port. 
98 * Additionally, there may be translation algorithm and any 99 * auxiliary data, e.g. NPTv6 adjustment value. 100 * 101 * NPF_NP_CMP_START mark starts here. 102 */ 103 unsigned n_type; 104 unsigned n_flags; 105 unsigned n_alen; 106 107 npf_addr_t n_taddr; 108 npf_netmask_t n_tmask; 109 in_port_t n_tport; 110 unsigned n_tid; 111 112 unsigned n_algo; 113 union { 114 unsigned n_rr_idx; 115 uint16_t n_npt66_adj; 116 }; 117 }; 118 119 /* 120 * Private flags - must be in the NPF_NAT_PRIVMASK range. 121 */ 122 #define NPF_NAT_USETABLE (0x01000000 & NPF_NAT_PRIVMASK) 123 124 #define NPF_NP_CMP_START offsetof(npf_natpolicy_t, n_type) 125 #define NPF_NP_CMP_SIZE (sizeof(npf_natpolicy_t) - NPF_NP_CMP_START) 126 127 /* 128 * NAT entry for a connection. 129 */ 130 struct npf_nat { 131 /* Associated NAT policy. */ 132 npf_natpolicy_t * nt_natpolicy; 133 134 uint16_t nt_ifid; 135 uint16_t nt_alen; 136 137 /* 138 * Translation address as well as the original address which is 139 * used for backwards translation. The same for ports. 140 */ 141 npf_addr_t nt_taddr; 142 npf_addr_t nt_oaddr; 143 144 in_port_t nt_oport; 145 in_port_t nt_tport; 146 147 /* ALG (if any) associated with this NAT entry. */ 148 npf_alg_t * nt_alg; 149 uintptr_t nt_alg_arg; 150 151 LIST_ENTRY(npf_nat) nt_entry; 152 npf_conn_t * nt_conn; 153 }; 154 155 static pool_cache_t nat_cache __read_mostly; 156 157 /* 158 * npf_nat_sys{init,fini}: initialize/destroy NAT subsystem structures. 159 */ 160 161 void 162 npf_nat_sysinit(void) 163 { 164 nat_cache = pool_cache_init(sizeof(npf_nat_t), 0, 165 0, 0, "npfnatpl", NULL, IPL_NET, NULL, NULL, NULL); 166 KASSERT(nat_cache != NULL); 167 } 168 169 void 170 npf_nat_sysfini(void) 171 { 172 /* All NAT policies should already be destroyed. */ 173 pool_cache_destroy(nat_cache); 174 } 175 176 /* 177 * npf_natpolicy_create: create a new NAT policy. 
178 */ 179 npf_natpolicy_t * 180 npf_natpolicy_create(npf_t *npf, const nvlist_t *nat, npf_ruleset_t *rset) 181 { 182 npf_natpolicy_t *np; 183 const void *addr; 184 size_t len; 185 186 np = kmem_zalloc(sizeof(npf_natpolicy_t), KM_SLEEP); 187 atomic_store_relaxed(&np->n_refcnt, 1); 188 np->n_npfctx = npf; 189 190 /* The translation type, flags and policy ID. */ 191 np->n_type = dnvlist_get_number(nat, "type", 0); 192 np->n_flags = dnvlist_get_number(nat, "flags", 0) & ~NPF_NAT_PRIVMASK; 193 np->n_id = dnvlist_get_number(nat, "nat-policy", 0); 194 195 /* Should be exclusively either inbound or outbound NAT. */ 196 if (((np->n_type == NPF_NATIN) ^ (np->n_type == NPF_NATOUT)) == 0) { 197 goto err; 198 } 199 mutex_init(&np->n_lock, MUTEX_DEFAULT, IPL_SOFTNET); 200 LIST_INIT(&np->n_nat_list); 201 202 /* 203 * Translation IP, mask and port (if applicable). If using the 204 * the table, specified by the ID, then the nat-addr/nat-mask will 205 * be used as a filter for the addresses selected from table. 206 */ 207 if (nvlist_exists_number(nat, "nat-table-id")) { 208 if (np->n_flags & NPF_NAT_STATIC) { 209 goto err; 210 } 211 np->n_tid = nvlist_get_number(nat, "nat-table-id"); 212 np->n_tmask = NPF_NO_NETMASK; 213 np->n_flags |= NPF_NAT_USETABLE; 214 } else { 215 addr = dnvlist_get_binary(nat, "nat-addr", &len, NULL, 0); 216 if (!addr || len == 0 || len > sizeof(npf_addr_t)) { 217 goto err; 218 } 219 memcpy(&np->n_taddr, addr, len); 220 np->n_alen = len; 221 np->n_tmask = dnvlist_get_number(nat, "nat-mask", NPF_NO_NETMASK); 222 if (npf_netmask_check(np->n_alen, np->n_tmask)) { 223 goto err; 224 } 225 } 226 np->n_tport = dnvlist_get_number(nat, "nat-port", 0); 227 228 /* 229 * NAT algorithm. 
230 */ 231 np->n_algo = dnvlist_get_number(nat, "nat-algo", 0); 232 switch (np->n_algo) { 233 case NPF_ALGO_NPT66: 234 np->n_npt66_adj = dnvlist_get_number(nat, "npt66-adj", 0); 235 break; 236 case NPF_ALGO_NETMAP: 237 break; 238 case NPF_ALGO_IPHASH: 239 case NPF_ALGO_RR: 240 default: 241 if (np->n_tmask != NPF_NO_NETMASK) { 242 goto err; 243 } 244 break; 245 } 246 return np; 247 err: 248 mutex_destroy(&np->n_lock); 249 kmem_free(np, sizeof(npf_natpolicy_t)); 250 return NULL; 251 } 252 253 int 254 npf_natpolicy_export(const npf_natpolicy_t *np, nvlist_t *nat) 255 { 256 nvlist_add_number(nat, "nat-policy", np->n_id); 257 nvlist_add_number(nat, "type", np->n_type); 258 nvlist_add_number(nat, "flags", np->n_flags); 259 260 if (np->n_flags & NPF_NAT_USETABLE) { 261 nvlist_add_number(nat, "nat-table-id", np->n_tid); 262 } else { 263 nvlist_add_binary(nat, "nat-addr", &np->n_taddr, np->n_alen); 264 nvlist_add_number(nat, "nat-mask", np->n_tmask); 265 } 266 nvlist_add_number(nat, "nat-port", np->n_tport); 267 nvlist_add_number(nat, "nat-algo", np->n_algo); 268 269 switch (np->n_algo) { 270 case NPF_ALGO_NPT66: 271 nvlist_add_number(nat, "npt66-adj", np->n_npt66_adj); 272 break; 273 } 274 return 0; 275 } 276 277 static void 278 npf_natpolicy_release(npf_natpolicy_t *np) 279 { 280 KASSERT(atomic_load_relaxed(&np->n_refcnt) > 0); 281 282 if (atomic_dec_uint_nv(&np->n_refcnt) != 0) { 283 return; 284 } 285 KASSERT(LIST_EMPTY(&np->n_nat_list)); 286 mutex_destroy(&np->n_lock); 287 kmem_free(np, sizeof(npf_natpolicy_t)); 288 } 289 290 /* 291 * npf_natpolicy_destroy: free the NAT policy. 292 * 293 * => Called from npf_rule_free() during the reload via npf_ruleset_destroy(). 294 * => At this point, NAT policy cannot acquire new references. 295 */ 296 void 297 npf_natpolicy_destroy(npf_natpolicy_t *np) 298 { 299 /* 300 * Drain the references. If there are active NAT connections, 301 * then expire them and kick the worker. 
302 */ 303 if (atomic_load_relaxed(&np->n_refcnt) > 1) { 304 npf_nat_t *nt; 305 306 mutex_enter(&np->n_lock); 307 LIST_FOREACH(nt, &np->n_nat_list, nt_entry) { 308 npf_conn_t *con = nt->nt_conn; 309 KASSERT(con != NULL); 310 npf_conn_expire(con); 311 } 312 mutex_exit(&np->n_lock); 313 npf_worker_signal(np->n_npfctx); 314 } 315 KASSERT(atomic_load_relaxed(&np->n_refcnt) >= 1); 316 317 /* 318 * Drop the initial reference, but it might not be the last one. 319 * If so, the last reference will be triggered via: 320 * 321 * npf_conn_destroy() -> npf_nat_destroy() -> npf_natpolicy_release() 322 */ 323 npf_natpolicy_release(np); 324 } 325 326 void 327 npf_nat_freealg(npf_natpolicy_t *np, npf_alg_t *alg) 328 { 329 npf_nat_t *nt; 330 331 mutex_enter(&np->n_lock); 332 LIST_FOREACH(nt, &np->n_nat_list, nt_entry) { 333 if (nt->nt_alg == alg) { 334 npf_alg_destroy(np->n_npfctx, alg, nt, nt->nt_conn); 335 nt->nt_alg = NULL; 336 } 337 } 338 mutex_exit(&np->n_lock); 339 } 340 341 /* 342 * npf_natpolicy_cmp: compare two NAT policies. 343 * 344 * => Return 0 on match, and non-zero otherwise. 345 */ 346 bool 347 npf_natpolicy_cmp(npf_natpolicy_t *np, npf_natpolicy_t *mnp) 348 { 349 const void *np_raw, *mnp_raw; 350 351 /* 352 * Compare the relevant NAT policy information (in its raw form) 353 * that is enough as a matching criteria. 354 */ 355 KASSERT(np && mnp && np != mnp); 356 np_raw = (const uint8_t *)np + NPF_NP_CMP_START; 357 mnp_raw = (const uint8_t *)mnp + NPF_NP_CMP_START; 358 return memcmp(np_raw, mnp_raw, NPF_NP_CMP_SIZE) == 0; 359 } 360 361 void 362 npf_nat_setid(npf_natpolicy_t *np, uint64_t id) 363 { 364 np->n_id = id; 365 } 366 367 uint64_t 368 npf_nat_getid(const npf_natpolicy_t *np) 369 { 370 return np->n_id; 371 } 372 373 /* 374 * npf_nat_which: tell which address (source or destination) should be 375 * rewritten given the combination of the NAT type and flow direction. 376 * 377 * => Returns NPF_SRC or NPF_DST constant. 
378 */ 379 static inline unsigned 380 npf_nat_which(const unsigned type, const npf_flow_t flow) 381 { 382 unsigned which; 383 384 /* The logic below relies on these values being 0 or 1. */ 385 CTASSERT(NPF_SRC == 0 && NPF_DST == 1); 386 CTASSERT(NPF_FLOW_FORW == NPF_SRC && NPF_FLOW_BACK == NPF_DST); 387 388 KASSERT(type == NPF_NATIN || type == NPF_NATOUT); 389 KASSERT(flow == NPF_FLOW_FORW || flow == NPF_FLOW_BACK); 390 391 /* 392 * Outbound NAT rewrites: 393 * 394 * - Source (NPF_SRC) on "forwards" stream. 395 * - Destination (NPF_DST) on "backwards" stream. 396 * 397 * Inbound NAT is other way round. 398 */ 399 which = (type == NPF_NATOUT) ? flow : !flow; 400 KASSERT(which == NPF_SRC || which == NPF_DST); 401 return which; 402 } 403 404 /* 405 * npf_nat_inspect: inspect packet against NAT ruleset and return a policy. 406 * 407 * => Acquire a reference on the policy, if found. 408 * => NAT lookup is protected by EBR. 409 */ 410 static npf_natpolicy_t * 411 npf_nat_inspect(npf_cache_t *npc, const unsigned di) 412 { 413 npf_t *npf = npc->npc_ctx; 414 int slock = npf_config_read_enter(npf); 415 npf_ruleset_t *rlset = npf_config_natset(npf); 416 npf_natpolicy_t *np; 417 npf_rule_t *rl; 418 419 rl = npf_ruleset_inspect(npc, rlset, di, NPF_LAYER_3); 420 if (rl == NULL) { 421 npf_config_read_exit(npf, slock); 422 return NULL; 423 } 424 np = npf_rule_getnat(rl); 425 atomic_inc_uint(&np->n_refcnt); 426 npf_config_read_exit(npf, slock); 427 return np; 428 } 429 430 static void 431 npf_nat_algo_netmap(const npf_cache_t *npc, const npf_natpolicy_t *np, 432 const unsigned which, npf_addr_t *addr) 433 { 434 const npf_addr_t *orig_addr = npc->npc_ips[which]; 435 436 /* 437 * NETMAP: 438 * 439 * addr = net-addr | (orig-addr & ~mask) 440 */ 441 npf_addr_mask(&np->n_taddr, np->n_tmask, npc->npc_alen, addr); 442 npf_addr_bitor(orig_addr, np->n_tmask, npc->npc_alen, addr); 443 } 444 445 static inline npf_addr_t * 446 npf_nat_getaddr(npf_cache_t *npc, npf_natpolicy_t *np, const 
unsigned alen) 447 { 448 npf_tableset_t *ts = npf_config_tableset(np->n_npfctx); 449 npf_table_t *t = npf_tableset_getbyid(ts, np->n_tid); 450 unsigned idx; 451 452 /* 453 * Dynamically select the translation IP address. 454 */ 455 switch (np->n_algo) { 456 case NPF_ALGO_RR: 457 idx = atomic_inc_uint_nv(&np->n_rr_idx); 458 break; 459 case NPF_ALGO_IPHASH: 460 default: 461 idx = npf_addr_mix(alen, 462 npc->npc_ips[NPF_SRC], 463 npc->npc_ips[NPF_DST]); 464 break; 465 } 466 return npf_table_getsome(t, alen, idx); 467 } 468 469 /* 470 * npf_nat_create: create a new NAT translation entry. 471 * 472 * => The caller must pass the NAT policy with a reference acquired for us. 473 */ 474 static npf_nat_t * 475 npf_nat_create(npf_cache_t *npc, npf_natpolicy_t *np, npf_conn_t *con) 476 { 477 const unsigned proto = npc->npc_proto; 478 const unsigned alen = npc->npc_alen; 479 const nbuf_t *nbuf = npc->npc_nbuf; 480 npf_t *npf = npc->npc_ctx; 481 npf_addr_t *taddr; 482 npf_nat_t *nt; 483 484 KASSERT(npf_iscached(npc, NPC_IP46)); 485 KASSERT(npf_iscached(npc, NPC_LAYER4)); 486 487 /* Construct a new NAT entry and associate it with the connection. */ 488 nt = pool_cache_get(nat_cache, PR_NOWAIT); 489 if (__predict_false(!nt)) { 490 return NULL; 491 } 492 npf_stats_inc(npf, NPF_STAT_NAT_CREATE); 493 nt->nt_natpolicy = np; 494 nt->nt_conn = con; 495 nt->nt_alg = NULL; 496 497 /* 498 * Save the interface ID. 499 * 500 * Note: this can be different from the given connection if it 501 * was established on a different interface, using the global state 502 * mode (state.key.interface = 0). 503 */ 504 KASSERT(nbuf->nb_ifid != 0); 505 nt->nt_ifid = nbuf->nb_ifid; 506 507 /* 508 * Select the translation address. 
509 */ 510 if (np->n_flags & NPF_NAT_USETABLE) { 511 int slock = npf_config_read_enter(npf); 512 taddr = npf_nat_getaddr(npc, np, alen); 513 if (__predict_false(!taddr)) { 514 npf_config_read_exit(npf, slock); 515 pool_cache_put(nat_cache, nt); 516 return NULL; 517 } 518 memcpy(&nt->nt_taddr, taddr, alen); 519 npf_config_read_exit(npf, slock); 520 521 } else if (np->n_algo == NPF_ALGO_NETMAP) { 522 const unsigned which = npf_nat_which(np->n_type, NPF_FLOW_FORW); 523 npf_nat_algo_netmap(npc, np, which, &nt->nt_taddr); 524 taddr = &nt->nt_taddr; 525 } else { 526 /* Static IP address. */ 527 taddr = &np->n_taddr; 528 memcpy(&nt->nt_taddr, taddr, alen); 529 } 530 nt->nt_alen = alen; 531 532 /* Save the original address which may be rewritten. */ 533 if (np->n_type == NPF_NATOUT) { 534 /* Outbound NAT: source (think internal) address. */ 535 memcpy(&nt->nt_oaddr, npc->npc_ips[NPF_SRC], alen); 536 } else { 537 /* Inbound NAT: destination (think external) address. */ 538 KASSERT(np->n_type == NPF_NATIN); 539 memcpy(&nt->nt_oaddr, npc->npc_ips[NPF_DST], alen); 540 } 541 542 /* 543 * Port translation, if required, and if it is TCP/UDP. 544 */ 545 if ((np->n_flags & NPF_NAT_PORTS) == 0 || 546 (proto != IPPROTO_TCP && proto != IPPROTO_UDP)) { 547 nt->nt_oport = 0; 548 nt->nt_tport = 0; 549 goto out; 550 } 551 552 /* Save the relevant TCP/UDP port. */ 553 if (proto == IPPROTO_TCP) { 554 const struct tcphdr *th = npc->npc_l4.tcp; 555 nt->nt_oport = (np->n_type == NPF_NATOUT) ? 556 th->th_sport : th->th_dport; 557 } else { 558 const struct udphdr *uh = npc->npc_l4.udp; 559 nt->nt_oport = (np->n_type == NPF_NATOUT) ? 560 uh->uh_sport : uh->uh_dport; 561 } 562 563 /* Get a new port for translation. 
*/ 564 if ((np->n_flags & NPF_NAT_PORTMAP) != 0) { 565 npf_portmap_t *pm = np->n_npfctx->portmap; 566 nt->nt_tport = npf_portmap_get(pm, alen, taddr); 567 } else { 568 nt->nt_tport = np->n_tport; 569 } 570 out: 571 mutex_enter(&np->n_lock); 572 LIST_INSERT_HEAD(&np->n_nat_list, nt, nt_entry); 573 /* Note: we also consume the reference on policy. */ 574 mutex_exit(&np->n_lock); 575 return nt; 576 } 577 578 /* 579 * npf_dnat_translate: perform translation given the state data. 580 */ 581 static inline int 582 npf_dnat_translate(npf_cache_t *npc, npf_nat_t *nt, npf_flow_t flow) 583 { 584 const npf_natpolicy_t *np = nt->nt_natpolicy; 585 const unsigned which = npf_nat_which(np->n_type, flow); 586 const npf_addr_t *addr; 587 in_port_t port; 588 589 KASSERT(npf_iscached(npc, NPC_IP46)); 590 KASSERT(npf_iscached(npc, NPC_LAYER4)); 591 592 if (flow == NPF_FLOW_FORW) { 593 /* "Forwards" stream: use translation address/port. */ 594 addr = &nt->nt_taddr; 595 port = nt->nt_tport; 596 } else { 597 /* "Backwards" stream: use original address/port. */ 598 addr = &nt->nt_oaddr; 599 port = nt->nt_oport; 600 } 601 KASSERT((np->n_flags & NPF_NAT_PORTS) != 0 || port == 0); 602 603 /* Execute ALG translation first. */ 604 if ((npc->npc_info & NPC_ALG_EXEC) == 0) { 605 npc->npc_info |= NPC_ALG_EXEC; 606 npf_alg_exec(npc, nt, flow); 607 npf_recache(npc); 608 } 609 KASSERT(!nbuf_flag_p(npc->npc_nbuf, NBUF_DATAREF_RESET)); 610 611 /* Finally, perform the translation. */ 612 return npf_napt_rwr(npc, which, addr, port); 613 } 614 615 /* 616 * npf_snat_translate: perform translation given the algorithm. 
617 */ 618 static inline int 619 npf_snat_translate(npf_cache_t *npc, const npf_natpolicy_t *np, npf_flow_t flow) 620 { 621 const unsigned which = npf_nat_which(np->n_type, flow); 622 const npf_addr_t *taddr; 623 npf_addr_t addr; 624 625 KASSERT(np->n_flags & NPF_NAT_STATIC); 626 627 switch (np->n_algo) { 628 case NPF_ALGO_NETMAP: 629 npf_nat_algo_netmap(npc, np, which, &addr); 630 taddr = &addr; 631 break; 632 case NPF_ALGO_NPT66: 633 return npf_npt66_rwr(npc, which, &np->n_taddr, 634 np->n_tmask, np->n_npt66_adj); 635 default: 636 taddr = &np->n_taddr; 637 break; 638 } 639 return npf_napt_rwr(npc, which, taddr, np->n_tport); 640 } 641 642 /* 643 * Associate NAT policy with an existing connection state. 644 */ 645 npf_nat_t * 646 npf_nat_share_policy(npf_cache_t *npc, npf_conn_t *con, npf_nat_t *src_nt) 647 { 648 npf_natpolicy_t *np = src_nt->nt_natpolicy; 649 npf_nat_t *nt; 650 int ret; 651 652 /* Create a new NAT entry. */ 653 nt = npf_nat_create(npc, np, con); 654 if (__predict_false(nt == NULL)) { 655 return NULL; 656 } 657 atomic_inc_uint(&np->n_refcnt); 658 659 /* Associate the NAT translation entry with the connection. */ 660 ret = npf_conn_setnat(npc, con, nt, np->n_type); 661 if (__predict_false(ret)) { 662 /* Will release the reference. */ 663 npf_nat_destroy(con, nt); 664 return NULL; 665 } 666 return nt; 667 } 668 669 /* 670 * npf_nat_lookup: lookup the (dynamic) NAT state and return its entry, 671 * 672 * => Checks that the packet is on the interface where NAT policy is applied. 673 * => Determines the flow direction in the context of the NAT policy. 
674 */ 675 static npf_nat_t * 676 npf_nat_lookup(const npf_cache_t *npc, npf_conn_t *con, 677 const unsigned di, npf_flow_t *flow) 678 { 679 const nbuf_t *nbuf = npc->npc_nbuf; 680 const npf_natpolicy_t *np; 681 npf_nat_t *nt; 682 683 if ((nt = npf_conn_getnat(con)) == NULL) { 684 return NULL; 685 } 686 if (nt->nt_ifid != nbuf->nb_ifid) { 687 return NULL; 688 } 689 690 np = nt->nt_natpolicy; 691 KASSERT(atomic_load_relaxed(&np->n_refcnt) > 0); 692 693 /* 694 * We rely on NPF_NAT{IN,OUT} being equal to PFIL_{IN,OUT}. 695 */ 696 CTASSERT(NPF_NATIN == PFIL_IN && NPF_NATOUT == PFIL_OUT); 697 *flow = (np->n_type == di) ? NPF_FLOW_FORW : NPF_FLOW_BACK; 698 return nt; 699 } 700 701 /* 702 * npf_do_nat: 703 * 704 * - Inspect packet for a NAT policy, unless a connection with a NAT 705 * association already exists. In such case, determine whether it 706 * is a "forwards" or "backwards" stream. 707 * 708 * - Perform translation: rewrite source or destination fields, 709 * depending on translation type and direction. 710 * 711 * - Associate a NAT policy with a connection (may establish a new). 712 */ 713 int 714 npf_do_nat(npf_cache_t *npc, npf_conn_t *con, const unsigned di) 715 { 716 nbuf_t *nbuf = npc->npc_nbuf; 717 npf_conn_t *ncon = NULL; 718 npf_natpolicy_t *np; 719 npf_flow_t flow; 720 npf_nat_t *nt; 721 int error; 722 723 /* All relevant data should be already cached. */ 724 if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) { 725 return 0; 726 } 727 KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)); 728 729 /* 730 * Return the NAT entry associated with the connection, if any. 731 * Determines whether the stream is "forwards" or "backwards". 732 * Note: no need to lock, since reference on connection is held. 733 */ 734 if (con && (nt = npf_nat_lookup(npc, con, di, &flow)) != NULL) { 735 np = nt->nt_natpolicy; 736 goto translate; 737 } 738 739 /* 740 * Inspect the packet for a NAT policy, if there is no connection. 
741 * Note: acquires a reference if found. 742 */ 743 np = npf_nat_inspect(npc, di); 744 if (np == NULL) { 745 /* If packet does not match - done. */ 746 return 0; 747 } 748 flow = NPF_FLOW_FORW; 749 750 /* Static NAT - just perform the translation. */ 751 if (np->n_flags & NPF_NAT_STATIC) { 752 if (nbuf_cksum_barrier(nbuf, di)) { 753 npf_recache(npc); 754 } 755 error = npf_snat_translate(npc, np, flow); 756 npf_natpolicy_release(np); 757 return error; 758 } 759 760 /* 761 * If there is no local connection (no "stateful" rule - unusual, 762 * but possible configuration), establish one before translation. 763 * Note that it is not a "pass" connection, therefore passing of 764 * "backwards" stream depends on other, stateless filtering rules. 765 */ 766 if (con == NULL) { 767 ncon = npf_conn_establish(npc, di, true); 768 if (ncon == NULL) { 769 npf_natpolicy_release(np); 770 return ENOMEM; 771 } 772 con = ncon; 773 } 774 775 /* 776 * Create a new NAT entry and associate with the connection. 777 * We will consume the reference on success (release on error). 778 */ 779 nt = npf_nat_create(npc, np, con); 780 if (nt == NULL) { 781 npf_natpolicy_release(np); 782 error = ENOMEM; 783 goto out; 784 } 785 786 /* Determine whether any ALG matches. */ 787 if (npf_alg_match(npc, nt, di)) { 788 KASSERT(nt->nt_alg != NULL); 789 } 790 791 /* Associate the NAT translation entry with the connection. */ 792 error = npf_conn_setnat(npc, con, nt, np->n_type); 793 if (error) { 794 /* Will release the reference. */ 795 npf_nat_destroy(con, nt); 796 goto out; 797 } 798 799 translate: 800 /* May need to process the delayed checksums first (XXX: NetBSD). */ 801 if (nbuf_cksum_barrier(nbuf, di)) { 802 npf_recache(npc); 803 } 804 805 /* Perform the translation. */ 806 error = npf_dnat_translate(npc, nt, flow); 807 out: 808 if (__predict_false(ncon)) { 809 if (error) { 810 /* It was created for NAT - just expire. 
*/ 811 npf_conn_expire(ncon); 812 } 813 npf_conn_release(ncon); 814 } 815 return error; 816 } 817 818 /* 819 * npf_nat_gettrans: return translation IP address and port. 820 */ 821 void 822 npf_nat_gettrans(npf_nat_t *nt, npf_addr_t **addr, in_port_t *port) 823 { 824 *addr = &nt->nt_taddr; 825 *port = nt->nt_tport; 826 } 827 828 /* 829 * npf_nat_getorig: return original IP address and port from translation entry. 830 */ 831 void 832 npf_nat_getorig(npf_nat_t *nt, npf_addr_t **addr, in_port_t *port) 833 { 834 *addr = &nt->nt_oaddr; 835 *port = nt->nt_oport; 836 } 837 838 /* 839 * npf_nat_setalg: associate an ALG with the NAT entry. 840 */ 841 void 842 npf_nat_setalg(npf_nat_t *nt, npf_alg_t *alg, uintptr_t arg) 843 { 844 nt->nt_alg = alg; 845 nt->nt_alg_arg = arg; 846 } 847 848 npf_alg_t * 849 npf_nat_getalg(const npf_nat_t *nt) 850 { 851 return nt->nt_alg; 852 } 853 854 uintptr_t 855 npf_nat_getalgarg(const npf_nat_t *nt) 856 { 857 return nt->nt_alg_arg; 858 } 859 860 /* 861 * npf_nat_destroy: destroy NAT structure (performed on connection expiration). 862 */ 863 void 864 npf_nat_destroy(npf_conn_t *con, npf_nat_t *nt) 865 { 866 npf_natpolicy_t *np = nt->nt_natpolicy; 867 npf_t *npf = np->n_npfctx; 868 npf_alg_t *alg; 869 870 /* Execute the ALG destroy callback, if any. */ 871 if ((alg = npf_nat_getalg(nt)) != NULL) { 872 npf_alg_destroy(npf, alg, nt, con); 873 nt->nt_alg = NULL; 874 } 875 876 /* Return taken port to the portmap. */ 877 if ((np->n_flags & NPF_NAT_PORTMAP) != 0 && nt->nt_tport) { 878 npf_portmap_t *pm = npf->portmap; 879 npf_portmap_put(pm, nt->nt_alen, &nt->nt_taddr, nt->nt_tport); 880 } 881 npf_stats_inc(np->n_npfctx, NPF_STAT_NAT_DESTROY); 882 883 /* 884 * Remove the connection from the list and drop the reference on 885 * the NAT policy. Note: this might trigger its destruction. 
886 */ 887 mutex_enter(&np->n_lock); 888 LIST_REMOVE(nt, nt_entry); 889 mutex_exit(&np->n_lock); 890 npf_natpolicy_release(np); 891 892 pool_cache_put(nat_cache, nt); 893 } 894 895 /* 896 * npf_nat_export: serialize the NAT entry with a NAT policy ID. 897 */ 898 void 899 npf_nat_export(npf_t *npf, const npf_nat_t *nt, nvlist_t *con_nv) 900 { 901 npf_natpolicy_t *np = nt->nt_natpolicy; 902 unsigned alen = nt->nt_alen; 903 nvlist_t *nat_nv; 904 905 nat_nv = nvlist_create(0); 906 if (nt->nt_ifid) { 907 char ifname[IFNAMSIZ]; 908 npf_ifmap_copyname(npf, nt->nt_ifid, ifname, sizeof(ifname)); 909 nvlist_add_string(nat_nv, "ifname", ifname); 910 } 911 nvlist_add_number(nat_nv, "alen", alen); 912 913 nvlist_add_binary(nat_nv, "oaddr", &nt->nt_oaddr, alen); 914 nvlist_add_number(nat_nv, "oport", nt->nt_oport); 915 916 nvlist_add_binary(nat_nv, "taddr", &nt->nt_taddr, alen); 917 nvlist_add_number(nat_nv, "tport", nt->nt_tport); 918 919 nvlist_add_number(nat_nv, "nat-policy", np->n_id); 920 nvlist_move_nvlist(con_nv, "nat", nat_nv); 921 } 922 923 /* 924 * npf_nat_import: find the NAT policy and unserialize the NAT entry. 
925 */ 926 npf_nat_t * 927 npf_nat_import(npf_t *npf, const nvlist_t *nat, 928 npf_ruleset_t *natlist, npf_conn_t *con) 929 { 930 npf_natpolicy_t *np; 931 npf_nat_t *nt; 932 const char *ifname; 933 const void *taddr, *oaddr; 934 size_t alen, len; 935 uint64_t np_id; 936 937 np_id = dnvlist_get_number(nat, "nat-policy", UINT64_MAX); 938 if ((np = npf_ruleset_findnat(natlist, np_id)) == NULL) { 939 return NULL; 940 } 941 nt = pool_cache_get(nat_cache, PR_WAITOK); 942 memset(nt, 0, sizeof(npf_nat_t)); 943 944 ifname = dnvlist_get_string(nat, "ifname", NULL); 945 if (ifname && (nt->nt_ifid = npf_ifmap_register(npf, ifname)) == 0) { 946 goto err; 947 } 948 949 alen = dnvlist_get_number(nat, "alen", 0); 950 if (alen == 0 || alen > sizeof(npf_addr_t)) { 951 goto err; 952 } 953 954 taddr = dnvlist_get_binary(nat, "taddr", &len, NULL, 0); 955 if (!taddr || len != alen) { 956 goto err; 957 } 958 memcpy(&nt->nt_taddr, taddr, sizeof(npf_addr_t)); 959 960 oaddr = dnvlist_get_binary(nat, "oaddr", &len, NULL, 0); 961 if (!oaddr || len != alen) { 962 goto err; 963 } 964 memcpy(&nt->nt_oaddr, oaddr, sizeof(npf_addr_t)); 965 966 nt->nt_oport = dnvlist_get_number(nat, "oport", 0); 967 nt->nt_tport = dnvlist_get_number(nat, "tport", 0); 968 969 /* Take a specific port from port-map. */ 970 if ((np->n_flags & NPF_NAT_PORTMAP) != 0 && nt->nt_tport) { 971 npf_portmap_t *pm = npf->portmap; 972 973 if (!npf_portmap_take(pm, nt->nt_alen, 974 &nt->nt_taddr, nt->nt_tport)) { 975 goto err; 976 } 977 } 978 npf_stats_inc(npf, NPF_STAT_NAT_CREATE); 979 980 /* 981 * Associate, take a reference and insert. Unlocked/non-atomic 982 * since the policy is not yet globally visible. 
983 */ 984 nt->nt_natpolicy = np; 985 nt->nt_conn = con; 986 atomic_store_relaxed(&np->n_refcnt, 987 atomic_load_relaxed(&np->n_refcnt) + 1); 988 LIST_INSERT_HEAD(&np->n_nat_list, nt, nt_entry); 989 return nt; 990 err: 991 pool_cache_put(nat_cache, nt); 992 return NULL; 993 } 994 995 #if defined(DDB) || defined(_NPF_TESTING) 996 997 void 998 npf_nat_dump(const npf_nat_t *nt) 999 { 1000 const npf_natpolicy_t *np; 1001 struct in_addr ip; 1002 1003 np = nt->nt_natpolicy; 1004 memcpy(&ip, &nt->nt_taddr, sizeof(ip)); 1005 printf("\tNATP(%p): type %u flags 0x%x taddr %s tport %d\n", np, 1006 np->n_type, np->n_flags, inet_ntoa(ip), ntohs(np->n_tport)); 1007 memcpy(&ip, &nt->nt_oaddr, sizeof(ip)); 1008 printf("\tNAT: original address %s oport %d tport %d\n", 1009 inet_ntoa(ip), ntohs(nt->nt_oport), ntohs(nt->nt_tport)); 1010 if (nt->nt_alg) { 1011 printf("\tNAT ALG = %p, ARG = %p\n", 1012 nt->nt_alg, (void *)nt->nt_alg_arg); 1013 } 1014 } 1015 1016 #endif 1017