1 /* $NetBSD: npf_nat.c,v 1.1 2010/08/22 18:56:22 rmind Exp $ */ 2 3 /*- 4 * Copyright (c) 2010 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This material is based upon work partially supported by The 8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * NPF network address port translation (NAPT). 34 * Described in RFC 2663, RFC 3022. Commonly just "NAT". 35 * 36 * Overview 37 * 38 * There are few mechanisms: NAT policy, port map and translation. 39 * NAT module has a separate ruleset, where rules contain associated 40 * NAT policy, thus flexible filter criteria can be used. 41 * 42 * NAT policies and port maps 43 * 44 * NAT policy is applied when a packet matches the rule. Apart from 45 * filter criteria, NAT policy has a translation (gateway) IP address 46 * and associated port map. Port map is a bitmap used to reserve and 47 * use unique TCP/UDP ports for translation. Port maps are unique to 48 * the IP addresses, therefore multiple NAT policies with the same IP 49 * will share the same port map. 50 * 51 * NAT sessions and translation entries 52 * 53 * NAT module relies on session management module. Each "NAT" session 54 * has an associated translation entry (npf_nat_t). It contains local 55 * i.e. original IP address with port and translation port, allocated 56 * from the port map. Each NAT translation entry is associated with 57 * the policy, which contains translation IP address. Allocated port 58 * is returned to the port map and translation entry destroyed when 59 * "NAT" session expires. 60 */ 61 62 #ifdef _KERNEL 63 #include <sys/cdefs.h> 64 __KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.1 2010/08/22 18:56:22 rmind Exp $"); 65 66 #include <sys/param.h> 67 #include <sys/kernel.h> 68 #endif 69 70 #include <sys/atomic.h> 71 #include <sys/bitops.h> 72 #include <sys/kmem.h> 73 #include <sys/pool.h> 74 #include <net/pfil.h> 75 #include <netinet/in.h> 76 77 #include "npf_impl.h" 78 79 /* 80 * NPF portmap structure. 81 */ 82 typedef struct { 83 u_int p_refcnt; 84 uint32_t p_bitmap[0]; 85 } npf_portmap_t; 86 87 /* Portmap range: [ 1024 .. 65535 ] */ 88 #define PORTMAP_FIRST (1024) 89 #define PORTMAP_SIZE ((65536 - PORTMAP_FIRST) / 32) 90 #define PORTMAP_FILLED ((uint32_t)~0) 91 #define PORTMAP_MASK (31) 92 #define PORTMAP_SHIFT (5) 93 94 /* NAT policy structure. */ 95 struct npf_natpolicy { 96 LIST_ENTRY(npf_natpolicy) n_entry; 97 in_addr_t n_gw_ip; 98 npf_portmap_t * n_portmap; 99 }; 100 101 /* NAT translation entry for a session. */ 102 struct npf_nat { 103 npf_natpolicy_t * nt_natpolicy; 104 /* Local address and port (for backwards translation). */ 105 in_addr_t nt_laddr; 106 in_port_t nt_lport; 107 /* Translation port (for forwards). */ 108 in_port_t nt_tport; 109 /* ALG (if any) associated with this NAT entry. */ 110 npf_alg_t * nt_alg; 111 uintptr_t nt_alg_arg; 112 }; 113 114 static npf_ruleset_t * nat_ruleset; 115 static LIST_HEAD(, npf_natpolicy) nat_policy_list; 116 static pool_cache_t nat_cache; 117 118 /* 119 * npf_nat_sys{init,fini}: initialise/destroy NAT subsystem structures. 120 */ 121 122 void 123 npf_nat_sysinit(void) 124 { 125 126 nat_cache = pool_cache_init(sizeof(npf_nat_t), coherency_unit, 127 0, 0, "npfnatpl", NULL, IPL_NET, NULL, NULL, NULL); 128 KASSERT(nat_cache != NULL); 129 nat_ruleset = npf_ruleset_create(); 130 LIST_INIT(&nat_policy_list); 131 } 132 133 void 134 npf_nat_sysfini(void) 135 { 136 137 /* Flush NAT policies. */ 138 npf_nat_reload(NULL); 139 KASSERT(LIST_EMPTY(&nat_policy_list)); 140 pool_cache_destroy(nat_cache); 141 } 142 143 /* 144 * npf_nat_newpolicy: allocate a new NAT policy. 145 * 146 * => Shares portmap if policy is on existing translation address. 147 * => XXX: serialise at upper layer. 148 */ 149 npf_natpolicy_t * 150 npf_nat_newpolicy(in_addr_t gip) 151 { 152 npf_natpolicy_t *np, *it; 153 npf_portmap_t *pm; 154 155 np = kmem_zalloc(sizeof(npf_natpolicy_t), KM_SLEEP); 156 if (np == NULL) { 157 return NULL; 158 } 159 np->n_gw_ip = gip; 160 161 /* Search for a NAT policy using the same translation address. */ 162 pm = NULL; 163 LIST_FOREACH(it, &nat_policy_list, n_entry) { 164 if (it->n_gw_ip != np->n_gw_ip) 165 continue; 166 pm = it->n_portmap; 167 break; 168 } 169 if (pm == NULL) { 170 /* Allocate a new port map for the NAT policy. */ 171 pm = kmem_zalloc(sizeof(npf_portmap_t) + 172 (PORTMAP_SIZE * sizeof(uint32_t)), KM_SLEEP); 173 if (pm == NULL) { 174 kmem_free(np, sizeof(npf_natpolicy_t)); 175 return NULL; 176 } 177 pm->p_refcnt = 1; 178 KASSERT((uintptr_t)pm->p_bitmap == (uintptr_t)pm + sizeof(*pm)); 179 } else { 180 /* Share the port map. */ 181 pm->p_refcnt++; 182 } 183 np->n_portmap = pm; 184 /* 185 * Note: old policies with new might co-exist in the list, 186 * while reload is in progress, but that is not an issue. 187 */ 188 LIST_INSERT_HEAD(&nat_policy_list, np, n_entry); 189 return np; 190 } 191 192 /* 193 * npf_nat_freepolicy: free NAT policy and, on last reference, free portmap. 194 * 195 * => Called from npf_rule_free() during the reload via npf_nat_reload(). 196 */ 197 void 198 npf_nat_freepolicy(npf_natpolicy_t *np) 199 { 200 npf_portmap_t *pm = np->n_portmap; 201 202 LIST_REMOVE(np, n_entry); 203 if (--pm->p_refcnt == 0) { 204 kmem_free(pm, sizeof(npf_portmap_t) + 205 (PORTMAP_SIZE * sizeof(uint32_t))); 206 } 207 kmem_free(np, sizeof(npf_natpolicy_t)); 208 } 209 210 /* 211 * npf_nat_reload: activate new ruleset of NAT policies and destroy old. 212 * 213 * => Destruction of ruleset will perform npf_nat_freepolicy() for each policy. 214 */ 215 void 216 npf_nat_reload(npf_ruleset_t *nset) 217 { 218 npf_ruleset_t *oldnset; 219 220 oldnset = atomic_swap_ptr(&nat_ruleset, nset); 221 if (oldnset) { 222 npf_ruleset_destroy(oldnset); 223 } 224 } 225 226 /* 227 * npf_nat_getport: allocate and return a port in the NAT policy portmap. 228 * 229 * => Returns in network byte-order. 230 * => Zero indicates failure. 231 */ 232 static in_port_t 233 npf_nat_getport(npf_natpolicy_t *np) 234 { 235 npf_portmap_t *pm = np->n_portmap; 236 u_int n = PORTMAP_SIZE, idx, bit; 237 uint32_t map, nmap; 238 239 idx = arc4random() % PORTMAP_SIZE; 240 for (;;) { 241 KASSERT(idx < PORTMAP_SIZE); 242 map = pm->p_bitmap[idx]; 243 if (__predict_false(map == PORTMAP_FILLED)) { 244 if (n-- == 0) { 245 /* No space. */ 246 return 0; 247 } 248 /* This bitmap is sfilled, next. */ 249 idx = (idx ? idx : PORTMAP_SIZE) - 1; 250 continue; 251 } 252 bit = ffs32(~map) - 1; 253 nmap = map | (1 << bit); 254 if (atomic_cas_32(&pm->p_bitmap[idx], map, nmap) == map) { 255 /* Success. */ 256 break; 257 } 258 } 259 return htons(PORTMAP_FIRST + (idx << PORTMAP_SHIFT) + bit); 260 } 261 262 /* 263 * npf_nat_putport: return port as available in the NAT policy portmap. 264 * 265 * => Port should be in network byte-order. 266 */ 267 static void 268 npf_nat_putport(npf_natpolicy_t *np, in_port_t port) 269 { 270 npf_portmap_t *pm = np->n_portmap; 271 uint32_t map, nmap; 272 u_int idx, bit; 273 274 port = ntohs(port) - PORTMAP_FIRST; 275 idx = port >> PORTMAP_SHIFT; 276 bit = port & PORTMAP_MASK; 277 do { 278 map = pm->p_bitmap[idx]; 279 KASSERT(map | (1 << bit)); 280 nmap = map & ~(1 << bit); 281 } while (atomic_cas_32(&pm->p_bitmap[idx], map, nmap) != map); 282 } 283 284 /* 285 * npf_natout: 286 * - Inspect packet for a NAT policy, unless session with NAT 287 * association already exists. 288 * - Perform "forwards" translation: rewrite source address, etc. 289 * - Establish sessions or if already exists, associate NAT policy. 290 */ 291 int 292 npf_natout(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf, 293 struct ifnet *ifp, const int layer) 294 { 295 const int proto = npc->npc_proto; 296 void *n_ptr = nbuf_dataptr(nbuf); 297 npf_session_t *nse = NULL; /* XXXgcc */ 298 npf_natpolicy_t *np; 299 npf_nat_t *nt; 300 npf_rule_t *rl; 301 in_addr_t gwip; 302 in_port_t tport; 303 int error; 304 bool new; 305 306 /* All relevant IPv4 data should be already cached. */ 307 if (!npf_iscached(npc, NPC_IP46 | NPC_ADDRS)) { 308 return 0; 309 } 310 311 /* Detect if there is a linked session pointing to the NAT entry. */ 312 nt = se ? npf_session_retlinknat(se) : NULL; 313 if (nt) { 314 np = nt->nt_natpolicy; 315 new = false; 316 goto skip; 317 } 318 319 /* Inspect packet against NAT ruleset, return a policy. */ 320 rl = npf_ruleset_match(nat_ruleset, npc, nbuf, ifp, PFIL_OUT, layer); 321 np = rl ? npf_rule_getnat(rl) : NULL; 322 if (np == NULL) { 323 /* If packet does not match - done. */ 324 return 0; 325 } 326 327 /* New NAT association. */ 328 nt = pool_cache_get(nat_cache, PR_NOWAIT); 329 if (nt == NULL){ 330 return ENOMEM; 331 } 332 nt->nt_natpolicy = np; 333 nt->nt_alg = NULL; 334 new = true; 335 336 /* Save local (source) address. */ 337 nt->nt_laddr = npc->npc_srcip; 338 339 if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) { 340 /* Also, save local TCP/UDP port. */ 341 KASSERT(npf_iscached(npc, NPC_PORTS)); 342 nt->nt_lport = npc->npc_sport; 343 /* Get a new port for translation. */ 344 nt->nt_tport = npf_nat_getport(np); 345 } else { 346 nt->nt_lport = 0; 347 nt->nt_tport = 0; 348 } 349 350 /* Match any ALGs. */ 351 npf_alg_exec(npc, nbuf, nt, PFIL_OUT); 352 353 /* If there is no local session, establish one before translation. */ 354 if (se == NULL) { 355 nse = npf_session_establish(npc, NULL, PFIL_OUT); 356 if (nse == NULL) { 357 error = ENOMEM; 358 goto out; 359 } 360 se = nse; 361 } else { 362 nse = NULL; 363 } 364 skip: 365 if (layer == NPF_LAYER_2 && /* XXX */ 366 (n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_elen)) == NULL) 367 return EINVAL; 368 369 /* Execute ALG hooks first. */ 370 npf_alg_exec(npc, nbuf, nt, PFIL_OUT); 371 372 gwip = np->n_gw_ip; 373 tport = nt->nt_tport; 374 375 /* 376 * Perform translation: rewrite source address et al. 377 * Note: cache may be used in npf_rwrport(), update only in the end. 378 */ 379 if (!npf_rwrip(npc, nbuf, n_ptr, PFIL_OUT, gwip)) { 380 error = EINVAL; 381 goto out; 382 } 383 if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) { 384 KASSERT(tport != 0); 385 if (!npf_rwrport(npc, nbuf, n_ptr, PFIL_OUT, tport, gwip)) { 386 error = EINVAL; 387 goto out; 388 } 389 } 390 /* Success: cache new address and port (if any). */ 391 npc->npc_srcip = gwip; 392 npc->npc_sport = tport; 393 error = 0; 394 395 if (__predict_false(new)) { 396 npf_session_t *natse; 397 /* 398 * Establish a new NAT session using translated address and 399 * associate NAT translation data with this session. 400 * 401 * Note: packet now has a translated address in the cache. 402 */ 403 natse = npf_session_establish(npc, nt, PFIL_OUT); 404 if (natse == NULL) { 405 error = ENOMEM; 406 goto out; 407 } 408 /* 409 * Link local session with NAT session, if no link already. 410 */ 411 npf_session_link(se, natse); 412 npf_session_release(natse); 413 out: 414 if (error) { 415 if (nse != NULL) { 416 /* XXX: expire local session if new? */ 417 } 418 /* Will free the structure and return the port. */ 419 npf_nat_expire(nt); 420 } 421 if (nse != NULL) { 422 /* Drop the reference local session was new. */ 423 npf_session_release(nse); 424 } 425 } 426 return error; 427 } 428 429 /* 430 * npf_natin: 431 * - Inspect packet for a session with associated NAT policy. 432 * - Perform "backwards" translation: rewrite destination address, etc. 433 */ 434 int 435 npf_natin(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf, const int layer) 436 { 437 npf_nat_t *nt = se ? npf_session_retnat(se) : NULL; 438 439 if (nt == NULL) { 440 /* No association - no translation. */ 441 return 0; 442 } 443 KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS)); 444 445 void *n_ptr = nbuf_dataptr(nbuf); 446 in_addr_t laddr = nt->nt_laddr; 447 in_port_t lport = nt->nt_lport; 448 449 if (layer == NPF_LAYER_2) { 450 n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_elen); 451 if (n_ptr == NULL) { 452 return EINVAL; 453 } 454 } 455 456 /* Execute ALG hooks first. */ 457 npf_alg_exec(npc, nbuf, nt, PFIL_IN); 458 459 /* 460 * Address translation: rewrite destination address. 461 * Note: cache will be used in npf_rwrport(), update only in the end. 462 */ 463 if (!npf_rwrip(npc, nbuf, n_ptr, PFIL_IN, laddr)) { 464 return EINVAL; 465 } 466 switch (npc->npc_proto) { 467 case IPPROTO_TCP: 468 case IPPROTO_UDP: 469 KASSERT(npf_iscached(npc, NPC_PORTS)); 470 /* Rewrite destination port. */ 471 if (!npf_rwrport(npc, nbuf, n_ptr, PFIL_IN, lport, laddr)) { 472 return EINVAL; 473 } 474 break; 475 case IPPROTO_ICMP: 476 /* None. */ 477 break; 478 default: 479 return ENOTSUP; 480 } 481 /* Cache new address and port. */ 482 npc->npc_dstip = laddr; 483 npc->npc_dport = lport; 484 return 0; 485 } 486 487 /* 488 * npf_nat_getlocal: return local IP address and port from translation entry. 489 */ 490 void 491 npf_nat_getlocal(npf_nat_t *nt, in_addr_t *addr, in_port_t *port) 492 { 493 494 *addr = nt->nt_laddr; 495 *port = nt->nt_lport; 496 } 497 498 void 499 npf_nat_setalg(npf_nat_t *nt, npf_alg_t *alg, uintptr_t arg) 500 { 501 502 nt->nt_alg = alg; 503 nt->nt_alg_arg = arg; 504 } 505 506 /* 507 * npf_nat_expire: free NAT-related data structures on session expiration. 508 */ 509 void 510 npf_nat_expire(npf_nat_t *nt) 511 { 512 513 if (nt->nt_tport) { 514 npf_natpolicy_t *np = nt->nt_natpolicy; 515 npf_nat_putport(np, nt->nt_tport); 516 } 517 pool_cache_put(nat_cache, nt); 518 } 519 520 #if defined(DDB) || defined(_NPF_TESTING) 521 522 void 523 npf_nat_dump(npf_nat_t *nt) 524 { 525 npf_natpolicy_t *np; 526 struct in_addr ip; 527 528 if (nt) { 529 np = nt->nt_natpolicy; 530 goto skip; 531 } 532 LIST_FOREACH(np, &nat_policy_list, n_entry) { 533 skip: 534 ip.s_addr = np->n_gw_ip; 535 printf("\tNAT policy: gw_ip = %s\n", inet_ntoa(ip)); 536 if (nt == NULL) { 537 continue; 538 } 539 ip.s_addr = nt->nt_laddr; 540 printf("\tNAT: original address %s, lport %d, tport = %d\n", 541 inet_ntoa(ip), ntohs(nt->nt_lport), ntohs(nt->nt_tport)); 542 if (nt->nt_alg) { 543 printf("\tNAT ALG = %p, ARG = %p\n", 544 nt->nt_alg, (void *)nt->nt_alg_arg); 545 } 546 return; 547 } 548 } 549 550 #endif 551