xref: /netbsd-src/sys/net/npf/npf_nat.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /*	$NetBSD: npf_nat.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
2 
3 /*-
4  * Copyright (c) 2010 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This material is based upon work partially supported by The
8  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * NPF network address port translation (NAPT).
34  * Described in RFC 2663, RFC 3022.  Commonly just "NAT".
35  *
36  * Overview
37  *
 *	There are a few mechanisms: NAT policy, port map and translation.
39  *	NAT module has a separate ruleset, where rules contain associated
40  *	NAT policy, thus flexible filter criteria can be used.
41  *
42  * NAT policies and port maps
43  *
44  *	NAT policy is applied when a packet matches the rule.  Apart from
45  *	filter criteria, NAT policy has a translation (gateway) IP address
46  *	and associated port map.  Port map is a bitmap used to reserve and
47  *	use unique TCP/UDP ports for translation.  Port maps are unique to
48  *	the IP addresses, therefore multiple NAT policies with the same IP
49  *	will share the same port map.
50  *
51  * NAT sessions and translation entries
52  *
53  *	NAT module relies on session management module.  Each "NAT" session
54  *	has an associated translation entry (npf_nat_t).  It contains local
55  *	i.e. original IP address with port and translation port, allocated
56  *	from the port map.  Each NAT translation entry is associated with
 *	the policy, which contains the translation IP address.  The allocated
 *	port is returned to the port map and the translation entry is
 *	destroyed when the "NAT" session expires.
60  */
61 
62 #ifdef _KERNEL
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
65 
66 #include <sys/param.h>
67 #include <sys/kernel.h>
68 #endif
69 
70 #include <sys/atomic.h>
71 #include <sys/bitops.h>
72 #include <sys/kmem.h>
73 #include <sys/pool.h>
74 #include <net/pfil.h>
75 #include <netinet/in.h>
76 
77 #include "npf_impl.h"
78 
79 /*
80  * NPF portmap structure.
81  */
82 typedef struct {
83 	u_int				p_refcnt;
84 	uint32_t			p_bitmap[0];
85 } npf_portmap_t;
86 
/*
 * Portmap range: [ 1024 .. 65535 ].
 *
 * Each bitmap word covers 32 consecutive ports: the word index is the
 * port offset shifted right by PORTMAP_SHIFT and the bit index is the
 * port offset masked with PORTMAP_MASK.
 */
#define	PORTMAP_SHIFT			(5)
#define	PORTMAP_MASK			((1 << PORTMAP_SHIFT) - 1)
#define	PORTMAP_FIRST			(1024)
#define	PORTMAP_SIZE			((65536 - PORTMAP_FIRST) >> PORTMAP_SHIFT)
/* Value of a bitmap word with every port taken. */
#define	PORTMAP_FILLED			(~(uint32_t)0)
93 
/*
 * NAT policy structure.
 *
 * One is created per NAT rule by npf_nat_newpolicy() and linked on the
 * global nat_policy_list until npf_nat_freepolicy() removes it.
 */
struct npf_natpolicy {
	/* Linkage on nat_policy_list. */
	LIST_ENTRY(npf_natpolicy)	n_entry;
	/* Translation (gateway) IP address. */
	in_addr_t			n_gw_ip;
	/* Port map; shared by all policies with the same n_gw_ip. */
	npf_portmap_t *			n_portmap;
};
100 
/*
 * NAT translation entry for a session.
 *
 * Allocated from nat_cache in npf_natout() and released by
 * npf_nat_expire().
 */
struct npf_nat {
	/* Policy this entry was created from (supplies the gateway IP). */
	npf_natpolicy_t *		nt_natpolicy;
	/* Local address and port (for backwards translation). */
	in_addr_t			nt_laddr;
	in_port_t			nt_lport;
	/*
	 * Translation port (for forwards), in network byte-order as
	 * returned by npf_nat_getport(); zero when no port is reserved.
	 */
	in_port_t			nt_tport;
	/* ALG (if any) associated with this NAT entry. */
	npf_alg_t *			nt_alg;
	/* Opaque argument stored together with the ALG by npf_nat_setalg(). */
	uintptr_t			nt_alg_arg;
};
113 
/* Active NAT ruleset; replaced atomically by npf_nat_reload(). */
static npf_ruleset_t *			nat_ruleset;
/* All NAT policies, used to find an existing port map for an address. */
static LIST_HEAD(, npf_natpolicy)	nat_policy_list;
/* Pool cache for npf_nat_t translation entries. */
static pool_cache_t			nat_cache;
117 
118 /*
119  * npf_nat_sys{init,fini}: initialise/destroy NAT subsystem structures.
120  */
121 
122 void
123 npf_nat_sysinit(void)
124 {
125 
126 	nat_cache = pool_cache_init(sizeof(npf_nat_t), coherency_unit,
127 	    0, 0, "npfnatpl", NULL, IPL_NET, NULL, NULL, NULL);
128 	KASSERT(nat_cache != NULL);
129 	nat_ruleset = npf_ruleset_create();
130 	LIST_INIT(&nat_policy_list);
131 }
132 
133 void
134 npf_nat_sysfini(void)
135 {
136 
137 	/* Flush NAT policies. */
138 	npf_nat_reload(NULL);
139 	KASSERT(LIST_EMPTY(&nat_policy_list));
140 	pool_cache_destroy(nat_cache);
141 }
142 
143 /*
144  * npf_nat_newpolicy: allocate a new NAT policy.
145  *
146  * => Shares portmap if policy is on existing translation address.
147  * => XXX: serialise at upper layer.
148  */
149 npf_natpolicy_t *
150 npf_nat_newpolicy(in_addr_t gip)
151 {
152 	npf_natpolicy_t *np, *it;
153 	npf_portmap_t *pm;
154 
155 	np = kmem_zalloc(sizeof(npf_natpolicy_t), KM_SLEEP);
156 	if (np == NULL) {
157 		return NULL;
158 	}
159 	np->n_gw_ip = gip;
160 
161 	/* Search for a NAT policy using the same translation address. */
162 	pm = NULL;
163 	LIST_FOREACH(it, &nat_policy_list, n_entry) {
164 		if (it->n_gw_ip != np->n_gw_ip)
165 			continue;
166 		pm = it->n_portmap;
167 		break;
168 	}
169 	if (pm == NULL) {
170 		/* Allocate a new port map for the NAT policy. */
171 		pm = kmem_zalloc(sizeof(npf_portmap_t) +
172 		    (PORTMAP_SIZE * sizeof(uint32_t)), KM_SLEEP);
173 		if (pm == NULL) {
174 			kmem_free(np, sizeof(npf_natpolicy_t));
175 			return NULL;
176 		}
177 		pm->p_refcnt = 1;
178 		KASSERT((uintptr_t)pm->p_bitmap == (uintptr_t)pm + sizeof(*pm));
179 	} else {
180 		/* Share the port map. */
181 		pm->p_refcnt++;
182 	}
183 	np->n_portmap = pm;
184 	/*
185 	 * Note: old policies with new might co-exist in the list,
186 	 * while reload is in progress, but that is not an issue.
187 	 */
188 	LIST_INSERT_HEAD(&nat_policy_list, np, n_entry);
189 	return np;
190 }
191 
192 /*
193  * npf_nat_freepolicy: free NAT policy and, on last reference, free portmap.
194  *
195  * => Called from npf_rule_free() during the reload via npf_nat_reload().
196  */
197 void
198 npf_nat_freepolicy(npf_natpolicy_t *np)
199 {
200 	npf_portmap_t *pm = np->n_portmap;
201 
202 	LIST_REMOVE(np, n_entry);
203 	if (--pm->p_refcnt == 0) {
204 		kmem_free(pm, sizeof(npf_portmap_t) +
205 		    (PORTMAP_SIZE * sizeof(uint32_t)));
206 	}
207 	kmem_free(np, sizeof(npf_natpolicy_t));
208 }
209 
210 /*
211  * npf_nat_reload: activate new ruleset of NAT policies and destroy old.
212  *
213  * => Destruction of ruleset will perform npf_nat_freepolicy() for each policy.
214  */
215 void
216 npf_nat_reload(npf_ruleset_t *nset)
217 {
218 	npf_ruleset_t *oldnset;
219 
220 	oldnset = atomic_swap_ptr(&nat_ruleset, nset);
221 	if (oldnset) {
222 		npf_ruleset_destroy(oldnset);
223 	}
224 }
225 
226 /*
227  * npf_nat_getport: allocate and return a port in the NAT policy portmap.
228  *
229  * => Returns in network byte-order.
230  * => Zero indicates failure.
231  */
232 static in_port_t
233 npf_nat_getport(npf_natpolicy_t *np)
234 {
235 	npf_portmap_t *pm = np->n_portmap;
236 	u_int n = PORTMAP_SIZE, idx, bit;
237 	uint32_t map, nmap;
238 
239 	idx = arc4random() % PORTMAP_SIZE;
240 	for (;;) {
241 		KASSERT(idx < PORTMAP_SIZE);
242 		map = pm->p_bitmap[idx];
243 		if (__predict_false(map == PORTMAP_FILLED)) {
244 			if (n-- == 0) {
245 				/* No space. */
246 				return 0;
247 			}
248 			/* This bitmap is sfilled, next. */
249 			idx = (idx ? idx : PORTMAP_SIZE) - 1;
250 			continue;
251 		}
252 		bit = ffs32(~map) - 1;
253 		nmap = map | (1 << bit);
254 		if (atomic_cas_32(&pm->p_bitmap[idx], map, nmap) == map) {
255 			/* Success. */
256 			break;
257 		}
258 	}
259 	return htons(PORTMAP_FIRST + (idx << PORTMAP_SHIFT) + bit);
260 }
261 
262 /*
263  * npf_nat_putport: return port as available in the NAT policy portmap.
264  *
265  * => Port should be in network byte-order.
266  */
267 static void
268 npf_nat_putport(npf_natpolicy_t *np, in_port_t port)
269 {
270 	npf_portmap_t *pm = np->n_portmap;
271 	uint32_t map, nmap;
272 	u_int idx, bit;
273 
274 	port = ntohs(port) - PORTMAP_FIRST;
275 	idx = port >> PORTMAP_SHIFT;
276 	bit = port & PORTMAP_MASK;
277 	do {
278 		map = pm->p_bitmap[idx];
279 		KASSERT(map | (1 << bit));
280 		nmap = map & ~(1 << bit);
281 	} while (atomic_cas_32(&pm->p_bitmap[idx], map, nmap) != map);
282 }
283 
284 /*
285  * npf_natout:
286  *	- Inspect packet for a NAT policy, unless session with NAT
287  *	  association already exists.
288  *	- Perform "forwards" translation: rewrite source address, etc.
289  *	- Establish sessions or if already exists, associate NAT policy.
290  */
291 int
292 npf_natout(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf,
293     struct ifnet *ifp, const int layer)
294 {
295 	const int proto = npc->npc_proto;
296 	void *n_ptr = nbuf_dataptr(nbuf);
297 	npf_session_t *nse = NULL; /* XXXgcc */
298 	npf_natpolicy_t *np;
299 	npf_nat_t *nt;
300 	npf_rule_t *rl;
301 	in_addr_t gwip;
302 	in_port_t tport;
303 	int error;
304 	bool new;
305 
306 	/* All relevant IPv4 data should be already cached. */
307 	if (!npf_iscached(npc, NPC_IP46 | NPC_ADDRS)) {
308 		return 0;
309 	}
310 
311 	/* Detect if there is a linked session pointing to the NAT entry. */
312 	nt = se ? npf_session_retlinknat(se) : NULL;
313 	if (nt) {
314 		np = nt->nt_natpolicy;
315 		new = false;
316 		goto skip;
317 	}
318 
319 	/* Inspect packet against NAT ruleset, return a policy. */
320 	rl = npf_ruleset_match(nat_ruleset, npc, nbuf, ifp, PFIL_OUT, layer);
321 	np = rl ? npf_rule_getnat(rl) : NULL;
322 	if (np == NULL) {
323 		/* If packet does not match - done. */
324 		return 0;
325 	}
326 
327 	/* New NAT association. */
328 	nt = pool_cache_get(nat_cache, PR_NOWAIT);
329 	if (nt == NULL){
330 		return ENOMEM;
331 	}
332 	nt->nt_natpolicy = np;
333 	nt->nt_alg = NULL;
334 	new = true;
335 
336 	/* Save local (source) address. */
337 	nt->nt_laddr = npc->npc_srcip;
338 
339 	if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
340 		/* Also, save local TCP/UDP port. */
341 		KASSERT(npf_iscached(npc, NPC_PORTS));
342 		nt->nt_lport = npc->npc_sport;
343 		/* Get a new port for translation. */
344 		nt->nt_tport = npf_nat_getport(np);
345 	} else {
346 		nt->nt_lport = 0;
347 		nt->nt_tport = 0;
348 	}
349 
350 	/* Match any ALGs. */
351 	npf_alg_exec(npc, nbuf, nt, PFIL_OUT);
352 
353 	/* If there is no local session, establish one before translation. */
354 	if (se == NULL) {
355 		nse = npf_session_establish(npc, NULL, PFIL_OUT);
356 		if (nse == NULL) {
357 			error = ENOMEM;
358 			goto out;
359 		}
360 		se = nse;
361 	} else {
362 		nse = NULL;
363 	}
364 skip:
365 	if (layer == NPF_LAYER_2 && /* XXX */
366 	    (n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_elen)) == NULL)
367 		return EINVAL;
368 
369 	/* Execute ALG hooks first. */
370 	npf_alg_exec(npc, nbuf, nt, PFIL_OUT);
371 
372 	gwip = np->n_gw_ip;
373 	tport = nt->nt_tport;
374 
375 	/*
376 	 * Perform translation: rewrite source address et al.
377 	 * Note: cache may be used in npf_rwrport(), update only in the end.
378 	 */
379 	if (!npf_rwrip(npc, nbuf, n_ptr, PFIL_OUT, gwip)) {
380 		error = EINVAL;
381 		goto out;
382 	}
383 	if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
384 		KASSERT(tport != 0);
385 		if (!npf_rwrport(npc, nbuf, n_ptr, PFIL_OUT, tport, gwip)) {
386 			error = EINVAL;
387 			goto out;
388 		}
389 	}
390 	/* Success: cache new address and port (if any). */
391 	npc->npc_srcip = gwip;
392 	npc->npc_sport = tport;
393 	error = 0;
394 
395 	if (__predict_false(new)) {
396 		npf_session_t *natse;
397 		/*
398 		 * Establish a new NAT session using translated address and
399 		 * associate NAT translation data with this session.
400 		 *
401 		 * Note: packet now has a translated address in the cache.
402 		 */
403 		natse = npf_session_establish(npc, nt, PFIL_OUT);
404 		if (natse == NULL) {
405 			error = ENOMEM;
406 			goto out;
407 		}
408 		/*
409 		 * Link local session with NAT session, if no link already.
410 		 */
411 		npf_session_link(se, natse);
412 		npf_session_release(natse);
413 out:
414 		if (error) {
415 			if (nse != NULL) {
416 				/* XXX: expire local session if new? */
417 			}
418 			/* Will free the structure and return the port. */
419 			npf_nat_expire(nt);
420 		}
421 		if (nse != NULL) {
422 			/* Drop the reference local session was new. */
423 			npf_session_release(nse);
424 		}
425 	}
426 	return error;
427 }
428 
429 /*
430  * npf_natin:
431  *	- Inspect packet for a session with associated NAT policy.
432  *	- Perform "backwards" translation: rewrite destination address, etc.
433  */
434 int
435 npf_natin(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf, const int layer)
436 {
437 	npf_nat_t *nt = se ? npf_session_retnat(se) : NULL;
438 
439 	if (nt == NULL) {
440 		/* No association - no translation. */
441 		return 0;
442 	}
443 	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
444 
445 	void *n_ptr = nbuf_dataptr(nbuf);
446 	in_addr_t laddr = nt->nt_laddr;
447 	in_port_t lport = nt->nt_lport;
448 
449 	if (layer == NPF_LAYER_2) {
450 		n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_elen);
451 		if (n_ptr == NULL) {
452 			return EINVAL;
453 		}
454 	}
455 
456 	/* Execute ALG hooks first. */
457 	npf_alg_exec(npc, nbuf, nt, PFIL_IN);
458 
459 	/*
460 	 * Address translation: rewrite destination address.
461 	 * Note: cache will be used in npf_rwrport(), update only in the end.
462 	 */
463 	if (!npf_rwrip(npc, nbuf, n_ptr, PFIL_IN, laddr)) {
464 		return EINVAL;
465 	}
466 	switch (npc->npc_proto) {
467 	case IPPROTO_TCP:
468 	case IPPROTO_UDP:
469 		KASSERT(npf_iscached(npc, NPC_PORTS));
470 		/* Rewrite destination port. */
471 		if (!npf_rwrport(npc, nbuf, n_ptr, PFIL_IN, lport, laddr)) {
472 			return EINVAL;
473 		}
474 		break;
475 	case IPPROTO_ICMP:
476 		/* None. */
477 		break;
478 	default:
479 		return ENOTSUP;
480 	}
481 	/* Cache new address and port. */
482 	npc->npc_dstip = laddr;
483 	npc->npc_dport = lport;
484 	return 0;
485 }
486 
487 /*
488  * npf_nat_getlocal: return local IP address and port from translation entry.
489  */
490 void
491 npf_nat_getlocal(npf_nat_t *nt, in_addr_t *addr, in_port_t *port)
492 {
493 
494 	*addr = nt->nt_laddr;
495 	*port = nt->nt_lport;
496 }
497 
498 void
499 npf_nat_setalg(npf_nat_t *nt, npf_alg_t *alg, uintptr_t arg)
500 {
501 
502 	nt->nt_alg = alg;
503 	nt->nt_alg_arg = arg;
504 }
505 
506 /*
507  * npf_nat_expire: free NAT-related data structures on session expiration.
508  */
509 void
510 npf_nat_expire(npf_nat_t *nt)
511 {
512 
513 	if (nt->nt_tport) {
514 		npf_natpolicy_t *np = nt->nt_natpolicy;
515 		npf_nat_putport(np, nt->nt_tport);
516 	}
517 	pool_cache_put(nat_cache, nt);
518 }
519 
520 #if defined(DDB) || defined(_NPF_TESTING)
521 
522 void
523 npf_nat_dump(npf_nat_t *nt)
524 {
525 	npf_natpolicy_t *np;
526 	struct in_addr ip;
527 
528 	if (nt) {
529 		np = nt->nt_natpolicy;
530 		goto skip;
531 	}
532 	LIST_FOREACH(np, &nat_policy_list, n_entry) {
533 skip:
534 		ip.s_addr = np->n_gw_ip;
535 		printf("\tNAT policy: gw_ip = %s\n", inet_ntoa(ip));
536 		if (nt == NULL) {
537 			continue;
538 		}
539 		ip.s_addr = nt->nt_laddr;
540 		printf("\tNAT: original address %s, lport %d, tport = %d\n",
541 		    inet_ntoa(ip), ntohs(nt->nt_lport), ntohs(nt->nt_tport));
542 		if (nt->nt_alg) {
543 			printf("\tNAT ALG = %p, ARG = %p\n",
544 			    nt->nt_alg, (void *)nt->nt_alg_arg);
545 		}
546 		return;
547 	}
548 }
549 
550 #endif
551