1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015-2019 Yandex LLC
5  * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6  * Copyright (c) 2016-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/counter.h>
33 #include <sys/ck.h>
34 #include <sys/epoch.h>
35 #include <sys/errno.h>
36 #include <sys/hash.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/module.h>
42 #include <sys/rmlock.h>
43 #include <sys/socket.h>
44 #include <sys/syslog.h>
45 #include <sys/sysctl.h>
46 
47 #include <net/if.h>
48 #include <net/if_var.h>
49 #include <net/if_pflog.h>
50 #include <net/pfil.h>
51 
52 #include <netinet/in.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip_var.h>
55 #include <netinet/ip_fw.h>
56 #include <netinet/ip6.h>
57 #include <netinet/icmp6.h>
58 #include <netinet/ip_icmp.h>
59 #include <netinet/tcp.h>
60 #include <netinet/udp.h>
61 #include <netinet6/in6_var.h>
62 #include <netinet6/ip6_var.h>
63 #include <netinet6/ip_fw_nat64.h>
64 
65 #include <netpfil/ipfw/ip_fw_private.h>
66 #include <netpfil/pf/pf.h>
67 
68 #include "nat64lsn.h"
69 
/* malloc(9) type used for this module's malloc() allocations. */
MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");

/*
 * Module-local names for the network epoch primitives.
 * NAT64LSN_EPOCH_CALL() takes (context, callback) and hands them to
 * NET_EPOCH_CALL() in the opposite order.
 */
#define	NAT64LSN_EPOCH_ENTER(et)  NET_EPOCH_ENTER(et)
#define	NAT64LSN_EPOCH_EXIT(et)   NET_EPOCH_EXIT(et)
#define	NAT64LSN_EPOCH_ASSERT()   NET_EPOCH_ASSERT()
#define	NAT64LSN_EPOCH_CALL(c, f) NET_EPOCH_CALL((f), (c))

/* UMA zones; each one is named after the object type it backs. */
static uma_zone_t nat64lsn_host_zone;
static uma_zone_t nat64lsn_pgchunk_zone;
static uma_zone_t nat64lsn_pg_zone;
static uma_zone_t nat64lsn_aliaslink_zone;
static uma_zone_t nat64lsn_state_zone;
static uma_zone_t nat64lsn_job_zone;

/* Periodic maintenance handler, rescheduled through a callout. */
static void nat64lsn_periodic(void *data);
/* Multiplied by hz when the periodic callout is rescheduled. */
#define	PERIODIC_DELAY		4
/* Fetch the instance configuration referenced by the opcode's arg1. */
#define	NAT64_LOOKUP(chain, cmd)	\
	(struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
/*
 * Delayed job queue, used to create new hosts
 * and new portgroups.
 * The job type selects which half of the union in
 * struct nat64lsn_job_item is meaningful.
 */
enum nat64lsn_jtype {
	JTYPE_NEWHOST = 1,	/* create a new host */
	JTYPE_NEWPORTGROUP,	/* create a new portgroup */
	JTYPE_DESTROY,		/* deferred destroy of expired objects */
};
97 
/*
 * One queued job. The anonymous union is discriminated by jtype.
 */
struct nat64lsn_job_item {
	STAILQ_ENTRY(nat64lsn_job_item)	entries;	/* queue linkage */
	enum nat64lsn_jtype	jtype;

	union {
		struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
			/* presumably the packet that triggered the job - TODO confirm */
			struct mbuf		*m;
			struct nat64lsn_host	*host;
			struct nat64lsn_state	*state;
			/* hash value used to pick the bucket in the hosts hash */
			uint32_t		src6_hval;
			uint32_t		state_hval;
			/* flow id; src_ip6 is matched against host addresses */
			struct ipfw_flow_id	f_id;
			in_addr_t		faddr;
			uint16_t		port;
			uint8_t			proto;
			uint8_t			done;
		};
		struct { /* used by JTYPE_DESTROY */
			/* filled by nat64lsn_expire_hosts() */
			struct nat64lsn_hosts_slist	hosts;
			/* filled by nat64lsn_expire_portgroups() */
			struct nat64lsn_pg_slist	portgroups;
			/* result of nat64lsn_expire_pgchunk() */
			struct nat64lsn_pgchunk		*pgchunk;
			/* handed to NAT64LSN_EPOCH_CALL() for deferred destroy */
			struct epoch_context		epoch_ctx;
		};
	};
};
123 
/*
 * Mutex behind the JQUEUE_* macros.
 * NOTE(review): judging by the names it serializes access to the job
 * queue; the users are outside this part of the file - confirm.
 */
static struct mtx jmtx;
#define	JQUEUE_LOCK_INIT()	mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
#define	JQUEUE_LOCK_DESTROY()	mtx_destroy(&jmtx)
#define	JQUEUE_LOCK()		mtx_lock(&jmtx)
#define	JQUEUE_UNLOCK()		mtx_unlock(&jmtx)
129 
/* Forward declarations: job handling and object allocation/teardown. */
static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static struct nat64lsn_job_item *nat64lsn_create_job(
    struct nat64lsn_cfg *cfg, int jtype);
static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static void nat64lsn_job_destroy(epoch_context_t ctx);
static void nat64lsn_destroy_host(struct nat64lsn_host *host);
static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);

/* Forward declarations: packet translation entry points. */
static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
    struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
    struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);
148 
/*
 * Bit positions inside a state's flags word.
 * The three TCP bits line up with the value produced by
 * convert_tcp_flags().
 */
#define	NAT64_BIT_TCP_FIN	0	/* FIN was seen */
#define	NAT64_BIT_TCP_SYN	1	/* First syn in->out */
#define	NAT64_BIT_TCP_ESTAB	2	/* Packet with Ack */
#define	NAT64_BIT_READY_IPV4	6	/* state is ready for translate4 */
#define	NAT64_BIT_STALE		7	/* state is going to be expired */

/* Mask forms of the TCP bit positions above. */
#define	NAT64_FLAG_FIN		(1 << NAT64_BIT_TCP_FIN)
#define	NAT64_FLAG_SYN		(1 << NAT64_BIT_TCP_SYN)
#define	NAT64_FLAG_ESTAB	(1 << NAT64_BIT_TCP_ESTAB)
#define	NAT64_FLAGS_TCP	(NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)

/* Mask forms of the life-cycle bit positions above. */
#define	NAT64_FLAG_READY	(1 << NAT64_BIT_READY_IPV4)
#define	NAT64_FLAG_STALE	(1 << NAT64_BIT_STALE)
162 
/*
 * Map TCP header flags onto the compact per-state flag bits.
 *
 * FIN and SYN keep their bit positions.  RST and ACK are both moved two
 * bits to the right, so RST is reported as FIN and ACK is reported as
 * "established".  All other header flags are dropped.
 */
static inline uint8_t
convert_tcp_flags(uint8_t flags)
{

	return ((flags & (TH_FIN | TH_SYN)) |
	    ((flags & (TH_RST | TH_ACK)) >> 2));
}
174 
175 static void
nat64lsn_log(struct pfloghdr * plog,struct mbuf * m,sa_family_t family,struct nat64lsn_state * state)176 nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
177     struct nat64lsn_state *state)
178 {
179 
180 	memset(plog, 0, sizeof(*plog));
181 	plog->length = PFLOG_HDRLEN;
182 	plog->af = family;
183 	plog->action = PF_NAT;
184 	plog->dir = PF_IN;
185 	plog->rulenr = htonl(state->ip_src);
186 	plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
187 	    (state->proto << 8) | (state->ip_dst & 0xff));
188 	plog->ruleset[0] = '\0';
189 	strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
190 	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
191 }
192 
/* Hash @n 32-bit words starting at @p with seed @s. */
#define	HVAL(p, n, s)	jenkins_hash32((const uint32_t *)(p), (n), (s))
/* Hash one IPv6 address using the instance's seed. */
#define	HOST_HVAL(c, a)	HVAL((a),\
    sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
/*
 * Bucket of the hosts hash for hash value @v.  The mask only acts as a
 * modulo when hosts_hashsize is a power of two.
 */
#define	HOSTS(c, v)	((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])

/*
 * Hash two IPv6 addresses worth of data starting at dst_ip6.
 * NOTE(review): relies on a second in6_addr following dst_ip6 inside
 * struct ipfw_flow_id - confirm against its definition.
 */
#define	ALIASLINK_HVAL(c, f)	HVAL(&(f)->dst_ip6,\
    sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
/* Alias entry selected by the low (32 - plen4) bits of @v. */
#define	ALIAS_BYHASH(c, v)	\
    ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
202 static struct nat64lsn_aliaslink*
nat64lsn_get_aliaslink(struct nat64lsn_cfg * cfg __unused,struct nat64lsn_host * host,const struct ipfw_flow_id * f_id __unused)203 nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
204     struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
205 {
206 
207 	/*
208 	 * We can implement some different algorithms how
209 	 * select an alias address.
210 	 * XXX: for now we use first available.
211 	 */
212 	return (CK_SLIST_FIRST(&host->aliases));
213 }
214 
/* Hash two 32-bit words at @d using the instance's seed. */
#define	STATE_HVAL(c, d)	HVAL((d), 2, (c)->hash_seed)
/*
 * Bucket of a host's states hash for hash value @v.  The mask only acts
 * as a modulo when states_hashsize is a power of two.
 */
#define	STATE_HASH(h, v)	\
    ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
/*
 * States chunk of portgroup @p for value @v: the single embedded chunk
 * when chunks_count is 1, otherwise the one picked by CHUNK_BY_FADDR().
 */
#define	STATES_CHUNK(p, v)	\
    ((p)->chunks_count == 1 ? (p)->states : \
	((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))

/*
 * Operations on a portgroup's 64-bit free-state mask; a set bit marks
 * a free state.  With 64-bit longs every operation maps onto a single
 * 64-bit primitive.
 */
#ifdef __LP64__
#define	FREEMASK_FFSLL(pg, faddr)		\
    ffsll(*FREEMASK_CHUNK((pg), (faddr)))
#define	FREEMASK_BTR(pg, faddr, bit)	\
    ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
#define	FREEMASK_BTS(pg, faddr, bit)	\
    ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
#define	FREEMASK_ISSET(pg, faddr, bit)	\
    ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
#define	FREEMASK_COPY(pg, n, out)	\
    (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
#else
/*
 * Find the first set bit in a 64-bit mask stored as two 32-bit words,
 * low word first.  Returns the 1-based bit position (1..64), or 0 when
 * both words are zero.
 */
static inline int
freemask_ffsll(uint32_t *freemask)
{
	int word, bit;

	for (word = 0; word < 2; word++) {
		bit = ffsl(freemask[word]);
		if (bit != 0)
			return (bit + word * 32);
	}
	return (0);
}
/*
 * Without 64-bit longs the mask is handled as two 32-bit words:
 * bit / 32 selects the word and bit % 32 the bit inside it.
 * FREEMASK_COPY() assembles the 64-bit value from two separate 32-bit
 * loads.
 */
#define	FREEMASK_FFSLL(pg, faddr)		\
    freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
#define	FREEMASK_BTR(pg, faddr, bit)	\
    ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
#define	FREEMASK_BTS(pg, faddr, bit)	\
    ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
#define	FREEMASK_ISSET(pg, faddr, bit)	\
    ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
#define	FREEMASK_COPY(pg, n, out)	\
    (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
	((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
#endif /* !__LP64__ */
257 
/* Upper bound on the number of portgroups probed by one nat64lsn_get_pg(). */
#define	NAT64LSN_TRY_PGCNT	32
/*
 * Find a portgroup that still has a free state for foreign address @faddr.
 *
 * chunkmask - bitmask of allocated pgchunks (bit N covers chunks[N]).
 * pgmask    - per-chunk bitmasks of valid portgroup pointers.
 * chunks    - array of pgchunk pointers.
 * pgptr     - cached "last used" portgroup; replaced with CAS when
 *             a different portgroup is selected.
 * pgidx     - cached scan index; replaced with CAS when nothing was found.
 *
 * Returns the portgroup, or NULL when none of the probed portgroups
 * has a free state.
 */
static struct nat64lsn_pg*
nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
    struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
    uint32_t *pgidx, in_addr_t faddr)
{
	struct nat64lsn_pg *pg, *oldpg;
	uint32_t idx, oldidx;
	int cnt;

	cnt = 0;
	/* First try last used PG */
	oldpg = pg = ck_pr_load_ptr(pgptr);
	idx = oldidx = ck_pr_load_32(pgidx);
	/* If pgidx is out of range, reset it to the first pgchunk */
	if (!ISSET32(*chunkmask, idx / 32))
		idx = 0;
	do {
		ck_pr_fence_load();
		if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
			/*
			 * This PG has free states.  If it is not the one
			 * we started with (cnt > 0), try to publish it as
			 * the new last used PG.
			 * NOTE: it can be already updated by jobs handler,
			 *	 thus we use CAS operation.
			 */
			if (cnt > 0)
				ck_pr_cas_ptr(pgptr, oldpg, pg);
			return (pg);
		}
		/* Stop if idx is out of range */
		if (!ISSET32(*chunkmask, idx / 32))
			break;

		/* Pick the next candidate; skip slots without a valid PG. */
		if (ISSET32(pgmask[idx / 32], idx % 32))
			pg = ck_pr_load_ptr(
			    &chunks[idx / 32]->pgptr[idx % 32]);
		else
			pg = NULL;

		idx++;
	} while (++cnt < NAT64LSN_TRY_PGCNT);

	/* If pgidx is out of range, reset it to the first pgchunk */
	if (!ISSET32(*chunkmask, idx / 32))
		idx = 0;
	/* Remember where to resume; a concurrent update wins the CAS. */
	ck_pr_cas_32(pgidx, oldidx, idx);
	return (NULL);
}
307 
/*
 * Look up, or create, the state for an IPv6->IPv4 flow of @host.
 *
 * An existing state is matched in the host's hash bucket for @hval by
 * protocol, IPv4 destination (@faddr), source port (@port) and the
 * destination port from @f_id.  Otherwise a free state is grabbed from
 * a portgroup of the host's alias, initialized, linked into the host's
 * hash and finally marked READY.
 *
 * Returns NULL when the host has no alias link, no portgroup with free
 * states was found, or every attempt to grab a free state was lost to
 * a concurrent caller.  Must be called inside a network epoch section.
 */
static struct nat64lsn_state*
nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
    const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
    uint16_t port, uint8_t proto)
{
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_state *state;
	struct nat64lsn_pg *pg;
	int i, offset;

	NAT64LSN_EPOCH_ASSERT();

	/* Check that we already have state for given arguments */
	CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
		if (state->proto == proto && state->ip_dst == faddr &&
		    state->sport == port && state->dport == f_id->dst_port)
			return (state);
	}

	link = nat64lsn_get_aliaslink(cfg, host, f_id);
	if (link == NULL)
		return (NULL);

	/* Each protocol has its own set of portgroups in the alias. */
	switch (proto) {
	case IPPROTO_TCP:
		pg = nat64lsn_get_pg(
		    &link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
		    link->alias->tcp, &link->alias->tcp_pg,
		    &link->alias->tcp_pgidx, faddr);
		break;
	case IPPROTO_UDP:
		pg = nat64lsn_get_pg(
		    &link->alias->udp_chunkmask, link->alias->udp_pgmask,
		    link->alias->udp, &link->alias->udp_pg,
		    &link->alias->udp_pgidx, faddr);
		break;
	case IPPROTO_ICMP:
		pg = nat64lsn_get_pg(
		    &link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
		    link->alias->icmp, &link->alias->icmp_pg,
		    &link->alias->icmp_pgidx, faddr);
		break;
	default:
		panic("%s: wrong proto %d", __func__, proto);
	}
	if (pg == NULL)
		return (NULL);

	/* Check that PG has some free states */
	state = NULL;
	/* Retry at most as many times as there were free bits on entry. */
	i = FREEMASK_BITCOUNT(pg, faddr);
	while (i-- > 0) {
		offset = FREEMASK_FFSLL(pg, faddr);
		if (offset == 0) {
			/*
			 * We lost the race.
			 * No more free states in this PG.
			 */
			break;
		}

		/*
		 * Lets try to atomically grab the state.
		 * offset is 1-based, the bit number is offset - 1.
		 */
		if (FREEMASK_BTR(pg, faddr, offset - 1)) {
			state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
			/* Initialize */
			state->flags = proto != IPPROTO_TCP ? 0 :
			    convert_tcp_flags(f_id->_flags);
			state->proto = proto;
			state->aport = pg->base_port + offset - 1;
			state->dport = f_id->dst_port;
			state->sport = port;
			state->ip6_dst = f_id->dst_ip6;
			state->ip_dst = faddr;
			state->ip_src = link->alias->addr;
			state->hval = hval;
			state->host = host;
			SET_AGE(state->timestamp);

			/* Insert new state into host's hash table */
			HOST_LOCK(host);
			CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
			    state, entries);
			host->states_count++;
			/*
			 * XXX: In case if host is going to be expired,
			 * reset NAT64LSN_DEADHOST flag.
			 */
			host->flags &= ~NAT64LSN_DEADHOST;
			HOST_UNLOCK(host);
			NAT64STAT_INC(&cfg->base.stats, screated);
			/*
			 * Mark the state as ready for translate4.
			 * The store fence orders the initialization above
			 * before the READY bit.
			 */
			ck_pr_fence_store();
			ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
			break;
		}
	}
	return (state);
}
406 
407 /*
408  * Inspects icmp packets to see if the message contains different
409  * packet header so we need to alter @addr and @port.
410  */
411 static int
inspect_icmp_mbuf(struct mbuf ** mp,uint8_t * proto,uint32_t * addr,uint16_t * port)412 inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
413     uint16_t *port)
414 {
415 	struct icmp *icmp;
416 	struct ip *ip;
417 	int off;
418 	uint8_t inner_proto;
419 
420 	ip = mtod(*mp, struct ip *); /* Outer IP header */
421 	off = (ip->ip_hl << 2) + ICMP_MINLEN;
422 	if ((*mp)->m_len < off)
423 		*mp = m_pullup(*mp, off);
424 	if (*mp == NULL)
425 		return (ENOMEM);
426 
427 	ip = mtod(*mp, struct ip *); /* Outer IP header */
428 	icmp = L3HDR(ip, struct icmp *);
429 	switch (icmp->icmp_type) {
430 	case ICMP_ECHO:
431 	case ICMP_ECHOREPLY:
432 		/* Use icmp ID as distinguisher */
433 		*port = ntohs(icmp->icmp_id);
434 		return (0);
435 	case ICMP_UNREACH:
436 	case ICMP_TIMXCEED:
437 		break;
438 	default:
439 		return (EOPNOTSUPP);
440 	}
441 	/*
442 	 * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
443 	 * of ULP header.
444 	 */
445 	if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
446 		return (EINVAL);
447 	if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
448 		*mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
449 	if (*mp == NULL)
450 		return (ENOMEM);
451 	ip = mtodo(*mp, off); /* Inner IP header */
452 	inner_proto = ip->ip_p;
453 	off += ip->ip_hl << 2; /* Skip inner IP header */
454 	*addr = ntohl(ip->ip_src.s_addr);
455 	if ((*mp)->m_len < off + ICMP_MINLEN)
456 		*mp = m_pullup(*mp, off + ICMP_MINLEN);
457 	if (*mp == NULL)
458 		return (ENOMEM);
459 	switch (inner_proto) {
460 	case IPPROTO_TCP:
461 	case IPPROTO_UDP:
462 		/* Copy source port from the header */
463 		*port = ntohs(*((uint16_t *)mtodo(*mp, off)));
464 		*proto = inner_proto;
465 		return (0);
466 	case IPPROTO_ICMP:
467 		/*
468 		 * We will translate only ICMP errors for our ICMP
469 		 * echo requests.
470 		 */
471 		icmp = mtodo(*mp, off);
472 		if (icmp->icmp_type != ICMP_ECHO)
473 			return (EOPNOTSUPP);
474 		*port = ntohs(icmp->icmp_id);
475 		return (0);
476 	};
477 	return (EOPNOTSUPP);
478 }
479 
480 static struct nat64lsn_state*
nat64lsn_get_state4to6(struct nat64lsn_cfg * cfg,struct nat64lsn_alias * alias,in_addr_t faddr,uint16_t port,uint8_t proto)481 nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
482     in_addr_t faddr, uint16_t port, uint8_t proto)
483 {
484 	struct nat64lsn_state *state;
485 	struct nat64lsn_pg *pg;
486 	int chunk_idx, pg_idx, state_idx;
487 
488 	NAT64LSN_EPOCH_ASSERT();
489 
490 	if (port < NAT64_MIN_PORT)
491 		return (NULL);
492 	/*
493 	 * Alias keeps 32 pgchunks for each protocol.
494 	 * Each pgchunk has 32 pointers to portgroup.
495 	 * Each portgroup has 64 states for ports.
496 	 */
497 	port -= NAT64_MIN_PORT;
498 	chunk_idx = port / 2048;
499 
500 	port -= chunk_idx * 2048;
501 	pg_idx = port / 64;
502 	state_idx = port % 64;
503 
504 	/*
505 	 * First check in proto_chunkmask that we have allocated PG chunk.
506 	 * Then check in proto_pgmask that we have valid PG pointer.
507 	 */
508 	pg = NULL;
509 	switch (proto) {
510 	case IPPROTO_TCP:
511 		if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
512 		    ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
513 			pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
514 			break;
515 		}
516 		return (NULL);
517 	case IPPROTO_UDP:
518 		if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
519 		    ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
520 			pg = alias->udp[chunk_idx]->pgptr[pg_idx];
521 			break;
522 		}
523 		return (NULL);
524 	case IPPROTO_ICMP:
525 		if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
526 		    ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
527 			pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
528 			break;
529 		}
530 		return (NULL);
531 	default:
532 		panic("%s: wrong proto %d", __func__, proto);
533 	}
534 	if (pg == NULL)
535 		return (NULL);
536 
537 	if (FREEMASK_ISSET(pg, faddr, state_idx))
538 		return (NULL);
539 
540 	state = &STATES_CHUNK(pg, faddr)->state[state_idx];
541 	ck_pr_fence_load();
542 	if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
543 		return (state);
544 	return (NULL);
545 }
546 
547 /*
548  * Reassemble IPv4 fragments, make PULLUP if needed, get some ULP fields
549  * that might be unknown until reassembling is completed.
550  */
551 static struct mbuf*
nat64lsn_reassemble4(struct nat64lsn_cfg * cfg,struct mbuf * m,uint16_t * port)552 nat64lsn_reassemble4(struct nat64lsn_cfg *cfg, struct mbuf *m,
553     uint16_t *port)
554 {
555 	struct ip *ip;
556 	int len;
557 
558 	m = ip_reass(m);
559 	if (m == NULL)
560 		return (NULL);
561 	/* IP header must be contigious after ip_reass() */
562 	ip = mtod(m, struct ip *);
563 	len = ip->ip_hl << 2;
564 	switch (ip->ip_p) {
565 	case IPPROTO_ICMP:
566 		len += ICMP_MINLEN; /* Enough to get icmp_id */
567 		break;
568 	case IPPROTO_TCP:
569 		len += sizeof(struct tcphdr);
570 		break;
571 	case IPPROTO_UDP:
572 		len += sizeof(struct udphdr);
573 		break;
574 	default:
575 		m_freem(m);
576 		NAT64STAT_INC(&cfg->base.stats, noproto);
577 		return (NULL);
578 	}
579 	if (m->m_len < len) {
580 		m = m_pullup(m, len);
581 		if (m == NULL) {
582 			NAT64STAT_INC(&cfg->base.stats, nomem);
583 			return (NULL);
584 		}
585 		ip = mtod(m, struct ip *);
586 	}
587 	switch (ip->ip_p) {
588 	case IPPROTO_TCP:
589 		*port = ntohs(L3HDR(ip, struct tcphdr *)->th_dport);
590 		break;
591 	case IPPROTO_UDP:
592 		*port = ntohs(L3HDR(ip, struct udphdr *)->uh_dport);
593 		break;
594 	}
595 	return (m);
596 }
597 
598 static int
nat64lsn_translate4(struct nat64lsn_cfg * cfg,const struct ipfw_flow_id * f_id,struct mbuf ** mp)599 nat64lsn_translate4(struct nat64lsn_cfg *cfg,
600     const struct ipfw_flow_id *f_id, struct mbuf **mp)
601 {
602 	struct pfloghdr loghdr, *logdata;
603 	struct in6_addr src6;
604 	struct nat64lsn_state *state;
605 	struct nat64lsn_alias *alias;
606 	uint32_t addr, flags;
607 	uint16_t port, ts;
608 	int ret;
609 	uint8_t proto;
610 
611 	addr = f_id->dst_ip;
612 	port = f_id->dst_port;
613 	proto = f_id->proto;
614 	if (addr < cfg->prefix4 || addr > cfg->pmask4) {
615 		NAT64STAT_INC(&cfg->base.stats, nomatch4);
616 		return (cfg->nomatch_verdict);
617 	}
618 
619 	/* Reassemble fragments if needed */
620 	ret = ntohs(mtod(*mp, struct ip *)->ip_off);
621 	if ((ret & (IP_MF | IP_OFFMASK)) != 0) {
622 		*mp = nat64lsn_reassemble4(cfg, *mp, &port);
623 		if (*mp == NULL)
624 			return (IP_FW_DENY);
625 	}
626 
627 	/* Check if protocol is supported */
628 	switch (proto) {
629 	case IPPROTO_ICMP:
630 		ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
631 		if (ret != 0) {
632 			if (ret == ENOMEM) {
633 				NAT64STAT_INC(&cfg->base.stats, nomem);
634 				return (IP_FW_DENY);
635 			}
636 			NAT64STAT_INC(&cfg->base.stats, noproto);
637 			return (cfg->nomatch_verdict);
638 		}
639 		if (addr < cfg->prefix4 || addr > cfg->pmask4) {
640 			NAT64STAT_INC(&cfg->base.stats, nomatch4);
641 			return (cfg->nomatch_verdict);
642 		}
643 		/* FALLTHROUGH */
644 	case IPPROTO_TCP:
645 	case IPPROTO_UDP:
646 		break;
647 	default:
648 		NAT64STAT_INC(&cfg->base.stats, noproto);
649 		return (cfg->nomatch_verdict);
650 	}
651 
652 	alias = &ALIAS_BYHASH(cfg, addr);
653 	MPASS(addr == alias->addr);
654 
655 	/* Check that we have state for this port */
656 	state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
657 	    port, proto);
658 	if (state == NULL) {
659 		NAT64STAT_INC(&cfg->base.stats, nomatch4);
660 		return (cfg->nomatch_verdict);
661 	}
662 
663 	/* TODO: Check flags to see if we need to do some static mapping */
664 
665 	/* Update some state fields if need */
666 	SET_AGE(ts);
667 	if (f_id->proto == IPPROTO_TCP)
668 		flags = convert_tcp_flags(f_id->_flags);
669 	else
670 		flags = 0;
671 	if (state->timestamp != ts)
672 		state->timestamp = ts;
673 	if ((state->flags & flags) != flags)
674 		state->flags |= flags;
675 
676 	port = htons(state->sport);
677 	src6 = state->ip6_dst;
678 
679 	if (cfg->base.flags & NAT64_LOG) {
680 		logdata = &loghdr;
681 		nat64lsn_log(logdata, *mp, AF_INET, state);
682 	} else
683 		logdata = NULL;
684 
685 	/*
686 	 * We already have src6 with embedded address, but it is possible,
687 	 * that src_ip is different than state->ip_dst, this is why we
688 	 * do embedding again.
689 	 */
690 	nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
691 	ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
692 	    &cfg->base, logdata);
693 	if (ret == NAT64SKIP)
694 		return (cfg->nomatch_verdict);
695 	if (ret == NAT64RETURN)
696 		*mp = NULL;
697 	return (IP_FW_DENY);
698 }
699 
700 /*
701  * Check if particular state is stale and should be deleted.
702  * Return 1 if true, 0 otherwise.
703  */
704 static int
nat64lsn_check_state(struct nat64lsn_cfg * cfg,struct nat64lsn_state * state)705 nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
706 {
707 	int age, ttl;
708 
709 	/* State was marked as stale in previous pass. */
710 	if (ISSET32(state->flags, NAT64_BIT_STALE))
711 		return (1);
712 
713 	/* State is not yet initialized, it is going to be READY */
714 	if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
715 		return (0);
716 
717 	age = GET_AGE(state->timestamp);
718 	switch (state->proto) {
719 	case IPPROTO_TCP:
720 		if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
721 			ttl = cfg->st_close_ttl;
722 		else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
723 			ttl = cfg->st_estab_ttl;
724 		else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
725 			ttl = cfg->st_syn_ttl;
726 		else
727 			ttl = cfg->st_syn_ttl;
728 		if (age > ttl)
729 			return (1);
730 		break;
731 	case IPPROTO_UDP:
732 		if (age > cfg->st_udp_ttl)
733 			return (1);
734 		break;
735 	case IPPROTO_ICMP:
736 		if (age > cfg->st_icmp_ttl)
737 			return (1);
738 		break;
739 	}
740 	return (0);
741 }
742 
/*
 * Expire states of one portgroup.
 *
 * Expiration of a state takes two passes:
 *  - first the state is unlinked from its host's hash, READY is
 *    cleared and STALE is set;
 *  - on a later pass a STALE state gets its flags reset and its bit
 *    set in the freemask, which makes it available again.
 *
 * The PG timestamp is refreshed while at least one state is still
 * alive.  Returns 1 when the age of that timestamp has reached
 * cfg->pg_delete_delay, 0 otherwise.
 */
static int
nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
{
	struct nat64lsn_state *state;
	struct nat64lsn_host *host;
	uint64_t freemask;
	int c, i, update_age;

	update_age = 0;
	/*
	 * NOTE(review): the chunk index c is passed where other callers
	 * pass a foreign address; this relies on how FREEMASK_CHUNK() and
	 * CHUNK_BY_FADDR() (defined elsewhere) map the value - confirm.
	 */
	for (c = 0; c < pg->chunks_count; c++) {
		/* Work on a snapshot of the freemask of this chunk. */
		FREEMASK_COPY(pg, c, freemask);
		for (i = 0; i < 64; i++) {
			/* Set bit: the state is free, nothing to expire. */
			if (ISSET64(freemask, i))
				continue;
			state = &STATES_CHUNK(pg, c)->state[i];
			if (nat64lsn_check_state(cfg, state) == 0) {
				update_age = 1;
				continue;
			}
			/*
			 * Expire state:
			 * 1. Mark as STALE and unlink from host's hash.
			 * 2. Set bit in freemask.
			 */
			if (ISSET32(state->flags, NAT64_BIT_STALE)) {
				/*
				 * State was marked as STALE in previous
				 * pass. Now it is safe to release it.
				 */
				state->flags = 0;
				ck_pr_fence_store();
				FREEMASK_BTS(pg, c, i);
				NAT64STAT_INC(&cfg->base.stats, sdeleted);
				continue;
			}
			MPASS(state->flags & NAT64_FLAG_READY);

			host = state->host;
			HOST_LOCK(host);
			CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
			    state, nat64lsn_state, entries);
			host->states_count--;
			HOST_UNLOCK(host);

			/* Reset READY flag */
			ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
			/* And set STALE flag */
			ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
			ck_pr_fence_store();
			/*
			 * Now translate6 will not use this state, wait
			 * until it become safe for translate4, then mark
			 * state as free.
			 */
		}
	}

	/*
	 * We have some alive states, update timestamp.
	 */
	if (update_age)
		SET_AGE(pg->timestamp);

	if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
		return (0);

	return (1);
}
811 
812 static void
nat64lsn_expire_portgroups(struct nat64lsn_cfg * cfg,struct nat64lsn_pg_slist * portgroups)813 nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
814     struct nat64lsn_pg_slist *portgroups)
815 {
816 	struct nat64lsn_alias *alias;
817 	struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
818 	uint32_t *pgmask, *pgidx;
819 	int i, idx;
820 
821 	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
822 		alias = &cfg->aliases[i];
823 		CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
824 			if (nat64lsn_maintain_pg(cfg, pg) == 0)
825 				continue;
826 			/* Always keep first PG */
827 			if (pg->base_port == NAT64_MIN_PORT)
828 				continue;
829 			/*
830 			 * PG is expired, unlink it and schedule for
831 			 * deferred destroying.
832 			 */
833 			idx = (pg->base_port - NAT64_MIN_PORT) / 64;
834 			switch (pg->proto) {
835 			case IPPROTO_TCP:
836 				pgmask = alias->tcp_pgmask;
837 				pgptr = &alias->tcp_pg;
838 				pgidx = &alias->tcp_pgidx;
839 				firstpg = alias->tcp[0]->pgptr[0];
840 				break;
841 			case IPPROTO_UDP:
842 				pgmask = alias->udp_pgmask;
843 				pgptr = &alias->udp_pg;
844 				pgidx = &alias->udp_pgidx;
845 				firstpg = alias->udp[0]->pgptr[0];
846 				break;
847 			case IPPROTO_ICMP:
848 				pgmask = alias->icmp_pgmask;
849 				pgptr = &alias->icmp_pg;
850 				pgidx = &alias->icmp_pgidx;
851 				firstpg = alias->icmp[0]->pgptr[0];
852 				break;
853 			}
854 			/* Reset the corresponding bit in pgmask array. */
855 			ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
856 			ck_pr_fence_store();
857 			/* If last used PG points to this PG, reset it. */
858 			ck_pr_cas_ptr(pgptr, pg, firstpg);
859 			ck_pr_cas_32(pgidx, idx, 0);
860 			/* Unlink PG from alias's chain */
861 			ALIAS_LOCK(alias);
862 			CK_SLIST_REMOVE(&alias->portgroups, pg,
863 			    nat64lsn_pg, entries);
864 			alias->portgroups_count--;
865 			ALIAS_UNLOCK(alias);
866 			/* And link to job's chain for deferred destroying */
867 			NAT64STAT_INC(&cfg->base.stats, spgdeleted);
868 			CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
869 		}
870 	}
871 }
872 
873 static void
nat64lsn_expire_hosts(struct nat64lsn_cfg * cfg,struct nat64lsn_hosts_slist * hosts)874 nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
875     struct nat64lsn_hosts_slist *hosts)
876 {
877 	struct nat64lsn_host *host, *tmp;
878 	int i;
879 
880 	for (i = 0; i < cfg->hosts_hashsize; i++) {
881 		CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
882 		    entries, tmp) {
883 			/* Is host was marked in previous call? */
884 			if (host->flags & NAT64LSN_DEADHOST) {
885 				if (host->states_count > 0) {
886 					host->flags &= ~NAT64LSN_DEADHOST;
887 					continue;
888 				}
889 				/*
890 				 * Unlink host from hash table and schedule
891 				 * it for deferred destroying.
892 				 */
893 				CFG_LOCK(cfg);
894 				CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
895 				    nat64lsn_host, entries);
896 				cfg->hosts_count--;
897 				CFG_UNLOCK(cfg);
898 				CK_SLIST_INSERT_HEAD(hosts, host, entries);
899 				continue;
900 			}
901 			if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
902 				continue;
903 			if (host->states_count > 0)
904 				continue;
905 			/* Mark host as going to be expired in next pass */
906 			host->flags |= NAT64LSN_DEADHOST;
907 			ck_pr_fence_store();
908 		}
909 	}
910 }
911 
/*
 * Placeholder for pgchunk expiration.
 * The scan below is compiled out (#if 0), so this function currently
 * always returns NULL and does not look at @cfg.
 */
static struct nat64lsn_pgchunk*
nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
{
#if 0
	/*
	 * Unfinished sketch: look for TCP pgchunks (other than the first
	 * one) whose pgmask is empty and clear their chunkmask bit.
	 * Nothing is returned to the caller yet.
	 */
	struct nat64lsn_alias *alias;
	struct nat64lsn_pgchunk *chunk;
	uint32_t pgmask;
	int i, c;

	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
		alias = &cfg->aliases[i];
		if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
			continue;
		/* Always keep single chunk allocated */
		for (c = 1; c < 32; c++) {
			if ((alias->tcp_chunkmask & (1 << c)) == 0)
				break;
			chunk = ck_pr_load_ptr(&alias->tcp[c]);
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
			ck_pr_btr_32(&alias->tcp_chunkmask, c);
			ck_pr_fence_load();
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
		}
	}
#endif
	return (NULL);
}
941 
#if 0
/*
 * Disabled, unfinished sketch of growing a host's states hash once the
 * number of states is at least twice the hash size.
 *
 * NOTE(review): must be fixed before it is enabled:
 *  - the init loop iterates over j but indexes hash[i];
 *  - malloc() is called without a malloc type argument;
 *  - the new table is never installed into the host nor freed.
 */
static void
nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_host *h;
	struct nat64lsn_states_slist *hash;
	int i, j, hsize;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
			 if (h->states_count / 2 < h->states_hashsize ||
			     h->states_hashsize >= NAT64LSN_MAX_HSIZE)
				 continue;
			 hsize = h->states_hashsize * 2;
			 hash = malloc(sizeof(*hash)* hsize, M_NOWAIT);
			 if (hash == NULL)
				 continue;
			 for (j = 0; j < hsize; j++)
				CK_SLIST_INIT(&hash[i]);

			 ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
		}
	}
}
#endif
967 
968 /*
969  * This procedure is used to perform various maintenance
970  * on dynamic hash list. Currently it is called every 4 seconds.
971  */
972 static void
nat64lsn_periodic(void * data)973 nat64lsn_periodic(void *data)
974 {
975 	struct nat64lsn_job_item *ji;
976 	struct nat64lsn_cfg *cfg;
977 
978 	cfg = (struct nat64lsn_cfg *) data;
979 	CURVNET_SET(cfg->vp);
980 	if (cfg->hosts_count > 0) {
981 		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
982 		if (ji != NULL) {
983 			ji->jtype = JTYPE_DESTROY;
984 			CK_SLIST_INIT(&ji->hosts);
985 			CK_SLIST_INIT(&ji->portgroups);
986 			nat64lsn_expire_hosts(cfg, &ji->hosts);
987 			nat64lsn_expire_portgroups(cfg, &ji->portgroups);
988 			ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
989 			NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
990 			    nat64lsn_job_destroy);
991 		} else
992 			NAT64STAT_INC(&cfg->base.stats, jnomem);
993 	}
994 	callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
995 	CURVNET_RESTORE();
996 }
997 
998 #define	ALLOC_ERROR(stage, type)	((stage) ? 10 * (type) + (stage): 0)
999 #define	HOST_ERROR(stage)		ALLOC_ERROR(stage, 1)
1000 #define	PG_ERROR(stage)			ALLOC_ERROR(stage, 2)
1001 static int
nat64lsn_alloc_host(struct nat64lsn_cfg * cfg,struct nat64lsn_job_item * ji)1002 nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1003 {
1004 	char a[INET6_ADDRSTRLEN];
1005 	struct nat64lsn_aliaslink *link;
1006 	struct nat64lsn_host *host;
1007 	struct nat64lsn_state *state;
1008 	uint32_t hval, data[2];
1009 	int i;
1010 
1011 	/* Check that host was not yet added. */
1012 	NAT64LSN_EPOCH_ASSERT();
1013 	CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
1014 		if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
1015 			/* The host was allocated in previous call. */
1016 			ji->host = host;
1017 			goto get_state;
1018 		}
1019 	}
1020 
1021 	host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
1022 	if (ji->host == NULL)
1023 		return (HOST_ERROR(1));
1024 
1025 	host->states_hashsize = NAT64LSN_HSIZE;
1026 	host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
1027 	    host->states_hashsize, M_NAT64LSN, M_NOWAIT);
1028 	if (host->states_hash == NULL) {
1029 		uma_zfree(nat64lsn_host_zone, host);
1030 		return (HOST_ERROR(2));
1031 	}
1032 
1033 	link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
1034 	if (link == NULL) {
1035 		free(host->states_hash, M_NAT64LSN);
1036 		uma_zfree(nat64lsn_host_zone, host);
1037 		return (HOST_ERROR(3));
1038 	}
1039 
1040 	/* Initialize */
1041 	HOST_LOCK_INIT(host);
1042 	SET_AGE(host->timestamp);
1043 	host->addr = ji->f_id.src_ip6;
1044 	host->hval = ji->src6_hval;
1045 	host->flags = 0;
1046 	host->states_count = 0;
1047 	host->states_hashsize = NAT64LSN_HSIZE;
1048 	CK_SLIST_INIT(&host->aliases);
1049 	for (i = 0; i < host->states_hashsize; i++)
1050 		CK_SLIST_INIT(&host->states_hash[i]);
1051 
1052 	/* Determine alias from flow hash. */
1053 	hval = ALIASLINK_HVAL(cfg, &ji->f_id);
1054 	link->alias = &ALIAS_BYHASH(cfg, hval);
1055 	CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);
1056 
1057 	ALIAS_LOCK(link->alias);
1058 	CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
1059 	link->alias->hosts_count++;
1060 	ALIAS_UNLOCK(link->alias);
1061 
1062 	CFG_LOCK(cfg);
1063 	CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
1064 	cfg->hosts_count++;
1065 	CFG_UNLOCK(cfg);
1066 
1067 get_state:
1068 	data[0] = ji->faddr;
1069 	data[1] = (ji->f_id.dst_port << 16) | ji->port;
1070 	ji->state_hval = hval = STATE_HVAL(cfg, data);
1071 	state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
1072 	    ji->faddr, ji->port, ji->proto);
1073 	/*
1074 	 * We failed to obtain new state, used alias needs new PG.
1075 	 * XXX: or another alias should be used.
1076 	 */
1077 	if (state == NULL) {
1078 		/* Try to allocate new PG */
1079 		if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1080 			return (HOST_ERROR(4));
1081 		/* We assume that nat64lsn_alloc_pg() got state */
1082 	} else
1083 		ji->state = state;
1084 
1085 	ji->done = 1;
1086 	DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
1087 	    inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
1088 	return (HOST_ERROR(0));
1089 }
1090 
/*
 * Find the first clear bit in a bitmap made of 32 uint32_t words.
 *
 * Returns the zero-based index of that bit (word * 32 + bit), or -1
 * when all bits in all 32 words are set.
 */
static int
nat64lsn_find_pg_place(uint32_t *data)
{
	uint32_t avail;
	int word;

	for (word = 0; word < 32; word++) {
		/* Invert, so that free slots become set bits for ffs(). */
		avail = ~data[word];
		if (avail != 0)
			return (word * 32 + ffs(avail) - 1);
	}
	return (-1);
}
1103 
1104 static int
nat64lsn_alloc_proto_pg(struct nat64lsn_cfg * cfg,struct nat64lsn_alias * alias,uint32_t * chunkmask,uint32_t * pgmask,struct nat64lsn_pgchunk ** chunks,struct nat64lsn_pg ** pgptr,uint8_t proto)1105 nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
1106     struct nat64lsn_alias *alias, uint32_t *chunkmask,
1107     uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
1108     struct nat64lsn_pg **pgptr, uint8_t proto)
1109 {
1110 	struct nat64lsn_pg *pg;
1111 	int i, pg_idx, chunk_idx;
1112 
1113 	/* Find place in pgchunk where PG can be added */
1114 	pg_idx = nat64lsn_find_pg_place(pgmask);
1115 	if (pg_idx < 0)	/* no more PGs */
1116 		return (PG_ERROR(1));
1117 	/* Check that we have allocated pgchunk for given PG index */
1118 	chunk_idx = pg_idx / 32;
1119 	if (!ISSET32(*chunkmask, chunk_idx)) {
1120 		chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
1121 		    M_NOWAIT);
1122 		if (chunks[chunk_idx] == NULL)
1123 			return (PG_ERROR(2));
1124 		ck_pr_bts_32(chunkmask, chunk_idx);
1125 		ck_pr_fence_store();
1126 	}
1127 	/* Allocate PG and states chunks */
1128 	pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
1129 	if (pg == NULL)
1130 		return (PG_ERROR(3));
1131 	pg->chunks_count = cfg->states_chunks;
1132 	if (pg->chunks_count > 1) {
1133 		pg->freemask_chunk = malloc(pg->chunks_count *
1134 		    sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
1135 		if (pg->freemask_chunk == NULL) {
1136 			uma_zfree(nat64lsn_pg_zone, pg);
1137 			return (PG_ERROR(4));
1138 		}
1139 		pg->states_chunk = malloc(pg->chunks_count *
1140 		    sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
1141 		    M_NOWAIT | M_ZERO);
1142 		if (pg->states_chunk == NULL) {
1143 			free(pg->freemask_chunk, M_NAT64LSN);
1144 			uma_zfree(nat64lsn_pg_zone, pg);
1145 			return (PG_ERROR(5));
1146 		}
1147 		for (i = 0; i < pg->chunks_count; i++) {
1148 			pg->states_chunk[i] = uma_zalloc(
1149 			    nat64lsn_state_zone, M_NOWAIT);
1150 			if (pg->states_chunk[i] == NULL)
1151 				goto states_failed;
1152 		}
1153 		memset(pg->freemask_chunk, 0xff,
1154 		    sizeof(uint64_t) * pg->chunks_count);
1155 	} else {
1156 		pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
1157 		if (pg->states == NULL) {
1158 			uma_zfree(nat64lsn_pg_zone, pg);
1159 			return (PG_ERROR(6));
1160 		}
1161 		memset(&pg->freemask64, 0xff, sizeof(uint64_t));
1162 	}
1163 
1164 	/* Initialize PG and hook it to pgchunk */
1165 	SET_AGE(pg->timestamp);
1166 	pg->proto = proto;
1167 	pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
1168 	ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
1169 	ck_pr_fence_store();
1170 	ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
1171 	ck_pr_store_ptr(pgptr, pg);
1172 
1173 	ALIAS_LOCK(alias);
1174 	CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
1175 	SET_AGE(alias->timestamp);
1176 	alias->portgroups_count++;
1177 	ALIAS_UNLOCK(alias);
1178 	NAT64STAT_INC(&cfg->base.stats, spgcreated);
1179 	return (PG_ERROR(0));
1180 
1181 states_failed:
1182 	for (i = 0; i < pg->chunks_count; i++)
1183 		uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1184 	free(pg->freemask_chunk, M_NAT64LSN);
1185 	free(pg->states_chunk, M_NAT64LSN);
1186 	uma_zfree(nat64lsn_pg_zone, pg);
1187 	return (PG_ERROR(7));
1188 }
1189 
1190 static int
nat64lsn_alloc_pg(struct nat64lsn_cfg * cfg,struct nat64lsn_job_item * ji)1191 nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1192 {
1193 	struct nat64lsn_aliaslink *link;
1194 	struct nat64lsn_alias *alias;
1195 	int ret;
1196 
1197 	link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
1198 	if (link == NULL)
1199 		return (PG_ERROR(1));
1200 
1201 	/*
1202 	 * TODO: check that we did not already allocated PG in
1203 	 *	 previous call.
1204 	 */
1205 
1206 	ret = 0;
1207 	alias = link->alias;
1208 	/* Find place in pgchunk where PG can be added */
1209 	switch (ji->proto) {
1210 	case IPPROTO_TCP:
1211 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1212 		    &alias->tcp_chunkmask, alias->tcp_pgmask,
1213 		    alias->tcp, &alias->tcp_pg, ji->proto);
1214 		break;
1215 	case IPPROTO_UDP:
1216 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1217 		    &alias->udp_chunkmask, alias->udp_pgmask,
1218 		    alias->udp, &alias->udp_pg, ji->proto);
1219 		break;
1220 	case IPPROTO_ICMP:
1221 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1222 		    &alias->icmp_chunkmask, alias->icmp_pgmask,
1223 		    alias->icmp, &alias->icmp_pg, ji->proto);
1224 		break;
1225 	default:
1226 		panic("%s: wrong proto %d", __func__, ji->proto);
1227 	}
1228 	if (ret == PG_ERROR(1)) {
1229 		/*
1230 		 * PG_ERROR(1) means that alias lacks free PGs
1231 		 * XXX: try next alias.
1232 		 */
1233 		printf("NAT64LSN: %s: failed to obtain PG\n",
1234 		    __func__);
1235 		return (ret);
1236 	}
1237 	if (ret == PG_ERROR(0)) {
1238 		ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
1239 		    ji->state_hval, ji->faddr, ji->port, ji->proto);
1240 		if (ji->state == NULL)
1241 			ret = PG_ERROR(8);
1242 		else
1243 			ji->done = 1;
1244 	}
1245 	return (ret);
1246 }
1247 
1248 static void
nat64lsn_do_request(void * data)1249 nat64lsn_do_request(void *data)
1250 {
1251 	struct epoch_tracker et;
1252 	struct nat64lsn_job_head jhead;
1253 	struct nat64lsn_job_item *ji, *ji2;
1254 	struct nat64lsn_cfg *cfg;
1255 	int jcount;
1256 	uint8_t flags;
1257 
1258 	cfg = (struct nat64lsn_cfg *)data;
1259 	if (cfg->jlen == 0)
1260 		return;
1261 
1262 	CURVNET_SET(cfg->vp);
1263 	STAILQ_INIT(&jhead);
1264 
1265 	/* Grab queue */
1266 	JQUEUE_LOCK();
1267 	STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
1268 	jcount = cfg->jlen;
1269 	cfg->jlen = 0;
1270 	JQUEUE_UNLOCK();
1271 
1272 	/* TODO: check if we need to resize hash */
1273 
1274 	NAT64STAT_INC(&cfg->base.stats, jcalls);
1275 	DPRINTF(DP_JQUEUE, "count=%d", jcount);
1276 
1277 	/*
1278 	 * TODO:
1279 	 * What we should do here is to build a hash
1280 	 * to ensure we don't have lots of duplicate requests.
1281 	 * Skip this for now.
1282 	 *
1283 	 * TODO: Limit per-call number of items
1284 	 */
1285 
1286 	NAT64LSN_EPOCH_ENTER(et);
1287 	STAILQ_FOREACH(ji, &jhead, entries) {
1288 		switch (ji->jtype) {
1289 		case JTYPE_NEWHOST:
1290 			if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
1291 				NAT64STAT_INC(&cfg->base.stats, jhostfails);
1292 			break;
1293 		case JTYPE_NEWPORTGROUP:
1294 			if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1295 				NAT64STAT_INC(&cfg->base.stats, jportfails);
1296 			break;
1297 		default:
1298 			continue;
1299 		}
1300 		if (ji->done != 0) {
1301 			flags = ji->proto != IPPROTO_TCP ? 0 :
1302 			    convert_tcp_flags(ji->f_id._flags);
1303 			nat64lsn_translate6_internal(cfg, &ji->m,
1304 			    ji->state, flags);
1305 			NAT64STAT_INC(&cfg->base.stats, jreinjected);
1306 		}
1307 	}
1308 	NAT64LSN_EPOCH_EXIT(et);
1309 
1310 	ji = STAILQ_FIRST(&jhead);
1311 	while (ji != NULL) {
1312 		ji2 = STAILQ_NEXT(ji, entries);
1313 		/*
1314 		 * In any case we must free mbuf if
1315 		 * translator did not consumed it.
1316 		 */
1317 		m_freem(ji->m);
1318 		uma_zfree(nat64lsn_job_zone, ji);
1319 		ji = ji2;
1320 	}
1321 	CURVNET_RESTORE();
1322 }
1323 
1324 static struct nat64lsn_job_item *
nat64lsn_create_job(struct nat64lsn_cfg * cfg,int jtype)1325 nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
1326 {
1327 	struct nat64lsn_job_item *ji;
1328 
1329 	/*
1330 	 * Do not try to lock possibly contested mutex if we're near the
1331 	 * limit. Drop packet instead.
1332 	 */
1333 	ji = NULL;
1334 	if (cfg->jlen >= cfg->jmaxlen)
1335 		NAT64STAT_INC(&cfg->base.stats, jmaxlen);
1336 	else {
1337 		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
1338 		if (ji == NULL)
1339 			NAT64STAT_INC(&cfg->base.stats, jnomem);
1340 	}
1341 	if (ji == NULL) {
1342 		NAT64STAT_INC(&cfg->base.stats, dropped);
1343 		DPRINTF(DP_DROPS, "failed to create job");
1344 	} else {
1345 		ji->jtype = jtype;
1346 		ji->done = 0;
1347 	}
1348 	return (ji);
1349 }
1350 
1351 static void
nat64lsn_enqueue_job(struct nat64lsn_cfg * cfg,struct nat64lsn_job_item * ji)1352 nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1353 {
1354 
1355 	JQUEUE_LOCK();
1356 	STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
1357 	NAT64STAT_INC(&cfg->base.stats, jrequests);
1358 	cfg->jlen++;
1359 
1360 	if (callout_pending(&cfg->jcallout) == 0)
1361 		callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
1362 	JQUEUE_UNLOCK();
1363 }
1364 
1365 static void
nat64lsn_job_destroy(epoch_context_t ctx)1366 nat64lsn_job_destroy(epoch_context_t ctx)
1367 {
1368 	struct nat64lsn_job_item *ji;
1369 	struct nat64lsn_host *host;
1370 	struct nat64lsn_pg *pg;
1371 	int i;
1372 
1373 	ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
1374 	MPASS(ji->jtype == JTYPE_DESTROY);
1375 	while (!CK_SLIST_EMPTY(&ji->hosts)) {
1376 		host = CK_SLIST_FIRST(&ji->hosts);
1377 		CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
1378 		if (host->states_count > 0) {
1379 			/*
1380 			 * XXX: The state has been created
1381 			 * during host deletion.
1382 			 */
1383 			printf("NAT64LSN: %s: destroying host with %d "
1384 			    "states\n", __func__, host->states_count);
1385 		}
1386 		nat64lsn_destroy_host(host);
1387 	}
1388 	while (!CK_SLIST_EMPTY(&ji->portgroups)) {
1389 		pg = CK_SLIST_FIRST(&ji->portgroups);
1390 		CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
1391 		for (i = 0; i < pg->chunks_count; i++) {
1392 			if (FREEMASK_BITCOUNT(pg, i) != 64) {
1393 				/*
1394 				 * XXX: The state has been created during
1395 				 * PG deletion.
1396 				 */
1397 				printf("NAT64LSN: %s: destroying PG %p "
1398 				    "with non-empty chunk %d\n", __func__,
1399 				    pg, i);
1400 			}
1401 		}
1402 		nat64lsn_destroy_pg(pg);
1403 	}
1404 	uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
1405 	uma_zfree(nat64lsn_job_zone, ji);
1406 }
1407 
1408 static int
nat64lsn_request_host(struct nat64lsn_cfg * cfg,const struct ipfw_flow_id * f_id,struct mbuf ** mp,uint32_t hval,in_addr_t faddr,uint16_t port,uint8_t proto)1409 nat64lsn_request_host(struct nat64lsn_cfg *cfg,
1410     const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1411     in_addr_t faddr, uint16_t port, uint8_t proto)
1412 {
1413 	struct nat64lsn_job_item *ji;
1414 
1415 	ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
1416 	if (ji != NULL) {
1417 		ji->m = *mp;
1418 		ji->f_id = *f_id;
1419 		ji->faddr = faddr;
1420 		ji->port = port;
1421 		ji->proto = proto;
1422 		ji->src6_hval = hval;
1423 
1424 		nat64lsn_enqueue_job(cfg, ji);
1425 		NAT64STAT_INC(&cfg->base.stats, jhostsreq);
1426 		*mp = NULL;
1427 	}
1428 	return (IP_FW_DENY);
1429 }
1430 
1431 static int
nat64lsn_request_pg(struct nat64lsn_cfg * cfg,struct nat64lsn_host * host,const struct ipfw_flow_id * f_id,struct mbuf ** mp,uint32_t hval,in_addr_t faddr,uint16_t port,uint8_t proto)1432 nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
1433     const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1434     in_addr_t faddr, uint16_t port, uint8_t proto)
1435 {
1436 	struct nat64lsn_job_item *ji;
1437 
1438 	ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
1439 	if (ji != NULL) {
1440 		ji->m = *mp;
1441 		ji->f_id = *f_id;
1442 		ji->faddr = faddr;
1443 		ji->port = port;
1444 		ji->proto = proto;
1445 		ji->state_hval = hval;
1446 		ji->host = host;
1447 
1448 		nat64lsn_enqueue_job(cfg, ji);
1449 		NAT64STAT_INC(&cfg->base.stats, jportreq);
1450 		*mp = NULL;
1451 	}
1452 	return (IP_FW_DENY);
1453 }
1454 
1455 static int
nat64lsn_translate6_internal(struct nat64lsn_cfg * cfg,struct mbuf ** mp,struct nat64lsn_state * state,uint8_t flags)1456 nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
1457     struct nat64lsn_state *state, uint8_t flags)
1458 {
1459 	struct pfloghdr loghdr, *logdata;
1460 	int ret;
1461 	uint16_t ts;
1462 
1463 	/* Update timestamp and flags if needed */
1464 	SET_AGE(ts);
1465 	if (state->timestamp != ts)
1466 		state->timestamp = ts;
1467 	if ((state->flags & flags) != 0)
1468 		state->flags |= flags;
1469 
1470 	if (cfg->base.flags & NAT64_LOG) {
1471 		logdata = &loghdr;
1472 		nat64lsn_log(logdata, *mp, AF_INET6, state);
1473 	} else
1474 		logdata = NULL;
1475 
1476 	ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
1477 	    htons(state->aport), &cfg->base, logdata);
1478 	if (ret == NAT64SKIP)
1479 		return (cfg->nomatch_verdict);
1480 	if (ret == NAT64RETURN)
1481 		*mp = NULL;
1482 	return (IP_FW_DENY);
1483 }
1484 
1485 static int
nat64lsn_translate6(struct nat64lsn_cfg * cfg,struct ipfw_flow_id * f_id,struct mbuf ** mp)1486 nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
1487     struct mbuf **mp)
1488 {
1489 	struct nat64lsn_state *state;
1490 	struct nat64lsn_host *host;
1491 	struct icmp6_hdr *icmp6;
1492 	uint32_t addr, hval, data[2];
1493 	int offset, proto;
1494 	uint16_t port;
1495 	uint8_t flags;
1496 
1497 	/* Check if protocol is supported */
1498 	port = f_id->src_port;
1499 	proto = f_id->proto;
1500 	switch (f_id->proto) {
1501 	case IPPROTO_ICMPV6:
1502 		/*
1503 		 * For ICMPv6 echo reply/request we use icmp6_id as
1504 		 * local port.
1505 		 */
1506 		offset = 0;
1507 		proto = nat64_getlasthdr(*mp, &offset);
1508 		if (proto < 0) {
1509 			NAT64STAT_INC(&cfg->base.stats, dropped);
1510 			DPRINTF(DP_DROPS, "mbuf isn't contigious");
1511 			return (IP_FW_DENY);
1512 		}
1513 		if (proto == IPPROTO_ICMPV6) {
1514 			icmp6 = mtodo(*mp, offset);
1515 			if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
1516 			    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
1517 				port = ntohs(icmp6->icmp6_id);
1518 		}
1519 		proto = IPPROTO_ICMP;
1520 		/* FALLTHROUGH */
1521 	case IPPROTO_TCP:
1522 	case IPPROTO_UDP:
1523 		break;
1524 	default:
1525 		NAT64STAT_INC(&cfg->base.stats, noproto);
1526 		return (cfg->nomatch_verdict);
1527 	}
1528 
1529 	/* Extract IPv4 from destination IPv6 address */
1530 	addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
1531 	if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
1532 		char a[INET_ADDRSTRLEN];
1533 
1534 		NAT64STAT_INC(&cfg->base.stats, dropped);
1535 		DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
1536 		    inet_ntop(AF_INET, &addr, a, sizeof(a)));
1537 		return (IP_FW_DENY); /* XXX: add extra stats? */
1538 	}
1539 
1540 	/* Try to find host */
1541 	hval = HOST_HVAL(cfg, &f_id->src_ip6);
1542 	CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
1543 		if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
1544 			break;
1545 	}
1546 	/* We use IPv4 address in host byte order */
1547 	addr = ntohl(addr);
1548 	if (host == NULL)
1549 		return (nat64lsn_request_host(cfg, f_id, mp,
1550 		    hval, addr, port, proto));
1551 
1552 	flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);
1553 
1554 	data[0] = addr;
1555 	data[1] = (f_id->dst_port << 16) | port;
1556 	hval = STATE_HVAL(cfg, data);
1557 	state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
1558 	    port, proto);
1559 	if (state == NULL)
1560 		return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
1561 		    port, proto));
1562 	return (nat64lsn_translate6_internal(cfg, mp, state, flags));
1563 }
1564 
1565 /*
1566  * Main dataplane entry point.
1567  */
1568 int
ipfw_nat64lsn(struct ip_fw_chain * ch,struct ip_fw_args * args,ipfw_insn * cmd,int * done)1569 ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
1570     ipfw_insn *cmd, int *done)
1571 {
1572 	struct nat64lsn_cfg *cfg;
1573 	ipfw_insn *icmd;
1574 	int ret;
1575 
1576 	IPFW_RLOCK_ASSERT(ch);
1577 
1578 	*done = 0;	/* continue the search in case of failure */
1579 	icmd = cmd + 1;
1580 	if (cmd->opcode != O_EXTERNAL_ACTION ||
1581 	    cmd->arg1 != V_nat64lsn_eid ||
1582 	    icmd->opcode != O_EXTERNAL_INSTANCE ||
1583 	    (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
1584 		return (IP_FW_DENY);
1585 
1586 	*done = 1;	/* terminate the search */
1587 
1588 	switch (args->f_id.addr_type) {
1589 	case 4:
1590 		ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
1591 		break;
1592 	case 6:
1593 		/*
1594 		 * Check that destination IPv6 address matches our prefix6.
1595 		 */
1596 		if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
1597 		    memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
1598 		    cfg->base.plat_plen / 8) != 0) {
1599 			ret = cfg->nomatch_verdict;
1600 			break;
1601 		}
1602 		ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
1603 		break;
1604 	default:
1605 		ret = cfg->nomatch_verdict;
1606 	}
1607 
1608 	if (ret != IP_FW_PASS && args->m != NULL) {
1609 		m_freem(args->m);
1610 		args->m = NULL;
1611 	}
1612 	return (ret);
1613 }
1614 
1615 static int
nat64lsn_state_ctor(void * mem,int size,void * arg,int flags)1616 nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
1617 {
1618 	struct nat64lsn_states_chunk *chunk;
1619 	int i;
1620 
1621 	chunk = (struct nat64lsn_states_chunk *)mem;
1622 	for (i = 0; i < 64; i++)
1623 		chunk->state[i].flags = 0;
1624 	return (0);
1625 }
1626 
1627 void
nat64lsn_init_internal(void)1628 nat64lsn_init_internal(void)
1629 {
1630 
1631 	nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
1632 	    sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
1633 	    UMA_ALIGN_PTR, 0);
1634 	nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
1635 	    sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
1636 	    UMA_ALIGN_PTR, 0);
1637 	nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
1638 	    sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
1639 	    UMA_ALIGN_PTR, 0);
1640 	nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
1641 	    sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
1642 	    UMA_ALIGN_PTR, 0);
1643 	nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
1644 	    sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
1645 	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
1646 	nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
1647 	    sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
1648 	    UMA_ALIGN_PTR, 0);
1649 	JQUEUE_LOCK_INIT();
1650 }
1651 
1652 void
nat64lsn_uninit_internal(void)1653 nat64lsn_uninit_internal(void)
1654 {
1655 
1656 	/* XXX: epoch_task drain */
1657 	JQUEUE_LOCK_DESTROY();
1658 	uma_zdestroy(nat64lsn_host_zone);
1659 	uma_zdestroy(nat64lsn_pgchunk_zone);
1660 	uma_zdestroy(nat64lsn_pg_zone);
1661 	uma_zdestroy(nat64lsn_aliaslink_zone);
1662 	uma_zdestroy(nat64lsn_state_zone);
1663 	uma_zdestroy(nat64lsn_job_zone);
1664 }
1665 
1666 void
nat64lsn_start_instance(struct nat64lsn_cfg * cfg)1667 nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
1668 {
1669 
1670 	CALLOUT_LOCK(cfg);
1671 	callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
1672 	    nat64lsn_periodic, cfg);
1673 	CALLOUT_UNLOCK(cfg);
1674 }
1675 
1676 struct nat64lsn_cfg *
nat64lsn_init_instance(struct ip_fw_chain * ch,in_addr_t prefix,int plen)1677 nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
1678 {
1679 	struct nat64lsn_cfg *cfg;
1680 	struct nat64lsn_alias *alias;
1681 	int i, naddr;
1682 
1683 	cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
1684 	    M_WAITOK | M_ZERO);
1685 
1686 	CFG_LOCK_INIT(cfg);
1687 	CALLOUT_LOCK_INIT(cfg);
1688 	STAILQ_INIT(&cfg->jhead);
1689 	cfg->vp = curvnet;
1690 	COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);
1691 
1692 	cfg->hash_seed = arc4random();
1693 	cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
1694 	cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
1695 	    cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
1696 	for (i = 0; i < cfg->hosts_hashsize; i++)
1697 		CK_SLIST_INIT(&cfg->hosts_hash[i]);
1698 
1699 	naddr = 1 << (32 - plen);
1700 	cfg->prefix4 = prefix;
1701 	cfg->pmask4 = prefix | (naddr - 1);
1702 	cfg->plen4 = plen;
1703 	cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
1704 	    M_NAT64LSN, M_WAITOK | M_ZERO);
1705 	for (i = 0; i < naddr; i++) {
1706 		alias = &cfg->aliases[i];
1707 		alias->addr = prefix + i; /* host byte order */
1708 		CK_SLIST_INIT(&alias->hosts);
1709 		ALIAS_LOCK_INIT(alias);
1710 	}
1711 
1712 	callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
1713 	callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
1714 
1715 	return (cfg);
1716 }
1717 
1718 static void
nat64lsn_destroy_pg(struct nat64lsn_pg * pg)1719 nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
1720 {
1721 	int i;
1722 
1723 	if (pg->chunks_count == 1) {
1724 		uma_zfree(nat64lsn_state_zone, pg->states);
1725 	} else {
1726 		for (i = 0; i < pg->chunks_count; i++)
1727 			uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1728 		free(pg->states_chunk, M_NAT64LSN);
1729 		free(pg->freemask_chunk, M_NAT64LSN);
1730 	}
1731 	uma_zfree(nat64lsn_pg_zone, pg);
1732 }
1733 
1734 static void
nat64lsn_destroy_alias(struct nat64lsn_cfg * cfg,struct nat64lsn_alias * alias)1735 nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
1736     struct nat64lsn_alias *alias)
1737 {
1738 	struct nat64lsn_pg *pg;
1739 	int i;
1740 
1741 	while (!CK_SLIST_EMPTY(&alias->portgroups)) {
1742 		pg = CK_SLIST_FIRST(&alias->portgroups);
1743 		CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
1744 		nat64lsn_destroy_pg(pg);
1745 	}
1746 	for (i = 0; i < 32; i++) {
1747 		if (ISSET32(alias->tcp_chunkmask, i))
1748 			uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
1749 		if (ISSET32(alias->udp_chunkmask, i))
1750 			uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
1751 		if (ISSET32(alias->icmp_chunkmask, i))
1752 			uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
1753 	}
1754 	ALIAS_LOCK_DESTROY(alias);
1755 }
1756 
1757 static void
nat64lsn_destroy_host(struct nat64lsn_host * host)1758 nat64lsn_destroy_host(struct nat64lsn_host *host)
1759 {
1760 	struct nat64lsn_aliaslink *link;
1761 
1762 	while (!CK_SLIST_EMPTY(&host->aliases)) {
1763 		link = CK_SLIST_FIRST(&host->aliases);
1764 		CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);
1765 
1766 		ALIAS_LOCK(link->alias);
1767 		CK_SLIST_REMOVE(&link->alias->hosts, link,
1768 		    nat64lsn_aliaslink, alias_entries);
1769 		link->alias->hosts_count--;
1770 		ALIAS_UNLOCK(link->alias);
1771 
1772 		uma_zfree(nat64lsn_aliaslink_zone, link);
1773 	}
1774 	HOST_LOCK_DESTROY(host);
1775 	free(host->states_hash, M_NAT64LSN);
1776 	uma_zfree(nat64lsn_host_zone, host);
1777 }
1778 
1779 void
nat64lsn_destroy_instance(struct nat64lsn_cfg * cfg)1780 nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
1781 {
1782 	struct nat64lsn_host *host;
1783 	int i;
1784 
1785 	CALLOUT_LOCK(cfg);
1786 	callout_drain(&cfg->periodic);
1787 	CALLOUT_UNLOCK(cfg);
1788 	callout_drain(&cfg->jcallout);
1789 
1790 	for (i = 0; i < cfg->hosts_hashsize; i++) {
1791 		while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
1792 			host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
1793 			CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
1794 			nat64lsn_destroy_host(host);
1795 		}
1796 	}
1797 
1798 	for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
1799 		nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);
1800 
1801 	CALLOUT_LOCK_DESTROY(cfg);
1802 	CFG_LOCK_DESTROY(cfg);
1803 	COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
1804 	free(cfg->hosts_hash, M_NAT64LSN);
1805 	free(cfg->aliases, M_NAT64LSN);
1806 	free(cfg, M_NAT64LSN);
1807 }
1808