xref: /freebsd-src/sys/netpfil/ipfw/nat64/nat64lsn.c (revision 58f351825a371d1a3dd693d6f64a1245ea851a51)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2015-2019 Yandex LLC
5  * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6  * Copyright (c) 2016-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/counter.h>
36 #include <sys/ck.h>
37 #include <sys/epoch.h>
38 #include <sys/errno.h>
39 #include <sys/hash.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/module.h>
45 #include <sys/rmlock.h>
46 #include <sys/socket.h>
47 #include <sys/syslog.h>
48 #include <sys/sysctl.h>
49 
50 #include <net/if.h>
51 #include <net/if_var.h>
52 #include <net/if_pflog.h>
53 #include <net/pfil.h>
54 
55 #include <netinet/in.h>
56 #include <netinet/ip.h>
57 #include <netinet/ip_var.h>
58 #include <netinet/ip_fw.h>
59 #include <netinet/ip6.h>
60 #include <netinet/icmp6.h>
61 #include <netinet/ip_icmp.h>
62 #include <netinet/tcp.h>
63 #include <netinet/udp.h>
64 #include <netinet6/in6_var.h>
65 #include <netinet6/ip6_var.h>
66 #include <netinet6/ip_fw_nat64.h>
67 
68 #include <netpfil/ipfw/ip_fw_private.h>
69 #include <netpfil/pf/pf.h>
70 
71 #include "nat64lsn.h"
72 
73 MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");
74 
75 #define	NAT64LSN_EPOCH_ENTER(et)  NET_EPOCH_ENTER(et)
76 #define	NAT64LSN_EPOCH_EXIT(et)   NET_EPOCH_EXIT(et)
77 #define	NAT64LSN_EPOCH_ASSERT()   NET_EPOCH_ASSERT()
78 #define	NAT64LSN_EPOCH_CALL(c, f) NET_EPOCH_CALL((f), (c))
79 
80 static uma_zone_t nat64lsn_host_zone;
81 static uma_zone_t nat64lsn_pgchunk_zone;
82 static uma_zone_t nat64lsn_pg_zone;
83 static uma_zone_t nat64lsn_aliaslink_zone;
84 static uma_zone_t nat64lsn_state_zone;
85 static uma_zone_t nat64lsn_job_zone;
86 
87 static void nat64lsn_periodic(void *data);
88 #define	PERIODIC_DELAY		4
89 #define	NAT64_LOOKUP(chain, cmd)	\
90 	(struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
91 /*
92  * Delayed job queue, used to create new hosts
93  * and new portgroups
94  */
enum nat64lsn_jtype {
	/* Create a host entry (and its first state). */
	JTYPE_NEWHOST = 1,
	/* Create an additional portgroup. */
	JTYPE_NEWPORTGROUP = 2,
	/* Deferred destruction of expired hosts/portgroups. */
	JTYPE_DESTROY = 3,
};
100 
/*
 * One unit of deferred work. jtype tells which member of the anonymous
 * union is in use.
 */
struct nat64lsn_job_item {
	/* Linkage for the job queue. */
	STAILQ_ENTRY(nat64lsn_job_item)	entries;
	/* Kind of job; selects the active union member below. */
	enum nat64lsn_jtype	jtype;

	union {
		struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
			/* presumably the packet waiting for this job - TODO confirm */
			struct mbuf		*m;
			/* Host found or created by nat64lsn_alloc_host(). */
			struct nat64lsn_host	*host;
			/* State obtained for the flow, once one is available. */
			struct nat64lsn_state	*state;
			/* Hash of the IPv6 source; picks the bucket in cfg's hosts hash. */
			uint32_t		src6_hval;
			/* State hash, computed by nat64lsn_alloc_host(). */
			uint32_t		state_hval;
			/* Flow id; src_ip6 is used as the host address. */
			struct ipfw_flow_id	f_id;
			/* faddr/port/proto are passed to nat64lsn_get_state6to4(). */
			in_addr_t		faddr;
			uint16_t		port;
			uint8_t			proto;
			/* Set to 1 by nat64lsn_alloc_host() on success. */
			uint8_t			done;
		};
		struct { /* used by JTYPE_DESTROY */
			/* Hosts unlinked by nat64lsn_expire_hosts(). */
			struct nat64lsn_hosts_slist	hosts;
			/* Portgroups unlinked by nat64lsn_expire_portgroups(). */
			struct nat64lsn_pg_slist	portgroups;
			/* Result of nat64lsn_expire_pgchunk(). */
			struct nat64lsn_pgchunk		*pgchunk;
			/* Context handed to NAT64LSN_EPOCH_CALL(). */
			struct epoch_context		epoch_ctx;
		};
	};
};
126 
127 static struct mtx jmtx;
128 #define	JQUEUE_LOCK_INIT()	mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
129 #define	JQUEUE_LOCK_DESTROY()	mtx_destroy(&jmtx)
130 #define	JQUEUE_LOCK()		mtx_lock(&jmtx)
131 #define	JQUEUE_UNLOCK()		mtx_unlock(&jmtx)
132 
133 static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
134     struct nat64lsn_job_item *ji);
135 static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
136     struct nat64lsn_job_item *ji);
137 static struct nat64lsn_job_item *nat64lsn_create_job(
138     struct nat64lsn_cfg *cfg, int jtype);
139 static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
140     struct nat64lsn_job_item *ji);
141 static void nat64lsn_job_destroy(epoch_context_t ctx);
142 static void nat64lsn_destroy_host(struct nat64lsn_host *host);
143 static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);
144 
145 static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
146     const struct ipfw_flow_id *f_id, struct mbuf **mp);
147 static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
148     struct ipfw_flow_id *f_id, struct mbuf **mp);
149 static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
150     struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);
151 
/*
 * Bit positions inside a state's flags word. Bits 0-2 are the values
 * produced by convert_tcp_flags(); bits 6 and 7 track the state's
 * lifecycle (ready for translate4 / about to be expired).
 */
#define	NAT64_BIT_TCP_FIN	0	/* FIN was seen */
#define	NAT64_BIT_TCP_SYN	1	/* First syn in->out */
#define	NAT64_BIT_TCP_ESTAB	2	/* Packet with Ack */
#define	NAT64_BIT_READY_IPV4	6	/* state is ready for translate4 */
#define	NAT64_BIT_STALE		7	/* state is going to be expired */

/* Mask forms of the TCP bits above. */
#define	NAT64_FLAG_FIN		(1 << NAT64_BIT_TCP_FIN)
#define	NAT64_FLAG_SYN		(1 << NAT64_BIT_TCP_SYN)
#define	NAT64_FLAG_ESTAB	(1 << NAT64_BIT_TCP_ESTAB)
#define	NAT64_FLAGS_TCP	(NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)

/* Mask forms of the lifecycle bits above. */
#define	NAT64_FLAG_READY	(1 << NAT64_BIT_READY_IPV4)
#define	NAT64_FLAG_STALE	(1 << NAT64_BIT_STALE)
165 
/*
 * Fold TCP header flags into the three TCP bits kept in a state.
 * FIN and SYN keep their bit positions, RST lands on the FIN bit and
 * ACK lands on the ESTAB bit; every other header flag is dropped.
 */
static inline uint8_t
convert_tcp_flags(uint8_t flags)
{
	uint8_t fin_syn, rst_bit, ack_bit;

	fin_syn = flags & (TH_FIN | TH_SYN);
	rst_bit = (flags & TH_RST) >> 2;	/* RST counts as FIN */
	ack_bit = (flags & TH_ACK) >> 2;	/* ACK counts as established */

	return (fin_syn | rst_bit | ack_bit);
}
177 
178 static void
179 nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
180     struct nat64lsn_state *state)
181 {
182 
183 	memset(plog, 0, sizeof(*plog));
184 	plog->length = PFLOG_REAL_HDRLEN;
185 	plog->af = family;
186 	plog->action = PF_NAT;
187 	plog->dir = PF_IN;
188 	plog->rulenr = htonl(state->ip_src);
189 	plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
190 	    (state->proto << 8) | (state->ip_dst & 0xff));
191 	plog->ruleset[0] = '\0';
192 	strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
193 	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
194 }
195 
196 #define	HVAL(p, n, s)	jenkins_hash32((const uint32_t *)(p), (n), (s))
197 #define	HOST_HVAL(c, a)	HVAL((a),\
198     sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
199 #define	HOSTS(c, v)	((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])
200 
201 #define	ALIASLINK_HVAL(c, f)	HVAL(&(f)->dst_ip6,\
202     sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
203 #define	ALIAS_BYHASH(c, v)	\
204     ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
205 static struct nat64lsn_aliaslink*
206 nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
207     struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
208 {
209 
210 	/*
211 	 * We can implement some different algorithms how
212 	 * select an alias address.
213 	 * XXX: for now we use first available.
214 	 */
215 	return (CK_SLIST_FIRST(&host->aliases));
216 }
217 
218 #define	STATE_HVAL(c, d)	HVAL((d), 2, (c)->hash_seed)
219 #define	STATE_HASH(h, v)	\
220     ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
221 #define	STATES_CHUNK(p, v)	\
222     ((p)->chunks_count == 1 ? (p)->states : \
223 	((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))
224 
225 #ifdef __LP64__
226 #define	FREEMASK_FFSLL(pg, faddr)		\
227     ffsll(*FREEMASK_CHUNK((pg), (faddr)))
228 #define	FREEMASK_BTR(pg, faddr, bit)	\
229     ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
230 #define	FREEMASK_BTS(pg, faddr, bit)	\
231     ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
232 #define	FREEMASK_ISSET(pg, faddr, bit)	\
233     ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
234 #define	FREEMASK_COPY(pg, n, out)	\
235     (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
236 #else
/*
 * Find the first set bit in a 64-bit mask stored as two 32-bit words
 * (low word first). Returns the 1-based bit position, or 0 when both
 * words are zero.
 */
static inline int
freemask_ffsll(uint32_t *freemask)
{
	int bit;

	bit = ffsl(freemask[0]);
	if (bit != 0)
		return (bit);
	bit = ffsl(freemask[1]);
	return (bit != 0 ? bit + 32 : 0);
}
248 #define	FREEMASK_FFSLL(pg, faddr)		\
249     freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
250 #define	FREEMASK_BTR(pg, faddr, bit)	\
251     ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
252 #define	FREEMASK_BTS(pg, faddr, bit)	\
253     ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
254 #define	FREEMASK_ISSET(pg, faddr, bit)	\
255     ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
256 #define	FREEMASK_COPY(pg, n, out)	\
257     (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
258 	((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
259 #endif /* !__LP64__ */
260 
261 #define	NAT64LSN_TRY_PGCNT	32
262 static struct nat64lsn_pg*
263 nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
264     struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
265     uint32_t *pgidx, in_addr_t faddr)
266 {
267 	struct nat64lsn_pg *pg, *oldpg;
268 	uint32_t idx, oldidx;
269 	int cnt;
270 
271 	cnt = 0;
272 	/* First try last used PG */
273 	oldpg = pg = ck_pr_load_ptr(pgptr);
274 	idx = oldidx = ck_pr_load_32(pgidx);
275 	/* If pgidx is out of range, reset it to the first pgchunk */
276 	if (!ISSET32(*chunkmask, idx / 32))
277 		idx = 0;
278 	do {
279 		ck_pr_fence_load();
280 		if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
281 			/*
282 			 * If last used PG has not free states,
283 			 * try to update pointer.
284 			 * NOTE: it can be already updated by jobs handler,
285 			 *	 thus we use CAS operation.
286 			 */
287 			if (cnt > 0)
288 				ck_pr_cas_ptr(pgptr, oldpg, pg);
289 			return (pg);
290 		}
291 		/* Stop if idx is out of range */
292 		if (!ISSET32(*chunkmask, idx / 32))
293 			break;
294 
295 		if (ISSET32(pgmask[idx / 32], idx % 32))
296 			pg = ck_pr_load_ptr(
297 			    &chunks[idx / 32]->pgptr[idx % 32]);
298 		else
299 			pg = NULL;
300 
301 		idx++;
302 	} while (++cnt < NAT64LSN_TRY_PGCNT);
303 
304 	/* If pgidx is out of range, reset it to the first pgchunk */
305 	if (!ISSET32(*chunkmask, idx / 32))
306 		idx = 0;
307 	ck_pr_cas_32(pgidx, oldidx, idx);
308 	return (NULL);
309 }
310 
311 static struct nat64lsn_state*
312 nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
313     const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
314     uint16_t port, uint8_t proto)
315 {
316 	struct nat64lsn_aliaslink *link;
317 	struct nat64lsn_state *state;
318 	struct nat64lsn_pg *pg;
319 	int i, offset;
320 
321 	NAT64LSN_EPOCH_ASSERT();
322 
323 	/* Check that we already have state for given arguments */
324 	CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
325 		if (state->proto == proto && state->ip_dst == faddr &&
326 		    state->sport == port && state->dport == f_id->dst_port)
327 			return (state);
328 	}
329 
330 	link = nat64lsn_get_aliaslink(cfg, host, f_id);
331 	if (link == NULL)
332 		return (NULL);
333 
334 	switch (proto) {
335 	case IPPROTO_TCP:
336 		pg = nat64lsn_get_pg(
337 		    &link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
338 		    link->alias->tcp, &link->alias->tcp_pg,
339 		    &link->alias->tcp_pgidx, faddr);
340 		break;
341 	case IPPROTO_UDP:
342 		pg = nat64lsn_get_pg(
343 		    &link->alias->udp_chunkmask, link->alias->udp_pgmask,
344 		    link->alias->udp, &link->alias->udp_pg,
345 		    &link->alias->udp_pgidx, faddr);
346 		break;
347 	case IPPROTO_ICMP:
348 		pg = nat64lsn_get_pg(
349 		    &link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
350 		    link->alias->icmp, &link->alias->icmp_pg,
351 		    &link->alias->icmp_pgidx, faddr);
352 		break;
353 	default:
354 		panic("%s: wrong proto %d", __func__, proto);
355 	}
356 	if (pg == NULL)
357 		return (NULL);
358 
359 	/* Check that PG has some free states */
360 	state = NULL;
361 	i = FREEMASK_BITCOUNT(pg, faddr);
362 	while (i-- > 0) {
363 		offset = FREEMASK_FFSLL(pg, faddr);
364 		if (offset == 0) {
365 			/*
366 			 * We lost the race.
367 			 * No more free states in this PG.
368 			 */
369 			break;
370 		}
371 
372 		/* Lets try to atomically grab the state */
373 		if (FREEMASK_BTR(pg, faddr, offset - 1)) {
374 			state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
375 			/* Initialize */
376 			state->flags = proto != IPPROTO_TCP ? 0 :
377 			    convert_tcp_flags(f_id->_flags);
378 			state->proto = proto;
379 			state->aport = pg->base_port + offset - 1;
380 			state->dport = f_id->dst_port;
381 			state->sport = port;
382 			state->ip6_dst = f_id->dst_ip6;
383 			state->ip_dst = faddr;
384 			state->ip_src = link->alias->addr;
385 			state->hval = hval;
386 			state->host = host;
387 			SET_AGE(state->timestamp);
388 
389 			/* Insert new state into host's hash table */
390 			HOST_LOCK(host);
391 			CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
392 			    state, entries);
393 			host->states_count++;
394 			/*
395 			 * XXX: In case if host is going to be expired,
396 			 * reset NAT64LSN_DEADHOST flag.
397 			 */
398 			host->flags &= ~NAT64LSN_DEADHOST;
399 			HOST_UNLOCK(host);
400 			NAT64STAT_INC(&cfg->base.stats, screated);
401 			/* Mark the state as ready for translate4 */
402 			ck_pr_fence_store();
403 			ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
404 			break;
405 		}
406 	}
407 	return (state);
408 }
409 
410 /*
411  * Inspects icmp packets to see if the message contains different
412  * packet header so we need to alter @addr and @port.
413  */
414 static int
415 inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
416     uint16_t *port)
417 {
418 	struct icmp *icmp;
419 	struct ip *ip;
420 	int off;
421 	uint8_t inner_proto;
422 
423 	ip = mtod(*mp, struct ip *); /* Outer IP header */
424 	off = (ip->ip_hl << 2) + ICMP_MINLEN;
425 	if ((*mp)->m_len < off)
426 		*mp = m_pullup(*mp, off);
427 	if (*mp == NULL)
428 		return (ENOMEM);
429 
430 	ip = mtod(*mp, struct ip *); /* Outer IP header */
431 	icmp = L3HDR(ip, struct icmp *);
432 	switch (icmp->icmp_type) {
433 	case ICMP_ECHO:
434 	case ICMP_ECHOREPLY:
435 		/* Use icmp ID as distinguisher */
436 		*port = ntohs(icmp->icmp_id);
437 		return (0);
438 	case ICMP_UNREACH:
439 	case ICMP_TIMXCEED:
440 		break;
441 	default:
442 		return (EOPNOTSUPP);
443 	}
444 	/*
445 	 * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
446 	 * of ULP header.
447 	 */
448 	if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
449 		return (EINVAL);
450 	if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
451 		*mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
452 	if (*mp == NULL)
453 		return (ENOMEM);
454 	ip = mtodo(*mp, off); /* Inner IP header */
455 	inner_proto = ip->ip_p;
456 	off += ip->ip_hl << 2; /* Skip inner IP header */
457 	*addr = ntohl(ip->ip_src.s_addr);
458 	if ((*mp)->m_len < off + ICMP_MINLEN)
459 		*mp = m_pullup(*mp, off + ICMP_MINLEN);
460 	if (*mp == NULL)
461 		return (ENOMEM);
462 	switch (inner_proto) {
463 	case IPPROTO_TCP:
464 	case IPPROTO_UDP:
465 		/* Copy source port from the header */
466 		*port = ntohs(*((uint16_t *)mtodo(*mp, off)));
467 		*proto = inner_proto;
468 		return (0);
469 	case IPPROTO_ICMP:
470 		/*
471 		 * We will translate only ICMP errors for our ICMP
472 		 * echo requests.
473 		 */
474 		icmp = mtodo(*mp, off);
475 		if (icmp->icmp_type != ICMP_ECHO)
476 			return (EOPNOTSUPP);
477 		*port = ntohs(icmp->icmp_id);
478 		return (0);
479 	};
480 	return (EOPNOTSUPP);
481 }
482 
483 static struct nat64lsn_state*
484 nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
485     in_addr_t faddr, uint16_t port, uint8_t proto)
486 {
487 	struct nat64lsn_state *state;
488 	struct nat64lsn_pg *pg;
489 	int chunk_idx, pg_idx, state_idx;
490 
491 	NAT64LSN_EPOCH_ASSERT();
492 
493 	if (port < NAT64_MIN_PORT)
494 		return (NULL);
495 	/*
496 	 * Alias keeps 32 pgchunks for each protocol.
497 	 * Each pgchunk has 32 pointers to portgroup.
498 	 * Each portgroup has 64 states for ports.
499 	 */
500 	port -= NAT64_MIN_PORT;
501 	chunk_idx = port / 2048;
502 
503 	port -= chunk_idx * 2048;
504 	pg_idx = port / 64;
505 	state_idx = port % 64;
506 
507 	/*
508 	 * First check in proto_chunkmask that we have allocated PG chunk.
509 	 * Then check in proto_pgmask that we have valid PG pointer.
510 	 */
511 	pg = NULL;
512 	switch (proto) {
513 	case IPPROTO_TCP:
514 		if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
515 		    ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
516 			pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
517 			break;
518 		}
519 		return (NULL);
520 	case IPPROTO_UDP:
521 		if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
522 		    ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
523 			pg = alias->udp[chunk_idx]->pgptr[pg_idx];
524 			break;
525 		}
526 		return (NULL);
527 	case IPPROTO_ICMP:
528 		if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
529 		    ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
530 			pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
531 			break;
532 		}
533 		return (NULL);
534 	default:
535 		panic("%s: wrong proto %d", __func__, proto);
536 	}
537 	if (pg == NULL)
538 		return (NULL);
539 
540 	if (FREEMASK_ISSET(pg, faddr, state_idx))
541 		return (NULL);
542 
543 	state = &STATES_CHUNK(pg, faddr)->state[state_idx];
544 	ck_pr_fence_load();
545 	if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
546 		return (state);
547 	return (NULL);
548 }
549 
550 static int
551 nat64lsn_translate4(struct nat64lsn_cfg *cfg,
552     const struct ipfw_flow_id *f_id, struct mbuf **mp)
553 {
554 	struct pfloghdr loghdr, *logdata;
555 	struct in6_addr src6;
556 	struct nat64lsn_state *state;
557 	struct nat64lsn_alias *alias;
558 	uint32_t addr, flags;
559 	uint16_t port, ts;
560 	int ret;
561 	uint8_t proto;
562 
563 	addr = f_id->dst_ip;
564 	port = f_id->dst_port;
565 	proto = f_id->proto;
566 	if (addr < cfg->prefix4 || addr > cfg->pmask4) {
567 		NAT64STAT_INC(&cfg->base.stats, nomatch4);
568 		return (cfg->nomatch_verdict);
569 	}
570 
571 	/* Check if protocol is supported */
572 	switch (proto) {
573 	case IPPROTO_ICMP:
574 		ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
575 		if (ret != 0) {
576 			if (ret == ENOMEM) {
577 				NAT64STAT_INC(&cfg->base.stats, nomem);
578 				return (IP_FW_DENY);
579 			}
580 			NAT64STAT_INC(&cfg->base.stats, noproto);
581 			return (cfg->nomatch_verdict);
582 		}
583 		if (addr < cfg->prefix4 || addr > cfg->pmask4) {
584 			NAT64STAT_INC(&cfg->base.stats, nomatch4);
585 			return (cfg->nomatch_verdict);
586 		}
587 		/* FALLTHROUGH */
588 	case IPPROTO_TCP:
589 	case IPPROTO_UDP:
590 		break;
591 	default:
592 		NAT64STAT_INC(&cfg->base.stats, noproto);
593 		return (cfg->nomatch_verdict);
594 	}
595 
596 	alias = &ALIAS_BYHASH(cfg, addr);
597 	MPASS(addr == alias->addr);
598 
599 	/* Check that we have state for this port */
600 	state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
601 	    port, proto);
602 	if (state == NULL) {
603 		NAT64STAT_INC(&cfg->base.stats, nomatch4);
604 		return (cfg->nomatch_verdict);
605 	}
606 
607 	/* TODO: Check flags to see if we need to do some static mapping */
608 
609 	/* Update some state fields if need */
610 	SET_AGE(ts);
611 	if (f_id->proto == IPPROTO_TCP)
612 		flags = convert_tcp_flags(f_id->_flags);
613 	else
614 		flags = 0;
615 	if (state->timestamp != ts)
616 		state->timestamp = ts;
617 	if ((state->flags & flags) != flags)
618 		state->flags |= flags;
619 
620 	port = htons(state->sport);
621 	src6 = state->ip6_dst;
622 
623 	if (cfg->base.flags & NAT64_LOG) {
624 		logdata = &loghdr;
625 		nat64lsn_log(logdata, *mp, AF_INET, state);
626 	} else
627 		logdata = NULL;
628 
629 	/*
630 	 * We already have src6 with embedded address, but it is possible,
631 	 * that src_ip is different than state->ip_dst, this is why we
632 	 * do embedding again.
633 	 */
634 	nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
635 	ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
636 	    &cfg->base, logdata);
637 	if (ret == NAT64SKIP)
638 		return (cfg->nomatch_verdict);
639 	if (ret == NAT64RETURN)
640 		*mp = NULL;
641 	return (IP_FW_DENY);
642 }
643 
644 /*
645  * Check if particular state is stale and should be deleted.
646  * Return 1 if true, 0 otherwise.
647  */
648 static int
649 nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
650 {
651 	int age, ttl;
652 
653 	/* State was marked as stale in previous pass. */
654 	if (ISSET32(state->flags, NAT64_BIT_STALE))
655 		return (1);
656 
657 	/* State is not yet initialized, it is going to be READY */
658 	if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
659 		return (0);
660 
661 	age = GET_AGE(state->timestamp);
662 	switch (state->proto) {
663 	case IPPROTO_TCP:
664 		if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
665 			ttl = cfg->st_close_ttl;
666 		else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
667 			ttl = cfg->st_estab_ttl;
668 		else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
669 			ttl = cfg->st_syn_ttl;
670 		else
671 			ttl = cfg->st_syn_ttl;
672 		if (age > ttl)
673 			return (1);
674 		break;
675 	case IPPROTO_UDP:
676 		if (age > cfg->st_udp_ttl)
677 			return (1);
678 		break;
679 	case IPPROTO_ICMP:
680 		if (age > cfg->st_icmp_ttl)
681 			return (1);
682 		break;
683 	}
684 	return (0);
685 }
686 
687 static int
688 nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
689 {
690 	struct nat64lsn_state *state;
691 	struct nat64lsn_host *host;
692 	uint64_t freemask;
693 	int c, i, update_age;
694 
695 	update_age = 0;
696 	for (c = 0; c < pg->chunks_count; c++) {
697 		FREEMASK_COPY(pg, c, freemask);
698 		for (i = 0; i < 64; i++) {
699 			if (ISSET64(freemask, i))
700 				continue;
701 			state = &STATES_CHUNK(pg, c)->state[i];
702 			if (nat64lsn_check_state(cfg, state) == 0) {
703 				update_age = 1;
704 				continue;
705 			}
706 			/*
707 			 * Expire state:
708 			 * 1. Mark as STALE and unlink from host's hash.
709 			 * 2. Set bit in freemask.
710 			 */
711 			if (ISSET32(state->flags, NAT64_BIT_STALE)) {
712 				/*
713 				 * State was marked as STALE in previous
714 				 * pass. Now it is safe to release it.
715 				 */
716 				state->flags = 0;
717 				ck_pr_fence_store();
718 				FREEMASK_BTS(pg, c, i);
719 				NAT64STAT_INC(&cfg->base.stats, sdeleted);
720 				continue;
721 			}
722 			MPASS(state->flags & NAT64_FLAG_READY);
723 
724 			host = state->host;
725 			HOST_LOCK(host);
726 			CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
727 			    state, nat64lsn_state, entries);
728 			host->states_count--;
729 			HOST_UNLOCK(host);
730 
731 			/* Reset READY flag */
732 			ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
733 			/* And set STALE flag */
734 			ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
735 			ck_pr_fence_store();
736 			/*
737 			 * Now translate6 will not use this state, wait
738 			 * until it become safe for translate4, then mark
739 			 * state as free.
740 			 */
741 		}
742 	}
743 
744 	/*
745 	 * We have some alive states, update timestamp.
746 	 */
747 	if (update_age)
748 		SET_AGE(pg->timestamp);
749 
750 	if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
751 		return (0);
752 
753 	return (1);
754 }
755 
756 static void
757 nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
758     struct nat64lsn_pg_slist *portgroups)
759 {
760 	struct nat64lsn_alias *alias;
761 	struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
762 	uint32_t *pgmask, *pgidx;
763 	int i, idx;
764 
765 	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
766 		alias = &cfg->aliases[i];
767 		CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
768 			if (nat64lsn_maintain_pg(cfg, pg) == 0)
769 				continue;
770 			/* Always keep first PG */
771 			if (pg->base_port == NAT64_MIN_PORT)
772 				continue;
773 			/*
774 			 * PG is expired, unlink it and schedule for
775 			 * deferred destroying.
776 			 */
777 			idx = (pg->base_port - NAT64_MIN_PORT) / 64;
778 			switch (pg->proto) {
779 			case IPPROTO_TCP:
780 				pgmask = alias->tcp_pgmask;
781 				pgptr = &alias->tcp_pg;
782 				pgidx = &alias->tcp_pgidx;
783 				firstpg = alias->tcp[0]->pgptr[0];
784 				break;
785 			case IPPROTO_UDP:
786 				pgmask = alias->udp_pgmask;
787 				pgptr = &alias->udp_pg;
788 				pgidx = &alias->udp_pgidx;
789 				firstpg = alias->udp[0]->pgptr[0];
790 				break;
791 			case IPPROTO_ICMP:
792 				pgmask = alias->icmp_pgmask;
793 				pgptr = &alias->icmp_pg;
794 				pgidx = &alias->icmp_pgidx;
795 				firstpg = alias->icmp[0]->pgptr[0];
796 				break;
797 			}
798 			/* Reset the corresponding bit in pgmask array. */
799 			ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
800 			ck_pr_fence_store();
801 			/* If last used PG points to this PG, reset it. */
802 			ck_pr_cas_ptr(pgptr, pg, firstpg);
803 			ck_pr_cas_32(pgidx, idx, 0);
804 			/* Unlink PG from alias's chain */
805 			ALIAS_LOCK(alias);
806 			CK_SLIST_REMOVE(&alias->portgroups, pg,
807 			    nat64lsn_pg, entries);
808 			alias->portgroups_count--;
809 			ALIAS_UNLOCK(alias);
810 			/* And link to job's chain for deferred destroying */
811 			NAT64STAT_INC(&cfg->base.stats, spgdeleted);
812 			CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
813 		}
814 	}
815 }
816 
817 static void
818 nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
819     struct nat64lsn_hosts_slist *hosts)
820 {
821 	struct nat64lsn_host *host, *tmp;
822 	int i;
823 
824 	for (i = 0; i < cfg->hosts_hashsize; i++) {
825 		CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
826 		    entries, tmp) {
827 			/* Is host was marked in previous call? */
828 			if (host->flags & NAT64LSN_DEADHOST) {
829 				if (host->states_count > 0) {
830 					host->flags &= ~NAT64LSN_DEADHOST;
831 					continue;
832 				}
833 				/*
834 				 * Unlink host from hash table and schedule
835 				 * it for deferred destroying.
836 				 */
837 				CFG_LOCK(cfg);
838 				CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
839 				    nat64lsn_host, entries);
840 				cfg->hosts_count--;
841 				CFG_UNLOCK(cfg);
842 				CK_SLIST_INSERT_HEAD(hosts, host, entries);
843 				continue;
844 			}
845 			if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
846 				continue;
847 			if (host->states_count > 0)
848 				continue;
849 			/* Mark host as going to be expired in next pass */
850 			host->flags |= NAT64LSN_DEADHOST;
851 			ck_pr_fence_store();
852 		}
853 	}
854 }
855 
/*
 * Placeholder for expiring unused PG chunks.
 *
 * The scanning code below is compiled out with "#if 0", so this function
 * currently never selects a chunk and always returns NULL.
 */
static struct nat64lsn_pgchunk*
nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
{
#if 0
	struct nat64lsn_alias *alias;
	struct nat64lsn_pgchunk *chunk;
	uint32_t pgmask;
	int i, c;

	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
		alias = &cfg->aliases[i];
		if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
			continue;
		/* Always keep single chunk allocated */
		for (c = 1; c < 32; c++) {
			if ((alias->tcp_chunkmask & (1 << c)) == 0)
				break;
			chunk = ck_pr_load_ptr(&alias->tcp[c]);
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
			ck_pr_btr_32(&alias->tcp_chunkmask, c);
			ck_pr_fence_load();
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
		}
	}
#endif
	/* Nothing is ever scheduled for destruction here. */
	return (NULL);
}
885 
886 #if 0
887 static void
888 nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
889 {
890 	struct nat64lsn_host *h;
891 	struct nat64lsn_states_slist *hash;
892 	int i, j, hsize;
893 
894 	for (i = 0; i < cfg->hosts_hashsize; i++) {
895 		CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
896 			 if (h->states_count / 2 < h->states_hashsize ||
897 			     h->states_hashsize >= NAT64LSN_MAX_HSIZE)
898 				 continue;
899 			 hsize = h->states_hashsize * 2;
900 			 hash = malloc(sizeof(*hash)* hsize, M_NOWAIT);
901 			 if (hash == NULL)
902 				 continue;
903 			 for (j = 0; j < hsize; j++)
904 				CK_SLIST_INIT(&hash[i]);
905 
906 			 ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
907 		}
908 	}
909 }
910 #endif
911 
912 /*
913  * This procedure is used to perform various maintance
914  * on dynamic hash list. Currently it is called every 4 seconds.
915  */
916 static void
917 nat64lsn_periodic(void *data)
918 {
919 	struct nat64lsn_job_item *ji;
920 	struct nat64lsn_cfg *cfg;
921 
922 	cfg = (struct nat64lsn_cfg *) data;
923 	CURVNET_SET(cfg->vp);
924 	if (cfg->hosts_count > 0) {
925 		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
926 		if (ji != NULL) {
927 			ji->jtype = JTYPE_DESTROY;
928 			CK_SLIST_INIT(&ji->hosts);
929 			CK_SLIST_INIT(&ji->portgroups);
930 			nat64lsn_expire_hosts(cfg, &ji->hosts);
931 			nat64lsn_expire_portgroups(cfg, &ji->portgroups);
932 			ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
933 			NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
934 			    nat64lsn_job_destroy);
935 		} else
936 			NAT64STAT_INC(&cfg->base.stats, jnomem);
937 	}
938 	callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
939 	CURVNET_RESTORE();
940 }
941 
/*
 * Allocation result codes: stage 0 means success and yields 0; any
 * other stage yields 10 * type + stage, where type is 1 for host
 * allocation and 2 for portgroup allocation.
 */
#define	ALLOC_ERROR(stage, type)	\
    ((stage) != 0 ? (type) * 10 + (stage) : 0)
#define	HOST_ERROR(stage)		ALLOC_ERROR((stage), 1)
#define	PG_ERROR(stage)			ALLOC_ERROR((stage), 2)
945 static int
946 nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
947 {
948 	char a[INET6_ADDRSTRLEN];
949 	struct nat64lsn_aliaslink *link;
950 	struct nat64lsn_host *host;
951 	struct nat64lsn_state *state;
952 	uint32_t hval, data[2];
953 	int i;
954 
955 	/* Check that host was not yet added. */
956 	NAT64LSN_EPOCH_ASSERT();
957 	CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
958 		if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
959 			/* The host was allocated in previous call. */
960 			ji->host = host;
961 			goto get_state;
962 		}
963 	}
964 
965 	host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
966 	if (ji->host == NULL)
967 		return (HOST_ERROR(1));
968 
969 	host->states_hashsize = NAT64LSN_HSIZE;
970 	host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
971 	    host->states_hashsize, M_NAT64LSN, M_NOWAIT);
972 	if (host->states_hash == NULL) {
973 		uma_zfree(nat64lsn_host_zone, host);
974 		return (HOST_ERROR(2));
975 	}
976 
977 	link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
978 	if (link == NULL) {
979 		free(host->states_hash, M_NAT64LSN);
980 		uma_zfree(nat64lsn_host_zone, host);
981 		return (HOST_ERROR(3));
982 	}
983 
984 	/* Initialize */
985 	HOST_LOCK_INIT(host);
986 	SET_AGE(host->timestamp);
987 	host->addr = ji->f_id.src_ip6;
988 	host->hval = ji->src6_hval;
989 	host->flags = 0;
990 	host->states_count = 0;
991 	host->states_hashsize = NAT64LSN_HSIZE;
992 	CK_SLIST_INIT(&host->aliases);
993 	for (i = 0; i < host->states_hashsize; i++)
994 		CK_SLIST_INIT(&host->states_hash[i]);
995 
996 	/* Determine alias from flow hash. */
997 	hval = ALIASLINK_HVAL(cfg, &ji->f_id);
998 	link->alias = &ALIAS_BYHASH(cfg, hval);
999 	CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);
1000 
1001 	ALIAS_LOCK(link->alias);
1002 	CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
1003 	link->alias->hosts_count++;
1004 	ALIAS_UNLOCK(link->alias);
1005 
1006 	CFG_LOCK(cfg);
1007 	CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
1008 	cfg->hosts_count++;
1009 	CFG_UNLOCK(cfg);
1010 
1011 get_state:
1012 	data[0] = ji->faddr;
1013 	data[1] = (ji->f_id.dst_port << 16) | ji->port;
1014 	ji->state_hval = hval = STATE_HVAL(cfg, data);
1015 	state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
1016 	    ji->faddr, ji->port, ji->proto);
1017 	/*
1018 	 * We failed to obtain new state, used alias needs new PG.
1019 	 * XXX: or another alias should be used.
1020 	 */
1021 	if (state == NULL) {
1022 		/* Try to allocate new PG */
1023 		if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1024 			return (HOST_ERROR(4));
1025 		/* We assume that nat64lsn_alloc_pg() got state */
1026 	} else
1027 		ji->state = state;
1028 
1029 	ji->done = 1;
1030 	DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
1031 	    inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
1032 	return (HOST_ERROR(0));
1033 }
1034 
/*
 * Find the first zero bit in a bitmask made of 32 words of 32 bits each.
 *
 * Returns the index of that bit (word index * 32 + bit index inside the
 * word, counting from the least significant bit), or -1 when all bits
 * in all words are set.
 */
static int
nat64lsn_find_pg_place(uint32_t *data)
{
	uint32_t avail;
	int word;

	for (word = 0; word < 32; word++) {
		/* Bits set in 'avail' are the free places of this word. */
		avail = ~data[word];
		if (avail != 0)
			return (word * 32 + ffs(avail) - 1);
	}
	return (-1);
}
1047 
1048 static int
1049 nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
1050     struct nat64lsn_alias *alias, uint32_t *chunkmask,
1051     uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
1052     struct nat64lsn_pg **pgptr, uint8_t proto)
1053 {
1054 	struct nat64lsn_pg *pg;
1055 	int i, pg_idx, chunk_idx;
1056 
1057 	/* Find place in pgchunk where PG can be added */
1058 	pg_idx = nat64lsn_find_pg_place(pgmask);
1059 	if (pg_idx < 0)	/* no more PGs */
1060 		return (PG_ERROR(1));
1061 	/* Check that we have allocated pgchunk for given PG index */
1062 	chunk_idx = pg_idx / 32;
1063 	if (!ISSET32(*chunkmask, chunk_idx)) {
1064 		chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
1065 		    M_NOWAIT);
1066 		if (chunks[chunk_idx] == NULL)
1067 			return (PG_ERROR(2));
1068 		ck_pr_bts_32(chunkmask, chunk_idx);
1069 		ck_pr_fence_store();
1070 	}
1071 	/* Allocate PG and states chunks */
1072 	pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
1073 	if (pg == NULL)
1074 		return (PG_ERROR(3));
1075 	pg->chunks_count = cfg->states_chunks;
1076 	if (pg->chunks_count > 1) {
1077 		pg->freemask_chunk = malloc(pg->chunks_count *
1078 		    sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
1079 		if (pg->freemask_chunk == NULL) {
1080 			uma_zfree(nat64lsn_pg_zone, pg);
1081 			return (PG_ERROR(4));
1082 		}
1083 		pg->states_chunk = malloc(pg->chunks_count *
1084 		    sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
1085 		    M_NOWAIT | M_ZERO);
1086 		if (pg->states_chunk == NULL) {
1087 			free(pg->freemask_chunk, M_NAT64LSN);
1088 			uma_zfree(nat64lsn_pg_zone, pg);
1089 			return (PG_ERROR(5));
1090 		}
1091 		for (i = 0; i < pg->chunks_count; i++) {
1092 			pg->states_chunk[i] = uma_zalloc(
1093 			    nat64lsn_state_zone, M_NOWAIT);
1094 			if (pg->states_chunk[i] == NULL)
1095 				goto states_failed;
1096 		}
1097 		memset(pg->freemask_chunk, 0xff,
1098 		    sizeof(uint64_t) * pg->chunks_count);
1099 	} else {
1100 		pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
1101 		if (pg->states == NULL) {
1102 			uma_zfree(nat64lsn_pg_zone, pg);
1103 			return (PG_ERROR(6));
1104 		}
1105 		memset(&pg->freemask64, 0xff, sizeof(uint64_t));
1106 	}
1107 
1108 	/* Initialize PG and hook it to pgchunk */
1109 	SET_AGE(pg->timestamp);
1110 	pg->proto = proto;
1111 	pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
1112 	ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
1113 	ck_pr_fence_store();
1114 	ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
1115 	ck_pr_store_ptr(pgptr, pg);
1116 
1117 	ALIAS_LOCK(alias);
1118 	CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
1119 	SET_AGE(alias->timestamp);
1120 	alias->portgroups_count++;
1121 	ALIAS_UNLOCK(alias);
1122 	NAT64STAT_INC(&cfg->base.stats, spgcreated);
1123 	return (PG_ERROR(0));
1124 
1125 states_failed:
1126 	for (i = 0; i < pg->chunks_count; i++)
1127 		uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1128 	free(pg->freemask_chunk, M_NAT64LSN);
1129 	free(pg->states_chunk, M_NAT64LSN);
1130 	uma_zfree(nat64lsn_pg_zone, pg);
1131 	return (PG_ERROR(7));
1132 }
1133 
1134 static int
1135 nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1136 {
1137 	struct nat64lsn_aliaslink *link;
1138 	struct nat64lsn_alias *alias;
1139 	int ret;
1140 
1141 	link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
1142 	if (link == NULL)
1143 		return (PG_ERROR(1));
1144 
1145 	/*
1146 	 * TODO: check that we did not already allocated PG in
1147 	 *	 previous call.
1148 	 */
1149 
1150 	ret = 0;
1151 	alias = link->alias;
1152 	/* Find place in pgchunk where PG can be added */
1153 	switch (ji->proto) {
1154 	case IPPROTO_TCP:
1155 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1156 		    &alias->tcp_chunkmask, alias->tcp_pgmask,
1157 		    alias->tcp, &alias->tcp_pg, ji->proto);
1158 		break;
1159 	case IPPROTO_UDP:
1160 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1161 		    &alias->udp_chunkmask, alias->udp_pgmask,
1162 		    alias->udp, &alias->udp_pg, ji->proto);
1163 		break;
1164 	case IPPROTO_ICMP:
1165 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1166 		    &alias->icmp_chunkmask, alias->icmp_pgmask,
1167 		    alias->icmp, &alias->icmp_pg, ji->proto);
1168 		break;
1169 	default:
1170 		panic("%s: wrong proto %d", __func__, ji->proto);
1171 	}
1172 	if (ret == PG_ERROR(1)) {
1173 		/*
1174 		 * PG_ERROR(1) means that alias lacks free PGs
1175 		 * XXX: try next alias.
1176 		 */
1177 		printf("NAT64LSN: %s: failed to obtain PG\n",
1178 		    __func__);
1179 		return (ret);
1180 	}
1181 	if (ret == PG_ERROR(0)) {
1182 		ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
1183 		    ji->state_hval, ji->faddr, ji->port, ji->proto);
1184 		if (ji->state == NULL)
1185 			ret = PG_ERROR(8);
1186 		else
1187 			ji->done = 1;
1188 	}
1189 	return (ret);
1190 }
1191 
1192 static void
1193 nat64lsn_do_request(void *data)
1194 {
1195 	struct epoch_tracker et;
1196 	struct nat64lsn_job_head jhead;
1197 	struct nat64lsn_job_item *ji, *ji2;
1198 	struct nat64lsn_cfg *cfg;
1199 	int jcount;
1200 	uint8_t flags;
1201 
1202 	cfg = (struct nat64lsn_cfg *)data;
1203 	if (cfg->jlen == 0)
1204 		return;
1205 
1206 	CURVNET_SET(cfg->vp);
1207 	STAILQ_INIT(&jhead);
1208 
1209 	/* Grab queue */
1210 	JQUEUE_LOCK();
1211 	STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
1212 	jcount = cfg->jlen;
1213 	cfg->jlen = 0;
1214 	JQUEUE_UNLOCK();
1215 
1216 	/* TODO: check if we need to resize hash */
1217 
1218 	NAT64STAT_INC(&cfg->base.stats, jcalls);
1219 	DPRINTF(DP_JQUEUE, "count=%d", jcount);
1220 
1221 	/*
1222 	 * TODO:
1223 	 * What we should do here is to build a hash
1224 	 * to ensure we don't have lots of duplicate requests.
1225 	 * Skip this for now.
1226 	 *
1227 	 * TODO: Limit per-call number of items
1228 	 */
1229 
1230 	NAT64LSN_EPOCH_ENTER(et);
1231 	STAILQ_FOREACH(ji, &jhead, entries) {
1232 		switch (ji->jtype) {
1233 		case JTYPE_NEWHOST:
1234 			if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
1235 				NAT64STAT_INC(&cfg->base.stats, jhostfails);
1236 			break;
1237 		case JTYPE_NEWPORTGROUP:
1238 			if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1239 				NAT64STAT_INC(&cfg->base.stats, jportfails);
1240 			break;
1241 		default:
1242 			continue;
1243 		}
1244 		if (ji->done != 0) {
1245 			flags = ji->proto != IPPROTO_TCP ? 0 :
1246 			    convert_tcp_flags(ji->f_id._flags);
1247 			nat64lsn_translate6_internal(cfg, &ji->m,
1248 			    ji->state, flags);
1249 			NAT64STAT_INC(&cfg->base.stats, jreinjected);
1250 		}
1251 	}
1252 	NAT64LSN_EPOCH_EXIT(et);
1253 
1254 	ji = STAILQ_FIRST(&jhead);
1255 	while (ji != NULL) {
1256 		ji2 = STAILQ_NEXT(ji, entries);
1257 		/*
1258 		 * In any case we must free mbuf if
1259 		 * translator did not consumed it.
1260 		 */
1261 		m_freem(ji->m);
1262 		uma_zfree(nat64lsn_job_zone, ji);
1263 		ji = ji2;
1264 	}
1265 	CURVNET_RESTORE();
1266 }
1267 
1268 static struct nat64lsn_job_item *
1269 nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
1270 {
1271 	struct nat64lsn_job_item *ji;
1272 
1273 	/*
1274 	 * Do not try to lock possibly contested mutex if we're near the
1275 	 * limit. Drop packet instead.
1276 	 */
1277 	ji = NULL;
1278 	if (cfg->jlen >= cfg->jmaxlen)
1279 		NAT64STAT_INC(&cfg->base.stats, jmaxlen);
1280 	else {
1281 		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
1282 		if (ji == NULL)
1283 			NAT64STAT_INC(&cfg->base.stats, jnomem);
1284 	}
1285 	if (ji == NULL) {
1286 		NAT64STAT_INC(&cfg->base.stats, dropped);
1287 		DPRINTF(DP_DROPS, "failed to create job");
1288 	} else {
1289 		ji->jtype = jtype;
1290 		ji->done = 0;
1291 	}
1292 	return (ji);
1293 }
1294 
1295 static void
1296 nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1297 {
1298 
1299 	JQUEUE_LOCK();
1300 	STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
1301 	NAT64STAT_INC(&cfg->base.stats, jrequests);
1302 	cfg->jlen++;
1303 
1304 	if (callout_pending(&cfg->jcallout) == 0)
1305 		callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
1306 	JQUEUE_UNLOCK();
1307 }
1308 
1309 static void
1310 nat64lsn_job_destroy(epoch_context_t ctx)
1311 {
1312 	struct nat64lsn_job_item *ji;
1313 	struct nat64lsn_host *host;
1314 	struct nat64lsn_pg *pg;
1315 	int i;
1316 
1317 	ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
1318 	MPASS(ji->jtype == JTYPE_DESTROY);
1319 	while (!CK_SLIST_EMPTY(&ji->hosts)) {
1320 		host = CK_SLIST_FIRST(&ji->hosts);
1321 		CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
1322 		if (host->states_count > 0) {
1323 			/*
1324 			 * XXX: The state has been created
1325 			 * during host deletion.
1326 			 */
1327 			printf("NAT64LSN: %s: destroying host with %d "
1328 			    "states\n", __func__, host->states_count);
1329 		}
1330 		nat64lsn_destroy_host(host);
1331 	}
1332 	while (!CK_SLIST_EMPTY(&ji->portgroups)) {
1333 		pg = CK_SLIST_FIRST(&ji->portgroups);
1334 		CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
1335 		for (i = 0; i < pg->chunks_count; i++) {
1336 			if (FREEMASK_BITCOUNT(pg, i) != 64) {
1337 				/*
1338 				 * XXX: The state has been created during
1339 				 * PG deletion.
1340 				 */
1341 				printf("NAT64LSN: %s: destroying PG %p "
1342 				    "with non-empty chunk %d\n", __func__,
1343 				    pg, i);
1344 			}
1345 		}
1346 		nat64lsn_destroy_pg(pg);
1347 	}
1348 	uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
1349 	uma_zfree(nat64lsn_job_zone, ji);
1350 }
1351 
1352 static int
1353 nat64lsn_request_host(struct nat64lsn_cfg *cfg,
1354     const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1355     in_addr_t faddr, uint16_t port, uint8_t proto)
1356 {
1357 	struct nat64lsn_job_item *ji;
1358 
1359 	ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
1360 	if (ji != NULL) {
1361 		ji->m = *mp;
1362 		ji->f_id = *f_id;
1363 		ji->faddr = faddr;
1364 		ji->port = port;
1365 		ji->proto = proto;
1366 		ji->src6_hval = hval;
1367 
1368 		nat64lsn_enqueue_job(cfg, ji);
1369 		NAT64STAT_INC(&cfg->base.stats, jhostsreq);
1370 		*mp = NULL;
1371 	}
1372 	return (IP_FW_DENY);
1373 }
1374 
1375 static int
1376 nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
1377     const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1378     in_addr_t faddr, uint16_t port, uint8_t proto)
1379 {
1380 	struct nat64lsn_job_item *ji;
1381 
1382 	ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
1383 	if (ji != NULL) {
1384 		ji->m = *mp;
1385 		ji->f_id = *f_id;
1386 		ji->faddr = faddr;
1387 		ji->port = port;
1388 		ji->proto = proto;
1389 		ji->state_hval = hval;
1390 		ji->host = host;
1391 
1392 		nat64lsn_enqueue_job(cfg, ji);
1393 		NAT64STAT_INC(&cfg->base.stats, jportreq);
1394 		*mp = NULL;
1395 	}
1396 	return (IP_FW_DENY);
1397 }
1398 
1399 static int
1400 nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
1401     struct nat64lsn_state *state, uint8_t flags)
1402 {
1403 	struct pfloghdr loghdr, *logdata;
1404 	int ret;
1405 	uint16_t ts;
1406 
1407 	/* Update timestamp and flags if needed */
1408 	SET_AGE(ts);
1409 	if (state->timestamp != ts)
1410 		state->timestamp = ts;
1411 	if ((state->flags & flags) != 0)
1412 		state->flags |= flags;
1413 
1414 	if (cfg->base.flags & NAT64_LOG) {
1415 		logdata = &loghdr;
1416 		nat64lsn_log(logdata, *mp, AF_INET6, state);
1417 	} else
1418 		logdata = NULL;
1419 
1420 	ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
1421 	    htons(state->aport), &cfg->base, logdata);
1422 	if (ret == NAT64SKIP)
1423 		return (cfg->nomatch_verdict);
1424 	if (ret == NAT64RETURN)
1425 		*mp = NULL;
1426 	return (IP_FW_DENY);
1427 }
1428 
1429 static int
1430 nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
1431     struct mbuf **mp)
1432 {
1433 	struct nat64lsn_state *state;
1434 	struct nat64lsn_host *host;
1435 	struct icmp6_hdr *icmp6;
1436 	uint32_t addr, hval, data[2];
1437 	int offset, proto;
1438 	uint16_t port;
1439 	uint8_t flags;
1440 
1441 	/* Check if protocol is supported */
1442 	port = f_id->src_port;
1443 	proto = f_id->proto;
1444 	switch (f_id->proto) {
1445 	case IPPROTO_ICMPV6:
1446 		/*
1447 		 * For ICMPv6 echo reply/request we use icmp6_id as
1448 		 * local port.
1449 		 */
1450 		offset = 0;
1451 		proto = nat64_getlasthdr(*mp, &offset);
1452 		if (proto < 0) {
1453 			NAT64STAT_INC(&cfg->base.stats, dropped);
1454 			DPRINTF(DP_DROPS, "mbuf isn't contigious");
1455 			return (IP_FW_DENY);
1456 		}
1457 		if (proto == IPPROTO_ICMPV6) {
1458 			icmp6 = mtodo(*mp, offset);
1459 			if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
1460 			    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
1461 				port = ntohs(icmp6->icmp6_id);
1462 		}
1463 		proto = IPPROTO_ICMP;
1464 		/* FALLTHROUGH */
1465 	case IPPROTO_TCP:
1466 	case IPPROTO_UDP:
1467 		break;
1468 	default:
1469 		NAT64STAT_INC(&cfg->base.stats, noproto);
1470 		return (cfg->nomatch_verdict);
1471 	}
1472 
1473 	/* Extract IPv4 from destination IPv6 address */
1474 	addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
1475 	if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
1476 		char a[INET_ADDRSTRLEN];
1477 
1478 		NAT64STAT_INC(&cfg->base.stats, dropped);
1479 		DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
1480 		    inet_ntop(AF_INET, &addr, a, sizeof(a)));
1481 		return (IP_FW_DENY); /* XXX: add extra stats? */
1482 	}
1483 
1484 	/* Try to find host */
1485 	hval = HOST_HVAL(cfg, &f_id->src_ip6);
1486 	CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
1487 		if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
1488 			break;
1489 	}
1490 	/* We use IPv4 address in host byte order */
1491 	addr = ntohl(addr);
1492 	if (host == NULL)
1493 		return (nat64lsn_request_host(cfg, f_id, mp,
1494 		    hval, addr, port, proto));
1495 
1496 	flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);
1497 
1498 	data[0] = addr;
1499 	data[1] = (f_id->dst_port << 16) | port;
1500 	hval = STATE_HVAL(cfg, data);
1501 	state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
1502 	    port, proto);
1503 	if (state == NULL)
1504 		return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
1505 		    port, proto));
1506 	return (nat64lsn_translate6_internal(cfg, mp, state, flags));
1507 }
1508 
1509 /*
1510  * Main dataplane entry point.
1511  */
1512 int
1513 ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
1514     ipfw_insn *cmd, int *done)
1515 {
1516 	struct nat64lsn_cfg *cfg;
1517 	ipfw_insn *icmd;
1518 	int ret;
1519 
1520 	IPFW_RLOCK_ASSERT(ch);
1521 
1522 	*done = 0;	/* continue the search in case of failure */
1523 	icmd = cmd + 1;
1524 	if (cmd->opcode != O_EXTERNAL_ACTION ||
1525 	    cmd->arg1 != V_nat64lsn_eid ||
1526 	    icmd->opcode != O_EXTERNAL_INSTANCE ||
1527 	    (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
1528 		return (IP_FW_DENY);
1529 
1530 	*done = 1;	/* terminate the search */
1531 
1532 	switch (args->f_id.addr_type) {
1533 	case 4:
1534 		ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
1535 		break;
1536 	case 6:
1537 		/*
1538 		 * Check that destination IPv6 address matches our prefix6.
1539 		 */
1540 		if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
1541 		    memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
1542 		    cfg->base.plat_plen / 8) != 0) {
1543 			ret = cfg->nomatch_verdict;
1544 			break;
1545 		}
1546 		ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
1547 		break;
1548 	default:
1549 		ret = cfg->nomatch_verdict;
1550 	}
1551 
1552 	if (ret != IP_FW_PASS && args->m != NULL) {
1553 		m_freem(args->m);
1554 		args->m = NULL;
1555 	}
1556 	return (ret);
1557 }
1558 
1559 static int
1560 nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
1561 {
1562 	struct nat64lsn_states_chunk *chunk;
1563 	int i;
1564 
1565 	chunk = (struct nat64lsn_states_chunk *)mem;
1566 	for (i = 0; i < 64; i++)
1567 		chunk->state[i].flags = 0;
1568 	return (0);
1569 }
1570 
1571 void
1572 nat64lsn_init_internal(void)
1573 {
1574 
1575 	nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
1576 	    sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
1577 	    UMA_ALIGN_PTR, 0);
1578 	nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
1579 	    sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
1580 	    UMA_ALIGN_PTR, 0);
1581 	nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
1582 	    sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
1583 	    UMA_ALIGN_PTR, 0);
1584 	nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
1585 	    sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
1586 	    UMA_ALIGN_PTR, 0);
1587 	nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
1588 	    sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
1589 	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
1590 	nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
1591 	    sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
1592 	    UMA_ALIGN_PTR, 0);
1593 	JQUEUE_LOCK_INIT();
1594 }
1595 
1596 void
1597 nat64lsn_uninit_internal(void)
1598 {
1599 
1600 	/* XXX: epoch_task drain */
1601 	JQUEUE_LOCK_DESTROY();
1602 	uma_zdestroy(nat64lsn_host_zone);
1603 	uma_zdestroy(nat64lsn_pgchunk_zone);
1604 	uma_zdestroy(nat64lsn_pg_zone);
1605 	uma_zdestroy(nat64lsn_aliaslink_zone);
1606 	uma_zdestroy(nat64lsn_state_zone);
1607 	uma_zdestroy(nat64lsn_job_zone);
1608 }
1609 
1610 void
1611 nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
1612 {
1613 
1614 	CALLOUT_LOCK(cfg);
1615 	callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
1616 	    nat64lsn_periodic, cfg);
1617 	CALLOUT_UNLOCK(cfg);
1618 }
1619 
1620 struct nat64lsn_cfg *
1621 nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
1622 {
1623 	struct nat64lsn_cfg *cfg;
1624 	struct nat64lsn_alias *alias;
1625 	int i, naddr;
1626 
1627 	cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
1628 	    M_WAITOK | M_ZERO);
1629 
1630 	CFG_LOCK_INIT(cfg);
1631 	CALLOUT_LOCK_INIT(cfg);
1632 	STAILQ_INIT(&cfg->jhead);
1633 	cfg->vp = curvnet;
1634 	COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);
1635 
1636 	cfg->hash_seed = arc4random();
1637 	cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
1638 	cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
1639 	    cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
1640 	for (i = 0; i < cfg->hosts_hashsize; i++)
1641 		CK_SLIST_INIT(&cfg->hosts_hash[i]);
1642 
1643 	naddr = 1 << (32 - plen);
1644 	cfg->prefix4 = prefix;
1645 	cfg->pmask4 = prefix | (naddr - 1);
1646 	cfg->plen4 = plen;
1647 	cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
1648 	    M_NAT64LSN, M_WAITOK | M_ZERO);
1649 	for (i = 0; i < naddr; i++) {
1650 		alias = &cfg->aliases[i];
1651 		alias->addr = prefix + i; /* host byte order */
1652 		CK_SLIST_INIT(&alias->hosts);
1653 		ALIAS_LOCK_INIT(alias);
1654 	}
1655 
1656         callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
1657         callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
1658 
1659 	return (cfg);
1660 }
1661 
1662 static void
1663 nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
1664 {
1665 	int i;
1666 
1667 	if (pg->chunks_count == 1) {
1668 		uma_zfree(nat64lsn_state_zone, pg->states);
1669 	} else {
1670 		for (i = 0; i < pg->chunks_count; i++)
1671 			uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1672 		free(pg->states_chunk, M_NAT64LSN);
1673 		free(pg->freemask_chunk, M_NAT64LSN);
1674 	}
1675 	uma_zfree(nat64lsn_pg_zone, pg);
1676 }
1677 
1678 static void
1679 nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
1680     struct nat64lsn_alias *alias)
1681 {
1682 	struct nat64lsn_pg *pg;
1683 	int i;
1684 
1685 	while (!CK_SLIST_EMPTY(&alias->portgroups)) {
1686 		pg = CK_SLIST_FIRST(&alias->portgroups);
1687 		CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
1688 		nat64lsn_destroy_pg(pg);
1689 	}
1690 	for (i = 0; i < 32; i++) {
1691 		if (ISSET32(alias->tcp_chunkmask, i))
1692 			uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
1693 		if (ISSET32(alias->udp_chunkmask, i))
1694 			uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
1695 		if (ISSET32(alias->icmp_chunkmask, i))
1696 			uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
1697 	}
1698 	ALIAS_LOCK_DESTROY(alias);
1699 }
1700 
1701 static void
1702 nat64lsn_destroy_host(struct nat64lsn_host *host)
1703 {
1704 	struct nat64lsn_aliaslink *link;
1705 
1706 	while (!CK_SLIST_EMPTY(&host->aliases)) {
1707 		link = CK_SLIST_FIRST(&host->aliases);
1708 		CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);
1709 
1710 		ALIAS_LOCK(link->alias);
1711 		CK_SLIST_REMOVE(&link->alias->hosts, link,
1712 		    nat64lsn_aliaslink, alias_entries);
1713 		link->alias->hosts_count--;
1714 		ALIAS_UNLOCK(link->alias);
1715 
1716 		uma_zfree(nat64lsn_aliaslink_zone, link);
1717 	}
1718 	HOST_LOCK_DESTROY(host);
1719 	free(host->states_hash, M_NAT64LSN);
1720 	uma_zfree(nat64lsn_host_zone, host);
1721 }
1722 
1723 void
1724 nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
1725 {
1726 	struct nat64lsn_host *host;
1727 	int i;
1728 
1729 	CALLOUT_LOCK(cfg);
1730 	callout_drain(&cfg->periodic);
1731 	CALLOUT_UNLOCK(cfg);
1732 	callout_drain(&cfg->jcallout);
1733 
1734 	for (i = 0; i < cfg->hosts_hashsize; i++) {
1735 		while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
1736 			host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
1737 			CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
1738 			nat64lsn_destroy_host(host);
1739 		}
1740 	}
1741 
1742 	for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
1743 		nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);
1744 
1745 	CALLOUT_LOCK_DESTROY(cfg);
1746 	CFG_LOCK_DESTROY(cfg);
1747 	COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
1748 	free(cfg->hosts_hash, M_NAT64LSN);
1749 	free(cfg->aliases, M_NAT64LSN);
1750 	free(cfg, M_NAT64LSN);
1751 }
1752