xref: /netbsd-src/sys/net/npf/npf_tableset.c (revision 6a493d6bc668897c91594964a732d38505b70cbb)
1 /*	$NetBSD: npf_tableset.c,v 1.20 2013/11/22 00:25:51 rmind Exp $	*/
2 
3 /*-
4  * Copyright (c) 2009-2013 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This material is based upon work partially supported by The
8  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * NPF tableset module.
34  *
35  * Notes
36  *
37  *	The tableset is an array of tables.  After the creation, the array
38  *	is immutable.  The caller is responsible to synchronise the access
39  *	to the tableset.  The table can either be a hash or a tree.  Its
40  *	entries are protected by a read-write lock.
41  */
42 
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.20 2013/11/22 00:25:51 rmind Exp $");
45 
46 #include <sys/param.h>
47 #include <sys/types.h>
48 
49 #include <sys/atomic.h>
50 #include <sys/hash.h>
51 #include <sys/kmem.h>
52 #include <sys/pool.h>
53 #include <sys/queue.h>
54 #include <sys/rwlock.h>
55 #include <sys/systm.h>
56 #include <sys/types.h>
57 
58 #include "npf_impl.h"
59 
/*
 * Table entry: a single IP address (with optional mask, tree storage
 * only), linked either onto a hash list or into a PATRICIA tree,
 * depending on the storage type of the owning table.
 */
typedef struct npf_tblent {
	union {
		LIST_ENTRY(npf_tblent) hashq;	/* hash storage linkage */
		pt_node_t	node;		/* tree storage linkage */
	} te_entry;
	int			te_alen;	/* address length in bytes */
	npf_addr_t		te_addr;	/* the address itself */
} npf_tblent_t;

LIST_HEAD(npf_hashl, npf_tblent);
70 
struct npf_table {
	/*
	 * The storage type can be: a) hash b) tree.
	 * There are separate trees for IPv4 and IPv6.
	 */
	struct npf_hashl *	t_hashl;	/* hash buckets (hash type) */
	u_long			t_hashmask;	/* bucket index mask */
	pt_tree_t		t_tree[2];	/* [0] = IPv4, [1] = IPv6 */

	/*
	 * Table ID, type and lock.  The ID may change during the
	 * config reload, it is protected by the npf_config_lock.
	 */
	int			t_type;		/* NPF_TABLE_HASH or _TREE */
	u_int			t_id;
	krwlock_t		t_lock;		/* protects the entries */

	/* The number of items, reference count and table name. */
	u_int			t_nitems;
	u_int			t_refcnt;
	char			t_name[NPF_TABLE_MAXNAMELEN];
};

/*
 * Tableset: an array of tables indexed by table ID.  The array itself
 * is immutable after creation; each non-NULL slot holds a reference
 * on its table (see npf_tableset_insert/npf_tableset_destroy).
 */
struct npf_tableset {
	u_int			ts_nitems;	/* number of ts_map[] slots */
	npf_table_t *		ts_map[];	/* flexible array of tables */
};
98 
99 #define	NPF_TABLESET_SIZE(n)	\
100     (offsetof(npf_tableset_t, ts_map[n]) * sizeof(npf_table_t *))
101 
102 #define	NPF_ADDRLEN2TREE(alen)	((alen) >> 4)
103 
104 static pool_cache_t		tblent_cache	__read_mostly;
105 
/*
 * npf_tableset_sysinit: initialise the tableset structures, i.e. create
 * the pool cache backing the table entries.
 */
void
npf_tableset_sysinit(void)
{
	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
	    0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
}
115 
/*
 * npf_tableset_sysfini: destroy the table entry pool cache.
 */
void
npf_tableset_sysfini(void)
{
	pool_cache_destroy(tblent_cache);
}
121 
/*
 * npf_tableset_create: allocate a zeroed tableset with the given number
 * of table slots.  Cannot fail (KM_SLEEP).
 */
npf_tableset_t *
npf_tableset_create(u_int nitems)
{
	npf_tableset_t *ts = kmem_zalloc(NPF_TABLESET_SIZE(nitems), KM_SLEEP);
	ts->ts_nitems = nitems;
	return ts;
}
129 
/*
 * npf_tableset_destroy: drop the set's reference on every table,
 * destroying each table whose count reaches zero, then free the set.
 */
void
npf_tableset_destroy(npf_tableset_t *ts)
{
	/*
	 * Destroy all tables (no references should be held, since the
	 * ruleset should be destroyed before).
	 */
	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
		npf_table_t *t = ts->ts_map[tid];

		/* Drop the set's reference; the last one destroys. */
		if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) {
			npf_table_destroy(t);
		}
	}
	kmem_free(ts, NPF_TABLESET_SIZE(ts->ts_nitems));
}
146 
147 /*
148  * npf_tableset_insert: insert the table into the specified tableset.
149  *
150  * => Returns 0 on success.  Fails and returns error if ID is already used.
151  */
152 int
153 npf_tableset_insert(npf_tableset_t *ts, npf_table_t *t)
154 {
155 	const u_int tid = t->t_id;
156 	int error;
157 
158 	KASSERT((u_int)tid < ts->ts_nitems);
159 
160 	if (ts->ts_map[tid] == NULL) {
161 		atomic_inc_uint(&t->t_refcnt);
162 		ts->ts_map[tid] = t;
163 		error = 0;
164 	} else {
165 		error = EEXIST;
166 	}
167 	return error;
168 }
169 
170 /*
171  * npf_tableset_getbyname: look for a table in the set given the name.
172  */
173 npf_table_t *
174 npf_tableset_getbyname(npf_tableset_t *ts, const char *name)
175 {
176 	npf_table_t *t;
177 
178 	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
179 		if ((t = ts->ts_map[tid]) == NULL)
180 			continue;
181 		if (strcmp(name, t->t_name) == 0)
182 			return t;
183 	}
184 	return NULL;
185 }
186 
187 npf_table_t *
188 npf_tableset_getbyid(npf_tableset_t *ts, u_int tid)
189 {
190 	if (__predict_true(tid < ts->ts_nitems)) {
191 		return ts->ts_map[tid];
192 	}
193 	return NULL;
194 }
195 
/*
 * npf_tableset_reload: iterate all tables and if the new table is of the
 * same type and has no items, then we preserve the old one and its entries.
 *
 * => The caller is responsible for providing synchronisation.
 */
void
npf_tableset_reload(npf_tableset_t *nts, npf_tableset_t *ots)
{
	for (u_int tid = 0; tid < nts->ts_nitems; tid++) {
		npf_table_t *t, *ot;

		/* Skip the empty slots. */
		if ((t = nts->ts_map[tid]) == NULL) {
			continue;
		}

		/* If our table has entries, just load it. */
		if (t->t_nitems) {
			continue;
		}

		/* Look for a currently existing table with such name. */
		ot = npf_tableset_getbyname(ots, t->t_name);
		if (ot == NULL) {
			/* Not found: we have a new table. */
			continue;
		}

		/* Found.  Did the type change? */
		if (t->t_type != ot->t_type) {
			/* Yes, load the new. */
			continue;
		}

		/*
		 * Preserve the current table.  Acquire a reference since
		 * we are keeping it in the old table set.  Update its ID.
		 */
		atomic_inc_uint(&ot->t_refcnt);
		nts->ts_map[tid] = ot;

		/* The ID update must be done under the config lock. */
		KASSERT(npf_config_locked_p());
		ot->t_id = tid;

		/* Destroy the new table (we hold only reference). */
		t->t_refcnt--;
		npf_table_destroy(t);
	}
}
245 
/*
 * npf_tableset_syncdict: export the name, type and ID of every table
 * in the set into the "tables" array of the given property dictionary,
 * replacing any previous "tables" entry.
 */
void
npf_tableset_syncdict(const npf_tableset_t *ts, prop_dictionary_t ndict)
{
	prop_array_t tables = prop_array_create();
	const npf_table_t *t;

	KASSERT(npf_config_locked_p());

	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
		/* Skip the empty slots. */
		if ((t = ts->ts_map[tid]) == NULL) {
			continue;
		}
		prop_dictionary_t tdict = prop_dictionary_create();
		prop_dictionary_set_cstring(tdict, "name", t->t_name);
		prop_dictionary_set_uint32(tdict, "type", t->t_type);
		prop_dictionary_set_uint32(tdict, "id", tid);

		/* The array retains the dictionary; drop our reference. */
		prop_array_add(tables, tdict);
		prop_object_release(tdict);
	}
	prop_dictionary_remove(ndict, "tables");
	prop_dictionary_set(ndict, "tables", tables);
	prop_object_release(tables);
}
270 
271 /*
272  * Few helper routines.
273  */
274 
275 static npf_tblent_t *
276 table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr,
277     const int alen, struct npf_hashl **rhtbl)
278 {
279 	const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT);
280 	struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask];
281 	npf_tblent_t *ent;
282 
283 	/*
284 	 * Lookup the hash table and check for duplicates.
285 	 * Note: mask is ignored for the hash storage.
286 	 */
287 	LIST_FOREACH(ent, htbl, te_entry.hashq) {
288 		if (ent->te_alen != alen) {
289 			continue;
290 		}
291 		if (memcmp(&ent->te_addr, addr, alen) == 0) {
292 			break;
293 		}
294 	}
295 	*rhtbl = htbl;
296 	return ent;
297 }
298 
299 static void
300 table_hash_destroy(npf_table_t *t)
301 {
302 	for (unsigned n = 0; n <= t->t_hashmask; n++) {
303 		npf_tblent_t *ent;
304 
305 		while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
306 			LIST_REMOVE(ent, te_entry.hashq);
307 			pool_cache_put(tblent_cache, ent);
308 		}
309 	}
310 }
311 
312 static void
313 table_tree_destroy(pt_tree_t *tree)
314 {
315 	npf_tblent_t *ent;
316 
317 	while ((ent = ptree_iterate(tree, NULL, PT_ASCENDING)) != NULL) {
318 		ptree_remove_node(tree, ent);
319 		pool_cache_put(tblent_cache, ent);
320 	}
321 }
322 
/*
 * npf_table_create: create table with a specified ID.
 *
 * => Returns NULL if the hash storage cannot be allocated.
 * => The new table has a zero reference count; inserting it into a
 *    tableset (npf_tableset_insert) acquires the first reference.
 */
npf_table_t *
npf_table_create(const char *name, u_int tid, int type, size_t hsize)
{
	npf_table_t *t;

	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
	strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN);

	switch (type) {
	case NPF_TABLE_TREE:
		/* Key lengths are passed in units of uint32_t words. */
		ptree_init(&t->t_tree[0], &npf_table_ptree_ops,
		    (void *)(sizeof(struct in_addr) / sizeof(uint32_t)),
		    offsetof(npf_tblent_t, te_entry.node),
		    offsetof(npf_tblent_t, te_addr));
		ptree_init(&t->t_tree[1], &npf_table_ptree_ops,
		    (void *)(sizeof(struct in6_addr) / sizeof(uint32_t)),
		    offsetof(npf_tblent_t, te_entry.node),
		    offsetof(npf_tblent_t, te_addr));
		break;
	case NPF_TABLE_HASH:
		t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
		if (t->t_hashl == NULL) {
			kmem_free(t, sizeof(npf_table_t));
			return NULL;
		}
		break;
	default:
		KASSERT(false);
	}
	rw_init(&t->t_lock);
	t->t_type = type;
	t->t_id = tid;

	return t;
}
361 
/*
 * npf_table_destroy: free all table entries and table itself.
 *
 * => Must be called with no remaining references on the table.
 */
void
npf_table_destroy(npf_table_t *t)
{
	KASSERT(t->t_refcnt == 0);

	switch (t->t_type) {
	case NPF_TABLE_HASH:
		table_hash_destroy(t);
		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
		break;
	case NPF_TABLE_TREE:
		/* Both the IPv4 and the IPv6 tree. */
		table_tree_destroy(&t->t_tree[0]);
		table_tree_destroy(&t->t_tree[1]);
		break;
	default:
		KASSERT(false);
	}
	rw_destroy(&t->t_lock);
	kmem_free(t, sizeof(npf_table_t));
}
385 
386 /*
387  * npf_table_check: validate the name, ID and type.
388  */
389 int
390 npf_table_check(npf_tableset_t *ts, const char *name, u_int tid, int type)
391 {
392 	if ((u_int)tid >= ts->ts_nitems) {
393 		return EINVAL;
394 	}
395 	if (ts->ts_map[tid] != NULL) {
396 		return EEXIST;
397 	}
398 	if (type != NPF_TABLE_TREE && type != NPF_TABLE_HASH) {
399 		return EINVAL;
400 	}
401 	if (strlen(name) >= NPF_TABLE_MAXNAMELEN) {
402 		return ENAMETOOLONG;
403 	}
404 	if (npf_tableset_getbyname(ts, name)) {
405 		return EEXIST;
406 	}
407 	return 0;
408 }
409 
/*
 * table_cidr_check: validate the address family index and netmask.
 *
 * => aidx is 0 for IPv4 and 1 for IPv6 (see NPF_ADDRLEN2TREE).
 * => Note: the address itself is currently not inspected here.
 */
static int
table_cidr_check(const u_int aidx, const npf_addr_t *addr,
    const npf_netmask_t mask)
{
	if (aidx > 1) {
		return EINVAL;
	}
	/* The mask must be in range, unless it is the "no mask" value. */
	if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) {
		return EINVAL;
	}

	/*
	 * For IPv4 (aidx = 0) - 32 and for IPv6 (aidx = 1) - 128.
	 * If it is a host - shall use NPF_NO_NETMASK.
	 */
	if (mask >= (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) {
		return EINVAL;
	}
	return 0;
}
430 
/*
 * npf_table_insert: add an IP CIDR entry into the table.
 *
 * => Returns EEXIST for a duplicate, EINVAL on a bad mask or (for hash
 *    tables) any mask other than NPF_NO_NETMASK.
 */
int
npf_table_insert(npf_table_t *t, const int alen,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent;
	int error;

	error = table_cidr_check(aidx, addr, mask);
	if (error) {
		return error;
	}
	/* Prepare the entry upfront, before taking the write lock. */
	ent = pool_cache_get(tblent_cache, PR_WAITOK);
	memcpy(&ent->te_addr, addr, alen);
	ent->te_alen = alen;

	/*
	 * Insert the entry.  Return an error on duplicate.
	 */
	rw_enter(&t->t_lock, RW_WRITER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;

		/*
		 * Hash tables by the concept support only IPs.
		 */
		if (mask != NPF_NO_NETMASK) {
			error = EINVAL;
			break;
		}
		if (!table_hash_lookup(t, addr, alen, &htbl)) {
			LIST_INSERT_HEAD(htbl, ent, te_entry.hashq);
			t->t_nitems++;
		} else {
			error = EEXIST;
		}
		break;
	}
	case NPF_TABLE_TREE: {
		pt_tree_t *tree = &t->t_tree[aidx];
		bool ok;

		/*
		 * If no mask specified, use maximum mask.
		 */
		ok = (mask != NPF_NO_NETMASK) ?
		    ptree_insert_mask_node(tree, ent, mask) :
		    ptree_insert_node(tree, ent);
		if (ok) {
			t->t_nitems++;
			error = 0;
		} else {
			error = EEXIST;
		}
		break;
	}
	default:
		KASSERT(false);
	}
	rw_exit(&t->t_lock);

	/* On failure, return the unused entry to the pool. */
	if (error) {
		pool_cache_put(tblent_cache, ent);
	}
	return error;
}
501 
/*
 * npf_table_remove: remove the IP CIDR entry from the table.
 *
 * => Returns ENOENT if no matching entry was found.
 * => Note: for the tree storage, the lookup is by address only.
 */
int
npf_table_remove(npf_table_t *t, const int alen,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent;
	int error;

	error = table_cidr_check(aidx, addr, mask);
	if (error) {
		return error;
	}

	rw_enter(&t->t_lock, RW_WRITER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;

		ent = table_hash_lookup(t, addr, alen, &htbl);
		if (__predict_true(ent != NULL)) {
			LIST_REMOVE(ent, te_entry.hashq);
			t->t_nitems--;
		}
		break;
	}
	case NPF_TABLE_TREE: {
		pt_tree_t *tree = &t->t_tree[aidx];

		ent = ptree_find_node(tree, addr);
		if (__predict_true(ent != NULL)) {
			ptree_remove_node(tree, ent);
			t->t_nitems--;
		}
		break;
	}
	default:
		KASSERT(false);
		ent = NULL;
	}
	rw_exit(&t->t_lock);

	if (ent == NULL) {
		return ENOENT;
	}
	/* Free the entry outside of the lock. */
	pool_cache_put(tblent_cache, ent);
	return 0;
}
552 
553 /*
554  * npf_table_lookup: find the table according to ID, lookup and match
555  * the contents with the specified IP address.
556  */
557 int
558 npf_table_lookup(npf_table_t *t, const int alen, const npf_addr_t *addr)
559 {
560 	const u_int aidx = NPF_ADDRLEN2TREE(alen);
561 	npf_tblent_t *ent;
562 
563 	if (__predict_false(aidx > 1)) {
564 		return EINVAL;
565 	}
566 
567 	rw_enter(&t->t_lock, RW_READER);
568 	switch (t->t_type) {
569 	case NPF_TABLE_HASH: {
570 		struct npf_hashl *htbl;
571 		ent = table_hash_lookup(t, addr, alen, &htbl);
572 		break;
573 	}
574 	case NPF_TABLE_TREE: {
575 		ent = ptree_find_node(&t->t_tree[aidx], addr);
576 		break;
577 	}
578 	default:
579 		KASSERT(false);
580 		ent = NULL;
581 	}
582 	rw_exit(&t->t_lock);
583 
584 	return ent ? 0 : ENOENT;
585 }
586 
/*
 * table_ent_copyout: copy a single table entry, as npf_ioctl_ent_t,
 * into the userspace buffer at offset *off, advancing the offset.
 *
 * => Returns ENOMEM if the entry does not fit into the buffer,
 *    otherwise the copyout(9) result.
 */
static int
table_ent_copyout(npf_tblent_t *ent, npf_netmask_t mask,
    void *ubuf, size_t len, size_t *off)
{
	void *ubufp = (uint8_t *)ubuf + *off;
	npf_ioctl_ent_t uent;

	/* Advance the offset first; bail out if it would overrun ubuf. */
	if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
		return ENOMEM;
	}
	uent.alen = ent->te_alen;
	memcpy(&uent.addr, &ent->te_addr, sizeof(npf_addr_t));
	uent.mask = mask;

	return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
}
603 
/*
 * table_tree_list: copy all entries of the given tree into the
 * userspace buffer, in ascending order.
 *
 * => Entries without an explicit mask are reported with maxmask
 *    (32 for IPv4, 128 for IPv6 — see npf_table_list).
 */
static int
table_tree_list(pt_tree_t *tree, npf_netmask_t maxmask, void *ubuf,
    size_t len, size_t *off)
{
	npf_tblent_t *ent = NULL;
	int error = 0;

	while ((ent = ptree_iterate(tree, ent, PT_ASCENDING)) != NULL) {
		pt_bitlen_t blen;

		/* Non-mask nodes are reported with the maximum mask. */
		if (!ptree_mask_node_p(tree, ent, &blen)) {
			blen = maxmask;
		}
		error = table_ent_copyout(ent, blen, ubuf, len, off);
		if (error)
			break;
	}
	return error;
}
623 
624 /*
625  * npf_table_list: copy a list of all table entries into a userspace buffer.
626  */
627 int
628 npf_table_list(npf_table_t *t, void *ubuf, size_t len)
629 {
630 	size_t off = 0;
631 	int error = 0;
632 
633 	rw_enter(&t->t_lock, RW_READER);
634 	switch (t->t_type) {
635 	case NPF_TABLE_HASH:
636 		for (unsigned n = 0; n <= t->t_hashmask; n++) {
637 			npf_tblent_t *ent;
638 
639 			LIST_FOREACH(ent, &t->t_hashl[n], te_entry.hashq)
640 				if ((error = table_ent_copyout(ent, 0, ubuf,
641 				    len, &off)) != 0)
642 					break;
643 		}
644 		break;
645 	case NPF_TABLE_TREE:
646 		error = table_tree_list(&t->t_tree[0], 32, ubuf, len, &off);
647 		if (error)
648 			break;
649 		error = table_tree_list(&t->t_tree[1], 128, ubuf, len, &off);
650 		break;
651 	default:
652 		KASSERT(false);
653 	}
654 	rw_exit(&t->t_lock);
655 
656 	return error;
657 }
658 
659 /*
660  * npf_table_flush: remove all table entries.
661  */
662 int
663 npf_table_flush(npf_table_t *t)
664 {
665 	rw_enter(&t->t_lock, RW_WRITER);
666 	switch (t->t_type) {
667 	case NPF_TABLE_HASH:
668 		table_hash_destroy(t);
669 		t->t_nitems = 0;
670 		break;
671 	case NPF_TABLE_TREE:
672 		table_tree_destroy(&t->t_tree[0]);
673 		table_tree_destroy(&t->t_tree[1]);
674 		t->t_nitems = 0;
675 		break;
676 	default:
677 		KASSERT(false);
678 	}
679 	rw_exit(&t->t_lock);
680 	return 0;
681 }
682