xref: /netbsd-src/sys/net/npf/npf_tableset.c (revision 7788a0781fe6ff2cce37368b4578a7ade0850cb1)
1 /*	$NetBSD: npf_tableset.c,v 1.18 2013/05/19 20:45:34 rmind Exp $	*/
2 
3 /*-
4  * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This material is based upon work partially supported by The
8  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * NPF tableset module.
34  *
35  * Notes
36  *
37  *	The tableset is an array of tables.  After the creation, the array
38  *	is immutable.  The caller is responsible to synchronise the access
39  *	to the tableset.  The table can either be a hash or a tree.  Its
40  *	entries are protected by a read-write lock.
41  */
42 
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.18 2013/05/19 20:45:34 rmind Exp $");
45 
46 #include <sys/param.h>
47 #include <sys/types.h>
48 
49 #include <sys/atomic.h>
50 #include <sys/hash.h>
51 #include <sys/kmem.h>
52 #include <sys/pool.h>
53 #include <sys/queue.h>
54 #include <sys/rwlock.h>
55 #include <sys/systm.h>
56 #include <sys/types.h>
57 
58 #include "npf_impl.h"
59 
60 /*
61  * Table structures.
62  */
63 
/*
 * Table entry: a single IP address, linked either into a hash bucket
 * or into a PATRICIA tree, depending on the storage type of the table.
 */
typedef struct npf_tblent {
	union {
		/* Hash-bucket linkage (NPF_TABLE_HASH storage). */
		LIST_ENTRY(npf_tblent) hashq;
		/* PATRICIA tree node (NPF_TABLE_TREE storage). */
		pt_node_t	node;
	} te_entry;
	/* Address length in bytes; used as the hash/compare key length. */
	int			te_alen;
	/* The IP address itself. */
	npf_addr_t		te_addr;
} npf_tblent_t;
72 
73 LIST_HEAD(npf_hashl, npf_tblent);
74 
/*
 * Table descriptor.  The storage is either a hash of buckets (t_hashl,
 * t_hashmask) or a pair of PATRICIA trees (t_tree), chosen by t_type.
 */
struct npf_table {
	char			t_name[16];
	/* Lock and reference count. */
	krwlock_t		t_lock;
	u_int			t_refcnt;
	/* Total number of items. */
	u_int			t_nitems;
	/* Table ID. */
	u_int			t_id;
	/* The storage type can be: a) hash b) tree. */
	int			t_type;
	/* Hash bucket array and its mask (hash storage only). */
	struct npf_hashl *	t_hashl;
	u_long			t_hashmask;
	/* Separate trees for IPv4 and IPv6. */
	pt_tree_t		t_tree[2];
};
91 
/* Map the address length in bytes to the t_tree[] index: 4 -> 0, 16 -> 1. */
#define	NPF_ADDRLEN2TREE(alen)	((alen) >> 4)

/* Pool cache used for allocating npf_tblent_t structures. */
static pool_cache_t		tblent_cache	__read_mostly;
95 
/*
 * npf_tableset_sysinit: initialise the tableset structures -- creates
 * the pool cache used for the table entries.
 */
void
npf_tableset_sysinit(void)
{

	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
	    0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
}
106 
/*
 * npf_tableset_sysfini: destroy the table entry pool cache.
 */
void
npf_tableset_sysfini(void)
{

	pool_cache_destroy(tblent_cache);
}
113 
114 npf_tableset_t *
115 npf_tableset_create(void)
116 {
117 	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
118 
119 	return kmem_zalloc(sz, KM_SLEEP);
120 }
121 
122 void
123 npf_tableset_destroy(npf_tableset_t *tblset)
124 {
125 	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
126 	npf_table_t *t;
127 	u_int tid;
128 
129 	/*
130 	 * Destroy all tables (no references should be held, as ruleset
131 	 * should be destroyed before).
132 	 */
133 	for (tid = 0; tid < NPF_TABLE_SLOTS; tid++) {
134 		t = tblset[tid];
135 		if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) {
136 			npf_table_destroy(t);
137 		}
138 	}
139 	kmem_free(tblset, sz);
140 }
141 
142 /*
143  * npf_tableset_insert: insert the table into the specified tableset.
144  *
145  * => Returns 0 on success.  Fails and returns error if ID is already used.
146  */
147 int
148 npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t)
149 {
150 	const u_int tid = t->t_id;
151 	int error;
152 
153 	KASSERT((u_int)tid < NPF_TABLE_SLOTS);
154 
155 	if (tblset[tid] == NULL) {
156 		atomic_inc_uint(&t->t_refcnt);
157 		tblset[tid] = t;
158 		error = 0;
159 	} else {
160 		error = EEXIST;
161 	}
162 	return error;
163 }
164 
/*
 * npf_tableset_reload: iterate all tables and if the new table is of the
 * same type and has no items, then we preserve the old one and its entries.
 *
 * => The caller is responsible for providing synchronisation.
 */
void
npf_tableset_reload(npf_tableset_t *ntset, npf_tableset_t *otset)
{
	for (int i = 0; i < NPF_TABLE_SLOTS; i++) {
		npf_table_t *t = ntset[i], *ot = otset[i];

		/* Both slots must be occupied for a carry-over. */
		if (t == NULL || ot == NULL) {
			continue;
		}
		/* Keep the old table only if the new one is still empty. */
		if (t->t_nitems || t->t_type != ot->t_type) {
			continue;
		}

		/*
		 * Acquire a reference since the table has to be kept
		 * in the old tableset.
		 */
		atomic_inc_uint(&ot->t_refcnt);
		ntset[i] = ot;

		/*
		 * Only reference, never been visible.  Hence the plain
		 * (non-atomic) decrement before destroying the new table.
		 */
		t->t_refcnt--;
		npf_table_destroy(t);
	}
}
196 
197 /*
198  * Few helper routines.
199  */
200 
/*
 * table_hash_lookup: find a hash-table entry matching the given address.
 *
 * => Returns the entry or NULL (LIST_FOREACH leaves the iterator NULL
 *    when the bucket is exhausted without a match).
 * => Always stores the target bucket in *rhtbl, so the caller can insert
 *    into it on a miss.
 */
static npf_tblent_t *
table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr,
    const int alen, struct npf_hashl **rhtbl)
{
	const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT);
	struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask];
	npf_tblent_t *ent;

	/*
	 * Lookup the hash table and check for duplicates.
	 * Note: mask is ignored for the hash storage.
	 */
	LIST_FOREACH(ent, htbl, te_entry.hashq) {
		if (ent->te_alen != alen) {
			continue;
		}
		if (memcmp(&ent->te_addr, addr, alen) == 0) {
			break;
		}
	}
	*rhtbl = htbl;
	return ent;
}
224 
225 static void
226 table_hash_destroy(npf_table_t *t)
227 {
228 	for (unsigned n = 0; n <= t->t_hashmask; n++) {
229 		npf_tblent_t *ent;
230 
231 		while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
232 			LIST_REMOVE(ent, te_entry.hashq);
233 			pool_cache_put(tblent_cache, ent);
234 		}
235 	}
236 }
237 
238 static void
239 table_tree_destroy(pt_tree_t *tree)
240 {
241 	npf_tblent_t *ent;
242 
243 	while ((ent = ptree_iterate(tree, NULL, PT_ASCENDING)) != NULL) {
244 		ptree_remove_node(tree, ent);
245 		pool_cache_put(tblent_cache, ent);
246 	}
247 }
248 
/*
 * npf_table_create: create table with a specified ID.
 *
 * => hsize is only used for the hash storage (number of buckets hint).
 * => Returns the new table or NULL if the hash buckets cannot be
 *    allocated.
 */
npf_table_t *
npf_table_create(u_int tid, int type, size_t hsize)
{
	npf_table_t *t;

	KASSERT((u_int)tid < NPF_TABLE_SLOTS);

	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
	switch (type) {
	case NPF_TABLE_TREE:
		/*
		 * Key lengths are passed in 32-bit words: 1 for the IPv4
		 * tree (t_tree[0]) and 4 for the IPv6 tree (t_tree[1]).
		 */
		ptree_init(&t->t_tree[0], &npf_table_ptree_ops,
		    (void *)(sizeof(struct in_addr) / sizeof(uint32_t)),
		    offsetof(npf_tblent_t, te_entry.node),
		    offsetof(npf_tblent_t, te_addr));
		ptree_init(&t->t_tree[1], &npf_table_ptree_ops,
		    (void *)(sizeof(struct in6_addr) / sizeof(uint32_t)),
		    offsetof(npf_tblent_t, te_entry.node),
		    offsetof(npf_tblent_t, te_addr));
		break;
	case NPF_TABLE_HASH:
		t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
		if (t->t_hashl == NULL) {
			/* Bucket allocation failed -- undo and bail. */
			kmem_free(t, sizeof(npf_table_t));
			return NULL;
		}
		break;
	default:
		KASSERT(false);
	}
	rw_init(&t->t_lock);
	t->t_type = type;
	t->t_id = tid;

	return t;
}
287 
/*
 * npf_table_destroy: free all table entries and table itself.
 *
 * => The last reference must have been dropped (t_refcnt == 0).
 */
void
npf_table_destroy(npf_table_t *t)
{
	KASSERT(t->t_refcnt == 0);

	switch (t->t_type) {
	case NPF_TABLE_HASH:
		table_hash_destroy(t);
		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
		break;
	case NPF_TABLE_TREE:
		/* Both the IPv4 and the IPv6 tree. */
		table_tree_destroy(&t->t_tree[0]);
		table_tree_destroy(&t->t_tree[1]);
		break;
	default:
		KASSERT(false);
	}
	rw_destroy(&t->t_lock);
	kmem_free(t, sizeof(npf_table_t));
}
311 
312 /*
313  * npf_table_check: validate ID and type.
314  */
315 int
316 npf_table_check(const npf_tableset_t *tset, u_int tid, int type)
317 {
318 
319 	if ((u_int)tid >= NPF_TABLE_SLOTS) {
320 		return EINVAL;
321 	}
322 	if (tset[tid] != NULL) {
323 		return EEXIST;
324 	}
325 	if (type != NPF_TABLE_TREE && type != NPF_TABLE_HASH) {
326 		return EINVAL;
327 	}
328 	return 0;
329 }
330 
331 static int
332 table_cidr_check(const u_int aidx, const npf_addr_t *addr,
333     const npf_netmask_t mask)
334 {
335 
336 	if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) {
337 		return EINVAL;
338 	}
339 	if (aidx > 1) {
340 		return EINVAL;
341 	}
342 
343 	/*
344 	 * For IPv4 (aidx = 0) - 32 and for IPv6 (aidx = 1) - 128.
345 	 * If it is a host - shall use NPF_NO_NETMASK.
346 	 */
347 	if (mask >= (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) {
348 		return EINVAL;
349 	}
350 	return 0;
351 }
352 
353 /*
354  * npf_table_insert: add an IP CIDR entry into the table.
355  */
356 int
357 npf_table_insert(npf_tableset_t *tset, u_int tid, const int alen,
358     const npf_addr_t *addr, const npf_netmask_t mask)
359 {
360 	const u_int aidx = NPF_ADDRLEN2TREE(alen);
361 	npf_tblent_t *ent;
362 	npf_table_t *t;
363 	int error;
364 
365 	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
366 		return EINVAL;
367 	}
368 
369 	error = table_cidr_check(aidx, addr, mask);
370 	if (error) {
371 		return error;
372 	}
373 	ent = pool_cache_get(tblent_cache, PR_WAITOK);
374 	memcpy(&ent->te_addr, addr, alen);
375 	ent->te_alen = alen;
376 
377 	/*
378 	 * Insert the entry.  Return an error on duplicate.
379 	 */
380 	rw_enter(&t->t_lock, RW_WRITER);
381 	switch (t->t_type) {
382 	case NPF_TABLE_HASH: {
383 		struct npf_hashl *htbl;
384 
385 		/*
386 		 * Hash tables by the concept support only IPs.
387 		 */
388 		if (mask != NPF_NO_NETMASK) {
389 			error = EINVAL;
390 			break;
391 		}
392 		if (!table_hash_lookup(t, addr, alen, &htbl)) {
393 			LIST_INSERT_HEAD(htbl, ent, te_entry.hashq);
394 			t->t_nitems++;
395 		} else {
396 			error = EEXIST;
397 		}
398 		break;
399 	}
400 	case NPF_TABLE_TREE: {
401 		pt_tree_t *tree = &t->t_tree[aidx];
402 		bool ok;
403 
404 		/*
405 		 * If no mask specified, use maximum mask.
406 		 */
407 		ok = (mask != NPF_NO_NETMASK) ?
408 		    ptree_insert_mask_node(tree, ent, mask) :
409 		    ptree_insert_node(tree, ent);
410 		if (ok) {
411 			t->t_nitems++;
412 			error = 0;
413 		} else {
414 			error = EEXIST;
415 		}
416 		break;
417 	}
418 	default:
419 		KASSERT(false);
420 	}
421 	rw_exit(&t->t_lock);
422 
423 	if (error) {
424 		pool_cache_put(tblent_cache, ent);
425 	}
426 	return error;
427 }
428 
/*
 * npf_table_remove: remove the IP CIDR entry from the table.
 *
 * => Returns 0 on success, EINVAL for bad arguments, ENOENT when there
 *    is no matching entry.
 */
int
npf_table_remove(npf_tableset_t *tset, u_int tid, const int alen,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent;
	npf_table_t *t;
	int error;

	error = table_cidr_check(aidx, addr, mask);
	if (error) {
		return error;
	}

	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
		return EINVAL;
	}

	rw_enter(&t->t_lock, RW_WRITER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;

		/* Note: the mask is ignored for the hash storage. */
		ent = table_hash_lookup(t, addr, alen, &htbl);
		if (__predict_true(ent != NULL)) {
			LIST_REMOVE(ent, te_entry.hashq);
			t->t_nitems--;
		}
		break;
	}
	case NPF_TABLE_TREE: {
		pt_tree_t *tree = &t->t_tree[aidx];

		ent = ptree_find_node(tree, addr);
		if (__predict_true(ent != NULL)) {
			ptree_remove_node(tree, ent);
			t->t_nitems--;
		}
		break;
	}
	default:
		KASSERT(false);
		ent = NULL;
	}
	rw_exit(&t->t_lock);

	/* Free the detached entry outside of the lock. */
	if (ent == NULL) {
		return ENOENT;
	}
	pool_cache_put(tblent_cache, ent);
	return 0;
}
484 
/*
 * npf_table_lookup: find the table according to ID, lookup and match
 * the contents with the specified IP address.
 *
 * => Returns 0 when the address matches, ENOENT when it does not,
 *    EINVAL for a bad address length or table ID.
 */
int
npf_table_lookup(npf_tableset_t *tset, u_int tid,
    const int alen, const npf_addr_t *addr)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent;
	npf_table_t *t;

	/* Only IPv4 (aidx 0) and IPv6 (aidx 1) are supported. */
	if (__predict_false(aidx > 1)) {
		return EINVAL;
	}

	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
		return EINVAL;
	}

	rw_enter(&t->t_lock, RW_READER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;
		ent = table_hash_lookup(t, addr, alen, &htbl);
		break;
	}
	case NPF_TABLE_TREE: {
		ent = ptree_find_node(&t->t_tree[aidx], addr);
		break;
	}
	default:
		KASSERT(false);
		ent = NULL;
	}
	rw_exit(&t->t_lock);

	return ent ? 0 : ENOENT;
}
524 
525 static int
526 table_ent_copyout(npf_tblent_t *ent, npf_netmask_t mask,
527     void *ubuf, size_t len, size_t *off)
528 {
529 	void *ubufp = (uint8_t *)ubuf + *off;
530 	npf_ioctl_ent_t uent;
531 
532 	if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
533 		return ENOMEM;
534 	}
535 	uent.alen = ent->te_alen;
536 	memcpy(&uent.addr, &ent->te_addr, sizeof(npf_addr_t));
537 	uent.mask = mask;
538 
539 	return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
540 }
541 
542 static int
543 table_tree_list(pt_tree_t *tree, npf_netmask_t maxmask, void *ubuf,
544     size_t len, size_t *off)
545 {
546 	npf_tblent_t *ent = NULL;
547 	int error = 0;
548 
549 	while ((ent = ptree_iterate(tree, ent, PT_ASCENDING)) != NULL) {
550 		pt_bitlen_t blen;
551 
552 		if (!ptree_mask_node_p(tree, ent, &blen)) {
553 			blen = maxmask;
554 		}
555 		error = table_ent_copyout(ent, blen, ubuf, len, off);
556 		if (error)
557 			break;
558 	}
559 	return error;
560 }
561 
562 /*
563  * npf_table_list: copy a list of all table entries into a userspace buffer.
564  */
565 int
566 npf_table_list(npf_tableset_t *tset, u_int tid, void *ubuf, size_t len)
567 {
568 	npf_table_t *t;
569 	size_t off = 0;
570 	int error = 0;
571 
572 	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
573 		return EINVAL;
574 	}
575 
576 	rw_enter(&t->t_lock, RW_READER);
577 	switch (t->t_type) {
578 	case NPF_TABLE_HASH:
579 		for (unsigned n = 0; n <= t->t_hashmask; n++) {
580 			npf_tblent_t *ent;
581 
582 			LIST_FOREACH(ent, &t->t_hashl[n], te_entry.hashq)
583 				if ((error = table_ent_copyout(ent, 0, ubuf,
584 				    len, &off)) != 0)
585 					break;
586 		}
587 		break;
588 	case NPF_TABLE_TREE:
589 		error = table_tree_list(&t->t_tree[0], 32, ubuf, len, &off);
590 		if (error)
591 			break;
592 		error = table_tree_list(&t->t_tree[1], 128, ubuf, len, &off);
593 		break;
594 	default:
595 		KASSERT(false);
596 	}
597 	rw_exit(&t->t_lock);
598 
599 	return error;
600 }
601 
602 /*
603  * npf_table_flush: remove all table entries.
604  */
605 int
606 npf_table_flush(npf_tableset_t *tset, u_int tid)
607 {
608 	npf_table_t *t;
609 
610 	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
611 		return EINVAL;
612 	}
613 
614 	rw_enter(&t->t_lock, RW_WRITER);
615 	switch (t->t_type) {
616 	case NPF_TABLE_HASH:
617 		table_hash_destroy(t);
618 		t->t_nitems = 0;
619 		break;
620 	case NPF_TABLE_TREE:
621 		table_tree_destroy(&t->t_tree[0]);
622 		table_tree_destroy(&t->t_tree[1]);
623 		t->t_nitems = 0;
624 		break;
625 	default:
626 		KASSERT(false);
627 	}
628 	rw_exit(&t->t_lock);
629 
630 	return 0;
631 }
632