xref: /netbsd-src/sys/net/npf/npf_tableset.c (revision a4ddc2c8fb9af816efe3b1c375a5530aef0e89e9)
1 /*	$NetBSD: npf_tableset.c,v 1.17 2013/02/09 03:35:32 rmind Exp $	*/
2 
3 /*-
4  * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This material is based upon work partially supported by The
8  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * NPF tableset module.
34  *
35  * Notes
36  *
37  *	The tableset is an array of tables.  After the creation, the array
 *	is immutable.  The caller is responsible for synchronising the access
39  *	to the tableset.  The table can either be a hash or a tree.  Its
40  *	entries are protected by a read-write lock.
41  */
42 
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.17 2013/02/09 03:35:32 rmind Exp $");
45 
46 #include <sys/param.h>
47 #include <sys/types.h>
48 
49 #include <sys/atomic.h>
50 #include <sys/hash.h>
51 #include <sys/kmem.h>
52 #include <sys/pool.h>
53 #include <sys/queue.h>
54 #include <sys/rwlock.h>
55 #include <sys/systm.h>
56 #include <sys/types.h>
57 
58 #include "npf_impl.h"
59 
/*
 * Table structures.
 */

/*
 * Table entry: a single IP address (or CIDR when stored in a tree).
 * The linkage union reflects the two storage types: hash chain or
 * ptree node — an entry is only ever in one of them.
 */
typedef struct npf_tblent {
	union {
		LIST_ENTRY(npf_tblent) hashq;	/* hash-bucket linkage */
		pt_node_t	node;		/* ptree linkage */
	} te_entry;
	int			te_alen;	/* address length in bytes: 4 or 16 */
	npf_addr_t		te_addr;	/* the address itself */
} npf_tblent_t;

LIST_HEAD(npf_hashl, npf_tblent);

struct npf_table {
	char			t_name[16];
	/* Lock and reference count. */
	krwlock_t		t_lock;
	u_int			t_refcnt;
	/* Total number of items. */
	u_int			t_nitems;
	/* Table ID. */
	u_int			t_id;
	/* The storage type can be: a) hash b) tree. */
	int			t_type;
	/* Hash storage: bucket array and its mask (valid for NPF_TABLE_HASH). */
	struct npf_hashl *	t_hashl;
	u_long			t_hashmask;
	/* Separate trees for IPv4 and IPv6. */
	pt_tree_t		t_tree[2];
};

/* Map address length (4 or 16 bytes) to a t_tree[] index: 0 = IPv4, 1 = IPv6. */
#define	NPF_ADDRLEN2TREE(alen)	((alen) >> 4)

/* Pool cache for npf_tblent_t allocations, shared by all tables. */
static pool_cache_t		tblent_cache	__read_mostly;
95 
96 /*
97  * npf_table_sysinit: initialise tableset structures.
98  */
99 void
100 npf_tableset_sysinit(void)
101 {
102 
103 	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
104 	    0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
105 }
106 
/*
 * npf_tableset_sysfini: destroy the table entry pool cache.
 */
void
npf_tableset_sysfini(void)
{

	pool_cache_destroy(tblent_cache);
}
113 
114 npf_tableset_t *
115 npf_tableset_create(void)
116 {
117 	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
118 
119 	return kmem_zalloc(sz, KM_SLEEP);
120 }
121 
122 void
123 npf_tableset_destroy(npf_tableset_t *tblset)
124 {
125 	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
126 	npf_table_t *t;
127 	u_int tid;
128 
129 	/*
130 	 * Destroy all tables (no references should be held, as ruleset
131 	 * should be destroyed before).
132 	 */
133 	for (tid = 0; tid < NPF_TABLE_SLOTS; tid++) {
134 		t = tblset[tid];
135 		if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) {
136 			npf_table_destroy(t);
137 		}
138 	}
139 	kmem_free(tblset, sz);
140 }
141 
142 /*
143  * npf_tableset_insert: insert the table into the specified tableset.
144  *
145  * => Returns 0 on success.  Fails and returns error if ID is already used.
146  */
147 int
148 npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t)
149 {
150 	const u_int tid = t->t_id;
151 	int error;
152 
153 	KASSERT((u_int)tid < NPF_TABLE_SLOTS);
154 
155 	if (tblset[tid] == NULL) {
156 		atomic_inc_uint(&t->t_refcnt);
157 		tblset[tid] = t;
158 		error = 0;
159 	} else {
160 		error = EEXIST;
161 	}
162 	return error;
163 }
164 
165 /*
166  * npf_tableset_reload: iterate all tables and if the new table is of the
167  * same type and has no items, then we preserve the old one and its entries.
168  *
169  * => The caller is responsible for providing synchronisation.
170  */
171 void
172 npf_tableset_reload(npf_tableset_t *ntset, npf_tableset_t *otset)
173 {
174 	for (int i = 0; i < NPF_TABLE_SLOTS; i++) {
175 		npf_table_t *t = ntset[i], *ot = otset[i];
176 
177 		if (t == NULL || ot == NULL) {
178 			continue;
179 		}
180 		if (t->t_nitems || t->t_type != ot->t_type) {
181 			continue;
182 		}
183 
184 		/*
185 		 * Acquire a reference since the table has to be kept
186 		 * in the old tableset.
187 		 */
188 		atomic_inc_uint(&ot->t_refcnt);
189 		ntset[i] = ot;
190 
191 		/* Only reference, never been visible. */
192 		t->t_refcnt--;
193 		npf_table_destroy(t);
194 	}
195 }
196 
197 /*
198  * Few helper routines.
199  */
200 
201 static npf_tblent_t *
202 table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr,
203     const int alen, struct npf_hashl **rhtbl)
204 {
205 	const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT);
206 	struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask];
207 	npf_tblent_t *ent;
208 
209 	/*
210 	 * Lookup the hash table and check for duplicates.
211 	 * Note: mask is ignored for the hash storage.
212 	 */
213 	LIST_FOREACH(ent, htbl, te_entry.hashq) {
214 		if (ent->te_alen != alen) {
215 			continue;
216 		}
217 		if (memcmp(&ent->te_addr, addr, alen) == 0) {
218 			break;
219 		}
220 	}
221 	*rhtbl = htbl;
222 	return ent;
223 }
224 
225 static void
226 table_tree_destroy(pt_tree_t *tree)
227 {
228 	npf_tblent_t *ent;
229 
230 	while ((ent = ptree_iterate(tree, NULL, PT_ASCENDING)) != NULL) {
231 		ptree_remove_node(tree, ent);
232 		pool_cache_put(tblent_cache, ent);
233 	}
234 }
235 
236 /*
237  * npf_table_create: create table with a specified ID.
238  */
239 npf_table_t *
240 npf_table_create(u_int tid, int type, size_t hsize)
241 {
242 	npf_table_t *t;
243 
244 	KASSERT((u_int)tid < NPF_TABLE_SLOTS);
245 
246 	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
247 	switch (type) {
248 	case NPF_TABLE_TREE:
249 		ptree_init(&t->t_tree[0], &npf_table_ptree_ops,
250 		    (void *)(sizeof(struct in_addr) / sizeof(uint32_t)),
251 		    offsetof(npf_tblent_t, te_entry.node),
252 		    offsetof(npf_tblent_t, te_addr));
253 		ptree_init(&t->t_tree[1], &npf_table_ptree_ops,
254 		    (void *)(sizeof(struct in6_addr) / sizeof(uint32_t)),
255 		    offsetof(npf_tblent_t, te_entry.node),
256 		    offsetof(npf_tblent_t, te_addr));
257 		break;
258 	case NPF_TABLE_HASH:
259 		t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
260 		if (t->t_hashl == NULL) {
261 			kmem_free(t, sizeof(npf_table_t));
262 			return NULL;
263 		}
264 		break;
265 	default:
266 		KASSERT(false);
267 	}
268 	rw_init(&t->t_lock);
269 	t->t_type = type;
270 	t->t_id = tid;
271 
272 	return t;
273 }
274 
275 /*
276  * npf_table_destroy: free all table entries and table itself.
277  */
278 void
279 npf_table_destroy(npf_table_t *t)
280 {
281 	KASSERT(t->t_refcnt == 0);
282 
283 	switch (t->t_type) {
284 	case NPF_TABLE_HASH:
285 		for (unsigned n = 0; n <= t->t_hashmask; n++) {
286 			npf_tblent_t *ent;
287 
288 			while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
289 				LIST_REMOVE(ent, te_entry.hashq);
290 				pool_cache_put(tblent_cache, ent);
291 			}
292 		}
293 		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
294 		break;
295 	case NPF_TABLE_TREE:
296 		table_tree_destroy(&t->t_tree[0]);
297 		table_tree_destroy(&t->t_tree[1]);
298 		break;
299 	default:
300 		KASSERT(false);
301 	}
302 	rw_destroy(&t->t_lock);
303 	kmem_free(t, sizeof(npf_table_t));
304 }
305 
306 /*
307  * npf_table_check: validate ID and type.
308  */
309 int
310 npf_table_check(const npf_tableset_t *tset, u_int tid, int type)
311 {
312 
313 	if ((u_int)tid >= NPF_TABLE_SLOTS) {
314 		return EINVAL;
315 	}
316 	if (tset[tid] != NULL) {
317 		return EEXIST;
318 	}
319 	if (type != NPF_TABLE_TREE && type != NPF_TABLE_HASH) {
320 		return EINVAL;
321 	}
322 	return 0;
323 }
324 
325 static int
326 table_cidr_check(const u_int aidx, const npf_addr_t *addr,
327     const npf_netmask_t mask)
328 {
329 
330 	if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) {
331 		return EINVAL;
332 	}
333 	if (aidx > 1) {
334 		return EINVAL;
335 	}
336 
337 	/*
338 	 * For IPv4 (aidx = 0) - 32 and for IPv6 (aidx = 1) - 128.
339 	 * If it is a host - shall use NPF_NO_NETMASK.
340 	 */
341 	if (mask >= (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) {
342 		return EINVAL;
343 	}
344 	return 0;
345 }
346 
347 /*
348  * npf_table_insert: add an IP CIDR entry into the table.
349  */
350 int
351 npf_table_insert(npf_tableset_t *tset, u_int tid, const int alen,
352     const npf_addr_t *addr, const npf_netmask_t mask)
353 {
354 	const u_int aidx = NPF_ADDRLEN2TREE(alen);
355 	npf_tblent_t *ent;
356 	npf_table_t *t;
357 	int error;
358 
359 	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
360 		return EINVAL;
361 	}
362 
363 	error = table_cidr_check(aidx, addr, mask);
364 	if (error) {
365 		return error;
366 	}
367 	ent = pool_cache_get(tblent_cache, PR_WAITOK);
368 	memcpy(&ent->te_addr, addr, alen);
369 	ent->te_alen = alen;
370 
371 	/*
372 	 * Insert the entry.  Return an error on duplicate.
373 	 */
374 	rw_enter(&t->t_lock, RW_WRITER);
375 	switch (t->t_type) {
376 	case NPF_TABLE_HASH: {
377 		struct npf_hashl *htbl;
378 
379 		/*
380 		 * Hash tables by the concept support only IPs.
381 		 */
382 		if (mask != NPF_NO_NETMASK) {
383 			error = EINVAL;
384 			break;
385 		}
386 		if (!table_hash_lookup(t, addr, alen, &htbl)) {
387 			LIST_INSERT_HEAD(htbl, ent, te_entry.hashq);
388 			t->t_nitems++;
389 		} else {
390 			error = EEXIST;
391 		}
392 		break;
393 	}
394 	case NPF_TABLE_TREE: {
395 		pt_tree_t *tree = &t->t_tree[aidx];
396 		bool ok;
397 
398 		/*
399 		 * If no mask specified, use maximum mask.
400 		 */
401 		ok = (mask != NPF_NO_NETMASK) ?
402 		    ptree_insert_mask_node(tree, ent, mask) :
403 		    ptree_insert_node(tree, ent);
404 		if (ok) {
405 			t->t_nitems++;
406 			error = 0;
407 		} else {
408 			error = EEXIST;
409 		}
410 		break;
411 	}
412 	default:
413 		KASSERT(false);
414 	}
415 	rw_exit(&t->t_lock);
416 
417 	if (error) {
418 		pool_cache_put(tblent_cache, ent);
419 	}
420 	return error;
421 }
422 
423 /*
424  * npf_table_remove: remove the IP CIDR entry from the table.
425  */
426 int
427 npf_table_remove(npf_tableset_t *tset, u_int tid, const int alen,
428     const npf_addr_t *addr, const npf_netmask_t mask)
429 {
430 	const u_int aidx = NPF_ADDRLEN2TREE(alen);
431 	npf_tblent_t *ent;
432 	npf_table_t *t;
433 	int error;
434 
435 	error = table_cidr_check(aidx, addr, mask);
436 	if (error) {
437 		return error;
438 	}
439 
440 	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
441 		return EINVAL;
442 	}
443 
444 	rw_enter(&t->t_lock, RW_WRITER);
445 	switch (t->t_type) {
446 	case NPF_TABLE_HASH: {
447 		struct npf_hashl *htbl;
448 
449 		ent = table_hash_lookup(t, addr, alen, &htbl);
450 		if (__predict_true(ent != NULL)) {
451 			LIST_REMOVE(ent, te_entry.hashq);
452 			t->t_nitems--;
453 		}
454 		break;
455 	}
456 	case NPF_TABLE_TREE: {
457 		pt_tree_t *tree = &t->t_tree[aidx];
458 
459 		ent = ptree_find_node(tree, addr);
460 		if (__predict_true(ent != NULL)) {
461 			ptree_remove_node(tree, ent);
462 			t->t_nitems--;
463 		}
464 		break;
465 	}
466 	default:
467 		KASSERT(false);
468 		ent = NULL;
469 	}
470 	rw_exit(&t->t_lock);
471 
472 	if (ent == NULL) {
473 		return ENOENT;
474 	}
475 	pool_cache_put(tblent_cache, ent);
476 	return 0;
477 }
478 
479 /*
480  * npf_table_lookup: find the table according to ID, lookup and match
481  * the contents with the specified IP address.
482  */
483 int
484 npf_table_lookup(npf_tableset_t *tset, u_int tid,
485     const int alen, const npf_addr_t *addr)
486 {
487 	const u_int aidx = NPF_ADDRLEN2TREE(alen);
488 	npf_tblent_t *ent;
489 	npf_table_t *t;
490 
491 	if (__predict_false(aidx > 1)) {
492 		return EINVAL;
493 	}
494 
495 	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
496 		return EINVAL;
497 	}
498 
499 	rw_enter(&t->t_lock, RW_READER);
500 	switch (t->t_type) {
501 	case NPF_TABLE_HASH: {
502 		struct npf_hashl *htbl;
503 		ent = table_hash_lookup(t, addr, alen, &htbl);
504 		break;
505 	}
506 	case NPF_TABLE_TREE: {
507 		ent = ptree_find_node(&t->t_tree[aidx], addr);
508 		break;
509 	}
510 	default:
511 		KASSERT(false);
512 		ent = NULL;
513 	}
514 	rw_exit(&t->t_lock);
515 
516 	return ent ? 0 : ENOENT;
517 }
518 
/*
 * table_ent_copyout: copy a single table entry out to the userspace
 * buffer at offset '*off', as an npf_ioctl_ent_t record.
 *
 * => Advances '*off' by the record size.  Note: '*off' is advanced
 *    even on ENOMEM; callers treat the offset as scratch once an
 *    error is returned.
 * => Returns ENOMEM if the record would not fit in 'len' bytes,
 *    otherwise the copyout(9) result.
 */
static int
table_ent_copyout(npf_tblent_t *ent, npf_netmask_t mask,
    void *ubuf, size_t len, size_t *off)
{
	void *ubufp = (uint8_t *)ubuf + *off;
	npf_ioctl_ent_t uent;

	if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
		return ENOMEM;
	}
	uent.alen = ent->te_alen;
	memcpy(&uent.addr, &ent->te_addr, sizeof(npf_addr_t));
	uent.mask = mask;

	return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
}
535 
536 static int
537 table_tree_list(pt_tree_t *tree, npf_netmask_t maxmask, void *ubuf,
538     size_t len, size_t *off)
539 {
540 	npf_tblent_t *ent = NULL;
541 	int error = 0;
542 
543 	while ((ent = ptree_iterate(tree, ent, PT_ASCENDING)) != NULL) {
544 		pt_bitlen_t blen;
545 
546 		if (!ptree_mask_node_p(tree, ent, &blen)) {
547 			blen = maxmask;
548 		}
549 		error = table_ent_copyout(ent, blen, ubuf, len, off);
550 		if (error)
551 			break;
552 	}
553 	return error;
554 }
555 
556 /*
557  * npf_table_list: copy a list of all table entries into a userspace buffer.
558  */
559 int
560 npf_table_list(npf_tableset_t *tset, u_int tid, void *ubuf, size_t len)
561 {
562 	npf_table_t *t;
563 	size_t off = 0;
564 	int error = 0;
565 
566 	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
567 		return EINVAL;
568 	}
569 
570 	rw_enter(&t->t_lock, RW_READER);
571 	switch (t->t_type) {
572 	case NPF_TABLE_HASH:
573 		for (unsigned n = 0; n <= t->t_hashmask; n++) {
574 			npf_tblent_t *ent;
575 
576 			LIST_FOREACH(ent, &t->t_hashl[n], te_entry.hashq)
577 				if ((error = table_ent_copyout(ent, 0, ubuf,
578 				    len, &off)) != 0)
579 					break;
580 		}
581 		break;
582 	case NPF_TABLE_TREE:
583 		error = table_tree_list(&t->t_tree[0], 32, ubuf, len, &off);
584 		if (error)
585 			break;
586 		error = table_tree_list(&t->t_tree[1], 128, ubuf, len, &off);
587 		break;
588 	default:
589 		KASSERT(false);
590 	}
591 	rw_exit(&t->t_lock);
592 
593 	return error;
594 }
595