/*	$NetBSD: npf_tableset.c,v 1.1 2010/08/22 18:56:23 rmind Exp $	*/

/*-
 * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NPF table module.
 *
 * Lock order:
 *
 *	table_lock ->
 *		npf_table_t::t_lock
 *
 * TODO:
 * - Currently, the code is modeled to handle IPv4 CIDR blocks only.
 * - Dynamic hash growing/shrinking (i.e. re-hash functionality), maybe?
 * - Dynamic array resize.
 */

#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.1 2010/08/22 18:56:23 rmind Exp $");
#endif

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/hash.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/systm.h>
#include <sys/types.h>

#include "npf_impl.h"

/* Table entry structure. */
struct npf_tblent {
	/* IPv4 CIDR block. */
	in_addr_t			te_addr;
	in_addr_t			te_mask;
	union {
		LIST_ENTRY(npf_tblent)	hashq;
		struct rb_node		rbnode;
	} te_entry;
};

/* Return pointer to npf_tblent_t from RB-tree node. (XXX fix rb-tree) */
#define	NPF_RBN2TBLENT(n)		\
    (npf_tblent_t *)((uintptr_t)n - offsetof(npf_tblent_t, te_entry.rbnode))

LIST_HEAD(npf_hashl, npf_tblent);

/* Table structure. */
struct npf_table {
	char				t_name[16];
	/* Lock and reference count. */
	krwlock_t			t_lock;
	u_int				t_refcnt;
	/* Table ID. */
	u_int				t_id;
	/* Storage type: NPF_TABLE_HASH or NPF_TABLE_RBTREE. */
	u_int				t_type;
	struct npf_hashl *		t_hashl;
	u_long				t_hashmask;
	struct rb_tree			t_rbtree;
};

/* Global table array and its lock. */
static npf_tableset_t *		table_array;
static krwlock_t		table_lock;
static pool_cache_t		tblent_cache;

/*
 * npf_tableset_sysinit: initialise tableset structures.
 */
int
npf_tableset_sysinit(void)
{

	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
	    0, 0, "npftenpl", NULL, IPL_NONE, NULL, NULL, NULL);
	if (tblent_cache == NULL) {
		return ENOMEM;
	}
	table_array = npf_tableset_create();
	if (table_array == NULL) {
		pool_cache_destroy(tblent_cache);
		return ENOMEM;
	}
	rw_init(&table_lock);
	return 0;
}

/*
 * npf_tableset_sysfini: destroy tableset structures.
 */
void
npf_tableset_sysfini(void)
{

	npf_tableset_destroy(table_array);
	pool_cache_destroy(tblent_cache);
	rw_destroy(&table_lock);
}

/*
 * npf_tableset_create: allocate a new (empty) tableset.
 */
npf_tableset_t *
npf_tableset_create(void)
{
	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);

	return kmem_zalloc(sz, KM_SLEEP);
}

/*
 * npf_tableset_destroy: destroy all tables in the tableset and free the
 * tableset itself.
 */
void
npf_tableset_destroy(npf_tableset_t *tblset)
{
	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
	npf_table_t *t;
	u_int tid;

	/*
	 * Destroy all tables.  No references should be held, since the
	 * ruleset should have been destroyed first.
	 */
	for (tid = 0; tid < NPF_TABLE_SLOTS; tid++) {
		t = tblset[tid];
		if (t != NULL) {
			npf_table_destroy(t);
		}
	}
	kmem_free(tblset, sz);
}

/*
 * npf_tableset_insert: insert the table into the specified tableset.
 *
 * => Returns 0 on success; EEXIST if the table ID is already in use.
 */
int
npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t)
{
	const u_int tid = t->t_id;
	int error;

	KASSERT((u_int)tid < NPF_TABLE_SLOTS);

	if (tblset[tid] == NULL) {
		tblset[tid] = t;
		error = 0;
	} else {
		error = EEXIST;
	}
	return error;
}
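
/*
 * Example (an illustrative sketch, not part of the module): create a
 * tableset and populate it with one hash table.  The table ID and the
 * hash size are arbitrary values chosen for this example.
 *
 *	npf_tableset_t *tset = npf_tableset_create();
 *	npf_table_t *t = npf_table_create(1, NPF_TABLE_HASH, 1024);
 *	int error = npf_tableset_insert(tset, t);
 *
 * Here "error" is 0 on success, or EEXIST if slot 1 is already taken.
 */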

/*
 * npf_tableset_reload: replace old tableset array with a new one.
 *
 * => Called from npf_ruleset_reload() with a global ruleset lock held.
 * => Returns pointer to the old tableset, caller will destroy it.
 */
npf_tableset_t *
npf_tableset_reload(npf_tableset_t *tblset)
{
	npf_tableset_t *oldtblset;

	rw_enter(&table_lock, RW_WRITER);
	oldtblset = table_array;
	table_array = tblset;
	rw_exit(&table_lock);

	return oldtblset;
}
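
/*
 * A typical reload (sketch): build a fresh tableset, swap it in, and
 * destroy the old one, which npf_tableset_reload() hands back.
 *
 *	npf_tableset_t *oldtset = npf_tableset_reload(newtset);
 *	npf_tableset_destroy(oldtset);
 */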

/*
 * Red-black tree storage.
 */

static signed int
table_rbtree_cmp_nodes(const struct rb_node *n1, const struct rb_node *n2)
{
	const npf_tblent_t *te1 = NPF_RBN2TBLENT(n1);
	const npf_tblent_t *te2 = NPF_RBN2TBLENT(n2);
	const in_addr_t x = te1->te_addr & te1->te_mask;
	const in_addr_t y = te2->te_addr & te2->te_mask;

	if (x < y)
		return 1;
	if (x > y)
		return -1;
	return 0;
}

static signed int
table_rbtree_cmp_key(const struct rb_node *n1, const void *key)
{
	const npf_tblent_t *te = NPF_RBN2TBLENT(n1);
	const in_addr_t x = te->te_addr & te->te_mask;
	const in_addr_t y = *(const in_addr_t *)key;

	if (x < y)
		return 1;
	if (x > y)
		return -1;
	return 0;
}

static const struct rb_tree_ops table_rbtree_ops = {
	.rbto_compare_nodes = table_rbtree_cmp_nodes,
	.rbto_compare_key = table_rbtree_cmp_key
};
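
/*
 * Note that both comparators key on the masked address (te_addr & te_mask),
 * so RB-tree lookups must likewise be performed with an (addr & mask)
 * value, as npf_table_rem_v4cidr() does below.
 */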

/*
 * Hash helper routine: map a key (the masked address) to its hash bucket.
 */

static inline struct npf_hashl *
table_hash_bucket(npf_table_t *t, void *buf, size_t sz)
{
	const uint32_t hidx = hash32_buf(buf, sz, HASH32_BUF_INIT);

	return &t->t_hashl[hidx & t->t_hashmask];
}

/*
 * npf_table_create: create table with a specified ID.
 */
npf_table_t *
npf_table_create(u_int tid, int type, size_t hsize)
{
	npf_table_t *t;

	KASSERT((u_int)tid < NPF_TABLE_SLOTS);

	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
	switch (type) {
	case NPF_TABLE_RBTREE:
		rb_tree_init(&t->t_rbtree, &table_rbtree_ops);
		break;
	case NPF_TABLE_HASH:
		t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
		if (t->t_hashl == NULL) {
			kmem_free(t, sizeof(npf_table_t));
			return NULL;
		}
		break;
	default:
		KASSERT(false);
	}
	rw_init(&t->t_lock);
	t->t_type = type;
	t->t_refcnt = 1;
	t->t_id = tid;
	return t;
}

/*
 * npf_table_destroy: free all table entries and the table itself.
 */
void
npf_table_destroy(npf_table_t *t)
{
	npf_tblent_t *e;
	struct rb_node *nd;
	u_int n;

	switch (t->t_type) {
	case NPF_TABLE_HASH:
		for (n = 0; n <= t->t_hashmask; n++) {
			while ((e = LIST_FIRST(&t->t_hashl[n])) != NULL) {
				LIST_REMOVE(e, te_entry.hashq);
				pool_cache_put(tblent_cache, e);
			}
		}
		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
		break;
	case NPF_TABLE_RBTREE:
		while ((nd = rb_tree_iterate(&t->t_rbtree, NULL,
		    RB_DIR_RIGHT)) != NULL) {
			e = NPF_RBN2TBLENT(nd);
			rb_tree_remove_node(&t->t_rbtree, &e->te_entry.rbnode);
			pool_cache_put(tblent_cache, e);
		}
		break;
	default:
		KASSERT(false);
	}
	rw_destroy(&t->t_lock);
	kmem_free(t, sizeof(npf_table_t));
}

/*
 * npf_table_ref: get a reference on the table.
 *
 * => Table must be locked.
 */
void
npf_table_ref(npf_table_t *t)
{

	KASSERT(rw_lock_held(&t->t_lock));
	atomic_inc_uint(&t->t_refcnt);
}

/*
 * npf_table_unref: drop a reference from the table and destroy the
 * table if it was the last one.
 */
void
npf_table_unref(npf_table_t *t)
{

	if (atomic_dec_uint_nv(&t->t_refcnt) != 0) {
		return;
	}
	npf_table_destroy(t);
}

/*
 * npf_table_get: find the table by ID and "get" it by read-locking it.
 */
npf_table_t *
npf_table_get(npf_tableset_t *tset, u_int tid)
{
	npf_table_t *t;

	if ((u_int)tid >= NPF_TABLE_SLOTS) {
		return NULL;
	}
	if (tset) {
		t = tset[tid];
		if (t != NULL) {
			rw_enter(&t->t_lock, RW_READER);
		}
		return t;
	}
	rw_enter(&table_lock, RW_READER);
	t = table_array[tid];
	if (t != NULL) {
		rw_enter(&t->t_lock, RW_READER);
	}
	rw_exit(&table_lock);
	return t;
}

/*
 * npf_table_put: "put" the table back by unlocking it.
 */
void
npf_table_put(npf_table_t *t)
{

	rw_exit(&t->t_lock);
}
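
/*
 * Typical usage pattern (sketch): the table stays locked between the
 * get and put calls, which is what makes npf_table_ref() safe to call
 * in between.
 *
 *	npf_table_t *t = npf_table_get(tset, tid);
 *	if (t == NULL)
 *		return EINVAL;
 *	... operate on the table while it is locked ...
 *	npf_table_put(t);
 */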

/*
 * npf_table_check: validate ID and type.
 */
int
npf_table_check(npf_tableset_t *tset, u_int tid, int type)
{

	if ((u_int)tid >= NPF_TABLE_SLOTS) {
		return EINVAL;
	}
	if (tset[tid] != NULL) {
		return EEXIST;
	}
	if (type != NPF_TABLE_RBTREE && type != NPF_TABLE_HASH) {
		return EINVAL;
	}
	return 0;
}

/*
 * npf_table_add_v4cidr: add an IPv4 CIDR entry into the table.
 */
int
npf_table_add_v4cidr(npf_tableset_t *tset, u_int tid,
    in_addr_t addr, in_addr_t mask)
{
	struct npf_hashl *htbl;
	npf_tblent_t *e, *it;
	npf_table_t *t;
	in_addr_t val;
	int error = 0;

	/* Allocate and set up the entry. */
	e = pool_cache_get(tblent_cache, PR_WAITOK);
	if (e == NULL) {
		return ENOMEM;
	}
	e->te_addr = addr;
	e->te_mask = mask;

	/* Get and lock the table. */
	t = npf_table_get(tset, tid);
	if (__predict_false(t == NULL)) {
		pool_cache_put(tblent_cache, e);
		return EINVAL;
	}
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		/* Generate the hash value from (address & mask). */
		val = addr & mask;
		htbl = table_hash_bucket(t, &val, sizeof(in_addr_t));
		/* Lookup to check for duplicates. */
		LIST_FOREACH(it, htbl, te_entry.hashq) {
			if (it->te_addr == addr && it->te_mask == mask)
				break;
		}
		/* If there is no duplicate - insert the entry. */
		if (__predict_true(it == NULL)) {
			LIST_INSERT_HEAD(htbl, e, te_entry.hashq);
		} else {
			error = EEXIST;
		}
		break;
	case NPF_TABLE_RBTREE:
		/* Insert the entry.  Returns false, if it is a duplicate. */
		if (!rb_tree_insert_node(&t->t_rbtree, &e->te_entry.rbnode)) {
			error = EEXIST;
		}
		break;
	default:
		KASSERT(false);
	}
	npf_table_put(t);

	if (__predict_false(error)) {
		pool_cache_put(tblent_cache, e);
	}
	return error;
}
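
/*
 * Example (sketch): insert the 10.0.0.0/24 network into table 1.  The
 * address and mask are passed as raw in_addr_t values; the byte order
 * is assumed to be consistent with what the rest of NPF uses.
 *
 *	in_addr_t addr = htonl(0x0a000000);
 *	in_addr_t mask = htonl(0xffffff00);
 *	int error = npf_table_add_v4cidr(tset, 1, addr, mask);
 */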

/*
 * npf_table_rem_v4cidr: remove an IPv4 CIDR entry from the table.
 */
int
npf_table_rem_v4cidr(npf_tableset_t *tset, u_int tid,
    in_addr_t addr, in_addr_t mask)
{
	struct npf_hashl *htbl;
	struct rb_node *nd;
	npf_tblent_t *e;
	npf_table_t *t;
	in_addr_t val;
	int error = 0;

	e = NULL;

	/* Get and lock the table. */
	t = npf_table_get(tset, tid);
	if (__predict_false(t == NULL)) {
		return EINVAL;
	}
	/* Lookup & remove. */
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		/* Generate the hash value from (address & mask). */
		val = addr & mask;
		htbl = table_hash_bucket(t, &val, sizeof(in_addr_t));
		LIST_FOREACH(e, htbl, te_entry.hashq) {
			if (e->te_addr == addr && e->te_mask == mask)
				break;
		}
		if (__predict_true(e != NULL)) {
			LIST_REMOVE(e, te_entry.hashq);
		} else {
			error = ESRCH;
		}
		break;
	case NPF_TABLE_RBTREE:
		/* Key: (address & mask). */
		val = addr & mask;
		nd = rb_tree_find_node(&t->t_rbtree, &val);
		if (__predict_true(nd != NULL)) {
			e = NPF_RBN2TBLENT(nd);
			rb_tree_remove_node(&t->t_rbtree, &e->te_entry.rbnode);
		} else {
			error = ESRCH;
		}
		break;
	default:
		KASSERT(false);
	}
	npf_table_put(t);

	/* Free the table entry. */
	if (__predict_true(e != NULL)) {
		pool_cache_put(tblent_cache, e);
	}
	return error;
}
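
/*
 * Example (sketch): remove the same 10.0.0.0/24 entry again; ESRCH is
 * returned when no matching entry exists.
 *
 *	error = npf_table_rem_v4cidr(tset, 1, addr, mask);
 */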

/*
 * npf_table_match_v4addr: find the table by ID and check whether any
 * of its entries match the specified IPv4 address.
 */
int
npf_table_match_v4addr(u_int tid, in_addr_t ip4addr)
{
	struct npf_hashl *htbl;
	struct rb_node *nd;
	npf_tblent_t *e;
	npf_table_t *t;

	e = NULL;

	/* Get and lock the table. */
	t = npf_table_get(NULL, tid);
	if (__predict_false(t == NULL)) {
		return EINVAL;
	}
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		htbl = table_hash_bucket(t, &ip4addr, sizeof(in_addr_t));
		LIST_FOREACH(e, htbl, te_entry.hashq) {
			if ((ip4addr & e->te_mask) == e->te_addr) {
				break;
			}
		}
		break;
	case NPF_TABLE_RBTREE:
		nd = rb_tree_find_node(&t->t_rbtree, &ip4addr);
		/* Guard against a failed lookup before converting the node. */
		e = (nd != NULL) ? NPF_RBN2TBLENT(nd) : NULL;
		KASSERT(e == NULL || (ip4addr & e->te_mask) == e->te_addr);
		break;
	default:
		KASSERT(false);
	}
	npf_table_put(t);

	return e ? 0 : -1;
}
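
/*
 * Example (sketch): check a packet's source address against table 1,
 * e.g. from a rule-processing path.  The call returns 0 when the
 * address matches an entry, -1 when it does not, or EINVAL if the
 * table does not exist.
 *
 *	if (npf_table_match_v4addr(1, ip->ip_src.s_addr) == 0)
 *		... the address is in the table ...
 */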
560