/*	$NetBSD: npf_tableset.c,v 1.4 2010/12/18 01:07:25 rmind Exp $	*/

/*-
 * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NPF tableset module.
 *
 * TODO:
 * - Currently, the code handles IPv4 CIDR blocks only.
 * - Dynamic hash growing/shrinking (i.e. re-hash functionality), maybe?
 * - Dynamic array resize.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.4 2010/12/18 01:07:25 rmind Exp $");

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/hash.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/systm.h>
#include <sys/types.h>

#include "npf_impl.h"

/* Table entry structure. */
struct npf_tblent {
	/* Hash/tree entry. */
	union {
		LIST_ENTRY(npf_tblent)	hashq;
		rb_node_t		rbnode;
	} te_entry;
	/* IPv4 CIDR block. */
	in_addr_t		te_addr;
	in_addr_t		te_mask;
};

LIST_HEAD(npf_hashl, npf_tblent);

/* Table structure. */
struct npf_table {
	char			t_name[16];
	/* Lock and reference count. */
	krwlock_t		t_lock;
	u_int			t_refcnt;
	/* Table ID. */
	u_int			t_id;
	/* The storage type can be: 1. Hash 2. RB-tree. */
	u_int			t_type;
	struct npf_hashl *	t_hashl;
	u_long			t_hashmask;
	rb_tree_t		t_rbtree;
};

static pool_cache_t		tblent_cache	__read_mostly;
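/*
 * Illustrative sketch (not part of the original code, kept out of the
 * build): both storage back-ends key an entry by its network address,
 * i.e. (te_addr & te_mask), so any host address within a CIDR block
 * reduces to the same key.  The addresses below are arbitrary example
 * values in network byte order.
 */
#if 0
static bool
npf_tblent_key_example(void)
{
	const in_addr_t host = htonl(0xc0a8014d);	/* 192.168.1.77 */
	const in_addr_t net  = htonl(0xc0a80100);	/* 192.168.1.0 */
	const in_addr_t mask = htonl(0xffffff00);	/* /24 */

	/* Both addresses reduce to the key of 192.168.1.0/24. */
	return (host & mask) == (net & mask);		/* true */
}
#endif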
/*
 * npf_tableset_sysinit: initialise the tableset structures.
 */
void
npf_tableset_sysinit(void)
{

	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
	    0, 0, "npftenpl", NULL, IPL_NONE, NULL, NULL, NULL);
}

/*
 * npf_tableset_sysfini: destroy the tableset structures.
 */
void
npf_tableset_sysfini(void)
{

	pool_cache_destroy(tblent_cache);
}

/*
 * npf_tableset_create: allocate a new (empty) tableset.
 */
npf_tableset_t *
npf_tableset_create(void)
{
	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);

	return kmem_zalloc(sz, KM_SLEEP);
}

/*
 * npf_tableset_destroy: destroy all tables in the tableset and the
 * tableset itself.
 */
void
npf_tableset_destroy(npf_tableset_t *tblset)
{
	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
	npf_table_t *t;
	u_int tid;

	/*
	 * Destroy all tables (no references should be held, as the
	 * ruleset should have been destroyed first).
	 */
	for (tid = 0; tid < NPF_TABLE_SLOTS; tid++) {
		t = tblset[tid];
		if (t != NULL) {
			npf_table_destroy(t);
		}
	}
	kmem_free(tblset, sz);
}

/*
 * npf_tableset_insert: insert the table into the specified tableset.
 *
 * => Returns 0 on success; fails with EEXIST if the ID is already used.
 */
int
npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t)
{
	const u_int tid = t->t_id;
	int error;

	KASSERT((u_int)tid < NPF_TABLE_SLOTS);

	if (tblset[tid] == NULL) {
		tblset[tid] = t;
		error = 0;
	} else {
		error = EEXIST;
	}
	return error;
}

/*
 * Red-black tree storage: entries are ordered by the network address,
 * i.e. (te_addr & te_mask).
 */

static signed int
table_rbtree_cmp_nodes(void *ctx, const void *n1, const void *n2)
{
	const npf_tblent_t * const te1 = n1;
	const npf_tblent_t * const te2 = n2;
	const in_addr_t x = te1->te_addr & te1->te_mask;
	const in_addr_t y = te2->te_addr & te2->te_mask;

	if (x < y)
		return -1;
	if (x > y)
		return 1;
	return 0;
}

static signed int
table_rbtree_cmp_key(void *ctx, const void *n1, const void *key)
{
	const npf_tblent_t * const te = n1;
	const in_addr_t x = te->te_addr & te->te_mask;
	const in_addr_t y = *(const in_addr_t *)key;

	if (x < y)
		return -1;
	if (x > y)
		return 1;
	return 0;
}

static const rb_tree_ops_t table_rbtree_ops = {
	.rbto_compare_nodes = table_rbtree_cmp_nodes,
	.rbto_compare_key = table_rbtree_cmp_key,
	.rbto_node_offset = offsetof(npf_tblent_t, te_entry.rbnode),
	.rbto_context = NULL
};

/*
 * Hash helper routine.
 */

static inline struct npf_hashl *
table_hash_bucket(npf_table_t *t, void *buf, size_t sz)
{
	const uint32_t hidx = hash32_buf(buf, sz, HASH32_BUF_INIT);

	return &t->t_hashl[hidx & t->t_hashmask];
}

/*
 * npf_table_create: create a table with the specified ID.
 */
npf_table_t *
npf_table_create(u_int tid, int type, size_t hsize)
{
	npf_table_t *t;

	KASSERT((u_int)tid < NPF_TABLE_SLOTS);

	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
	switch (type) {
	case NPF_TABLE_RBTREE:
		rb_tree_init(&t->t_rbtree, &table_rbtree_ops);
		break;
	case NPF_TABLE_HASH:
		t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
		if (t->t_hashl == NULL) {
			kmem_free(t, sizeof(npf_table_t));
			return NULL;
		}
		break;
	default:
		KASSERT(false);
	}
	rw_init(&t->t_lock);
	t->t_type = type;
	t->t_refcnt = 1;
	t->t_id = tid;
	return t;
}
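/*
 * Illustrative sketch (not part of the original code, kept out of the
 * build): creating and registering the two table types.  The IDs and
 * hash size are arbitrary example values.
 */
#if 0
static void
npf_table_create_example(npf_tableset_t *tset)
{
	npf_table_t *t;

	/* Hash storage: hsize is the requested number of buckets. */
	t = npf_table_create(1, NPF_TABLE_HASH, 256);
	if (t != NULL && npf_tableset_insert(tset, t) != 0) {
		/* Slot already taken - drop our (only) reference. */
		npf_table_destroy(t);
	}

	/* RB-tree storage: hsize is ignored. */
	t = npf_table_create(2, NPF_TABLE_RBTREE, 0);
	if (t != NULL && npf_tableset_insert(tset, t) != 0) {
		npf_table_destroy(t);
	}
}
#endif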
/*
 * npf_table_destroy: free all table entries and the table itself.
 */
void
npf_table_destroy(npf_table_t *t)
{
	npf_tblent_t *e;
	u_int n;

	switch (t->t_type) {
	case NPF_TABLE_HASH:
		for (n = 0; n <= t->t_hashmask; n++) {
			while ((e = LIST_FIRST(&t->t_hashl[n])) != NULL) {
				LIST_REMOVE(e, te_entry.hashq);
				pool_cache_put(tblent_cache, e);
			}
		}
		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
		break;
	case NPF_TABLE_RBTREE:
		while ((e = rb_tree_iterate(&t->t_rbtree, NULL,
		    RB_DIR_LEFT)) != NULL) {
			rb_tree_remove_node(&t->t_rbtree, e);
			pool_cache_put(tblent_cache, e);
		}
		break;
	default:
		KASSERT(false);
	}
	rw_destroy(&t->t_lock);
	kmem_free(t, sizeof(npf_table_t));
}

/*
 * npf_table_ref: acquire a reference on the table.
 *
 * => Table must be locked.
 */
void
npf_table_ref(npf_table_t *t)
{

	KASSERT(rw_lock_held(&t->t_lock));
	atomic_inc_uint(&t->t_refcnt);
}

/*
 * npf_table_unref: drop a reference from the table and destroy the
 * table if it was the last one.
 */
void
npf_table_unref(npf_table_t *t)
{

	if (atomic_dec_uint_nv(&t->t_refcnt) != 0) {
		return;
	}
	npf_table_destroy(t);
}

/*
 * npf_table_get: find the table by ID and "get" it by read-locking it.
 */
npf_table_t *
npf_table_get(npf_tableset_t *tset, u_int tid)
{
	npf_tableset_t *rtset;
	npf_table_t *t;

	if ((u_int)tid >= NPF_TABLE_SLOTS) {
		return NULL;
	}
	if (tset == NULL) {
		npf_core_enter();
		rtset = npf_core_tableset();
	} else {
		rtset = tset;
	}
	t = rtset[tid];
	if (t != NULL) {
		rw_enter(&t->t_lock, RW_READER);
	}
	if (tset == NULL) {
		npf_core_exit();
	}
	return t;
}

/*
 * npf_table_put: "put" the table back by unlocking it.
 */
void
npf_table_put(npf_table_t *t)
{

	rw_exit(&t->t_lock);
}

/*
 * npf_table_check: validate the ID and type.
 */
int
npf_table_check(npf_tableset_t *tset, u_int tid, int type)
{

	if ((u_int)tid >= NPF_TABLE_SLOTS) {
		return EINVAL;
	}
	if (tset[tid] != NULL) {
		return EEXIST;
	}
	if (type != NPF_TABLE_RBTREE && type != NPF_TABLE_HASH) {
		return EINVAL;
	}
	return 0;
}
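/*
 * Illustrative sketch (not part of the original code, kept out of the
 * build): the get/put protocol used by the operations below.  A
 * successful npf_table_get() returns the table read-locked and must be
 * paired with npf_table_put().
 */
#if 0
static int
npf_table_get_put_example(npf_tableset_t *tset, u_int tid)
{
	npf_table_t *t;

	t = npf_table_get(tset, tid);	/* NULL if ID is invalid/unused */
	if (t == NULL) {
		return EINVAL;
	}
	/* ... inspect the table while holding the lock ... */
	npf_table_put(t);
	return 0;
}
#endif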
/*
 * npf_table_add_v4cidr: add an IPv4 CIDR block into the table.
 */
int
npf_table_add_v4cidr(npf_tableset_t *tset, u_int tid,
    in_addr_t addr, in_addr_t mask)
{
	struct npf_hashl *htbl;
	npf_tblent_t *e, *it;
	npf_table_t *t;
	in_addr_t val;
	int error = 0;

	/* Allocate and set up the entry. */
	e = pool_cache_get(tblent_cache, PR_WAITOK);
	e->te_addr = addr;
	e->te_mask = mask;

	/* Get the table (returns it locked). */
	t = npf_table_get(tset, tid);
	if (__predict_false(t == NULL)) {
		pool_cache_put(tblent_cache, e);
		return EINVAL;
	}
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		/* Generate the hash value from: (address & mask). */
		val = addr & mask;
		htbl = table_hash_bucket(t, &val, sizeof(in_addr_t));
		/* Lookup to check for duplicates. */
		LIST_FOREACH(it, htbl, te_entry.hashq) {
			if (it->te_addr == addr && it->te_mask == mask)
				break;
		}
		/* If there is no duplicate - insert the entry. */
		if (__predict_true(it == NULL)) {
			LIST_INSERT_HEAD(htbl, e, te_entry.hashq);
		} else {
			error = EEXIST;
		}
		break;
	case NPF_TABLE_RBTREE:
		/*
		 * Insert the entry.  On a duplicate key,
		 * rb_tree_insert_node() returns the already existing
		 * node instead of the new one.
		 */
		if (rb_tree_insert_node(&t->t_rbtree, e) != e) {
			error = EEXIST;
		}
		break;
	default:
		KASSERT(false);
	}
	npf_table_put(t);

	if (__predict_false(error)) {
		pool_cache_put(tblent_cache, e);
	}
	return error;
}

/*
 * npf_table_rem_v4cidr: remove an IPv4 CIDR block from the table.
 */
int
npf_table_rem_v4cidr(npf_tableset_t *tset, u_int tid,
    in_addr_t addr, in_addr_t mask)
{
	struct npf_hashl *htbl;
	npf_tblent_t *e;
	npf_table_t *t;
	in_addr_t val;
	int error = 0;

	e = NULL;

	/* Get the table (returns it locked). */
	t = npf_table_get(tset, tid);
	if (__predict_false(t == NULL)) {
		return EINVAL;
	}
	/* Lookup and remove. */
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		/* Generate the hash value from: (address & mask). */
		val = addr & mask;
		htbl = table_hash_bucket(t, &val, sizeof(in_addr_t));
		LIST_FOREACH(e, htbl, te_entry.hashq) {
			if (e->te_addr == addr && e->te_mask == mask)
				break;
		}
		if (__predict_true(e != NULL)) {
			LIST_REMOVE(e, te_entry.hashq);
		} else {
			error = ESRCH;
		}
		break;
	case NPF_TABLE_RBTREE:
		/* Key: (address & mask). */
		val = addr & mask;
		e = rb_tree_find_node(&t->t_rbtree, &val);
		if (__predict_true(e != NULL)) {
			rb_tree_remove_node(&t->t_rbtree, e);
		} else {
			error = ESRCH;
		}
		break;
	default:
		KASSERT(false);
	}
	npf_table_put(t);

	/* Free the table entry. */
	if (__predict_true(e != NULL)) {
		pool_cache_put(tblent_cache, e);
	}
	return error;
}

/*
 * npf_table_match_v4addr: find the table by ID and check whether the
 * specified IPv4 address matches any entry in it.
 */
int
npf_table_match_v4addr(u_int tid, in_addr_t ip4addr)
{
	struct npf_hashl *htbl;
	npf_tblent_t *e;
	npf_table_t *t;

	e = NULL;

	/* Get the table (returns it locked). */
	t = npf_table_get(NULL, tid);
	if (__predict_false(t == NULL)) {
		return EINVAL;
	}
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		htbl = table_hash_bucket(t, &ip4addr, sizeof(in_addr_t));
		LIST_FOREACH(e, htbl, te_entry.hashq) {
			if ((ip4addr & e->te_mask) == e->te_addr) {
				break;
			}
		}
		break;
	case NPF_TABLE_RBTREE:
		e = rb_tree_find_node(&t->t_rbtree, &ip4addr);
		KASSERT(e == NULL || (ip4addr & e->te_mask) == e->te_addr);
		break;
	default:
		KASSERT(false);
	}
	npf_table_put(t);

	return e ? 0 : -1;
}
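/*
 * Illustrative sketch (not part of the original code, kept out of the
 * build): end-to-end use of the tableset API.  The table ID, hash size
 * and addresses are arbitrary example values (network byte order).
 * Note that npf_table_match_v4addr() always consults the currently
 * active (core) tableset rather than a caller-supplied one.
 */
#if 0
static int
npf_tableset_example(void)
{
	npf_tableset_t *tset;
	npf_table_t *t;
	int error;

	tset = npf_tableset_create();
	t = npf_table_create(0, NPF_TABLE_HASH, 1024);
	if (t == NULL) {
		npf_tableset_destroy(tset);
		return ENOMEM;
	}
	error = npf_tableset_insert(tset, t);
	KASSERT(error == 0);

	/* Add, then remove, the block 10.0.0.0/24. */
	error = npf_table_add_v4cidr(tset, 0,
	    htonl(0x0a000000), htonl(0xffffff00));
	if (error == 0) {
		error = npf_table_rem_v4cidr(tset, 0,
		    htonl(0x0a000000), htonl(0xffffff00));
	}

	/* Destroys the table as well, since it holds the only reference. */
	npf_tableset_destroy(tset);
	return error;
}
#endif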