/*	$NetBSD: npf_tableset.c,v 1.1 2010/08/22 18:56:23 rmind Exp $	*/

/*-
 * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NPF table module.
 *
 * Lock order:
 *
 *	table_lock ->
 *		npf_table_t::t_lock
 *
 * TODO:
 * - Currently, the code is modeled to handle IPv4 CIDR blocks.
 * - Dynamic hash growing/shrinking (i.e. re-hash functionality), maybe?
 * - Dynamic array resize.
 */

#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.1 2010/08/22 18:56:23 rmind Exp $");
#endif

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/hash.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/systm.h>
#include <sys/types.h>

#include "npf_impl.h"

/* Table entry structure. */
struct npf_tblent {
	/* IPv4 CIDR block. */
	in_addr_t			te_addr;
	in_addr_t			te_mask;
	union {
		LIST_ENTRY(npf_tblent)	hashq;
		struct rb_node		rbnode;
	} te_entry;
};

/* Return a pointer to the npf_tblent_t from the RB-tree node.  (XXX fix rb-tree) */
#define	NPF_RBN2TBLENT(n)	\
	(npf_tblent_t *)((uintptr_t)n - offsetof(npf_tblent_t, te_entry.rbnode))

LIST_HEAD(npf_hashl, npf_tblent);

/* Table structure. */
struct npf_table {
	char			t_name[16];
	/* Lock and reference count. */
	krwlock_t		t_lock;
	u_int			t_refcnt;
	/* Table ID. */
	u_int			t_id;
	/* The storage type can be: 1. Hash 2. RB-tree. */
	u_int			t_type;
	struct npf_hashl *	t_hashl;
	u_long			t_hashmask;
	struct rb_tree		t_rbtree;
};

/* Global table array and its lock. */
static npf_tableset_t *	table_array;
static krwlock_t	table_lock;
static pool_cache_t	tblent_cache;
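/*
 * Illustrative sketch (not part of the module): per the lock order
 * above, a global lookup takes table_lock as reader, acquires the
 * table's own t_lock and only then drops table_lock:
 *
 *	rw_enter(&table_lock, RW_READER);
 *	t = table_array[tid];
 *	if (t != NULL)
 *		rw_enter(&t->t_lock, RW_READER);
 *	rw_exit(&table_lock);
 *
 * This is the pattern npf_table_get() implements below.
 */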
/*
 * npf_tableset_sysinit: initialise the tableset structures.
 */
int
npf_tableset_sysinit(void)
{

	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
	    0, 0, "npftenpl", NULL, IPL_NONE, NULL, NULL, NULL);
	if (tblent_cache == NULL) {
		return ENOMEM;
	}
	table_array = npf_tableset_create();
	if (table_array == NULL) {
		pool_cache_destroy(tblent_cache);
		return ENOMEM;
	}
	rw_init(&table_lock);
	return 0;
}

void
npf_tableset_sysfini(void)
{

	npf_tableset_destroy(table_array);
	pool_cache_destroy(tblent_cache);
	rw_destroy(&table_lock);
}

npf_tableset_t *
npf_tableset_create(void)
{
	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);

	return kmem_zalloc(sz, KM_SLEEP);
}

void
npf_tableset_destroy(npf_tableset_t *tblset)
{
	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
	npf_table_t *t;
	u_int tid;

	/*
	 * Destroy all tables (no references should be held, as the
	 * ruleset should have been destroyed first).
	 */
	for (tid = 0; tid < NPF_TABLE_SLOTS; tid++) {
		t = tblset[tid];
		if (t != NULL) {
			npf_table_destroy(t);
		}
	}
	kmem_free(tblset, sz);
}

/*
 * npf_tableset_insert: insert the table into the specified tableset.
 *
 * => Returns 0 on success; EEXIST if the ID is already used.
 */
int
npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t)
{
	const u_int tid = t->t_id;
	int error;

	KASSERT((u_int)tid < NPF_TABLE_SLOTS);

	if (tblset[tid] == NULL) {
		tblset[tid] = t;
		error = 0;
	} else {
		error = EEXIST;
	}
	return error;
}

/*
 * npf_tableset_reload: replace the old tableset array with a new one.
 *
 * => Called from npf_ruleset_reload() with the global ruleset lock held.
 * => Returns a pointer to the old tableset; the caller will destroy it.
 */
npf_tableset_t *
npf_tableset_reload(npf_tableset_t *tblset)
{
	npf_tableset_t *oldtblset;

	rw_enter(&table_lock, RW_WRITER);
	oldtblset = table_array;
	table_array = tblset;
	rw_exit(&table_lock);

	return oldtblset;
}

/*
 * Red-black tree storage.
 */

static signed int
table_rbtree_cmp_nodes(const struct rb_node *n1, const struct rb_node *n2)
{
	const npf_tblent_t *te1 = NPF_RBN2TBLENT(n1);
	const npf_tblent_t *te2 = NPF_RBN2TBLENT(n2);
	const in_addr_t x = te1->te_addr & te1->te_mask;
	const in_addr_t y = te2->te_addr & te2->te_mask;

	if (x < y)
		return 1;
	if (x > y)
		return -1;
	return 0;
}

static signed int
table_rbtree_cmp_key(const struct rb_node *n1, const void *key)
{
	const npf_tblent_t *te = NPF_RBN2TBLENT(n1);
	const in_addr_t x = te->te_addr & te->te_mask;
	const in_addr_t y = *(const in_addr_t *)key;

	if (x < y)
		return 1;
	if (x > y)
		return -1;
	return 0;
}

static const struct rb_tree_ops table_rbtree_ops = {
	.rbto_compare_nodes = table_rbtree_cmp_nodes,
	.rbto_compare_key = table_rbtree_cmp_key
};

/*
 * Hash helper routine.
 */

static inline struct npf_hashl *
table_hash_bucket(npf_table_t *t, void *buf, size_t sz)
{
	const uint32_t hidx = hash32_buf(buf, sz, HASH32_BUF_INIT);

	return &t->t_hashl[hidx & t->t_hashmask];
}
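/*
 * Illustrative sketch (not part of the module): hash entries are
 * bucketed by their masked address, so a /24 entry for 10.1.2.0 is
 * keyed on 10.1.2.0 itself:
 *
 *	in_addr_t key = addr & mask;
 *	struct npf_hashl *htbl = table_hash_bucket(t, &key, sizeof(key));
 *
 * Note: npf_table_match_v4addr() below hashes the full address, so a
 * masked entry is reliably found only when the mask is all-ones; see
 * the CIDR item in the TODO list above.
 */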
/*
 * npf_table_create: create a table with the specified ID.
 */
npf_table_t *
npf_table_create(u_int tid, int type, size_t hsize)
{
	npf_table_t *t;

	KASSERT((u_int)tid < NPF_TABLE_SLOTS);

	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
	switch (type) {
	case NPF_TABLE_RBTREE:
		rb_tree_init(&t->t_rbtree, &table_rbtree_ops);
		break;
	case NPF_TABLE_HASH:
		t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
		if (t->t_hashl == NULL) {
			kmem_free(t, sizeof(npf_table_t));
			return NULL;
		}
		break;
	default:
		KASSERT(false);
	}
	rw_init(&t->t_lock);
	t->t_type = type;
	t->t_refcnt = 1;
	t->t_id = tid;
	return t;
}

/*
 * npf_table_destroy: free all table entries and the table itself.
 */
void
npf_table_destroy(npf_table_t *t)
{
	npf_tblent_t *e;
	struct rb_node *nd;
	u_int n;

	switch (t->t_type) {
	case NPF_TABLE_HASH:
		for (n = 0; n <= t->t_hashmask; n++) {
			while ((e = LIST_FIRST(&t->t_hashl[n])) != NULL) {
				LIST_REMOVE(e, te_entry.hashq);
				pool_cache_put(tblent_cache, e);
			}
		}
		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
		break;
	case NPF_TABLE_RBTREE:
		while ((nd = rb_tree_iterate(&t->t_rbtree, NULL,
		    RB_DIR_RIGHT)) != NULL) {
			e = NPF_RBN2TBLENT(nd);
			rb_tree_remove_node(&t->t_rbtree, &e->te_entry.rbnode);
			pool_cache_put(tblent_cache, e);
		}
		break;
	default:
		KASSERT(false);
	}
	rw_destroy(&t->t_lock);
	kmem_free(t, sizeof(npf_table_t));
}

/*
 * npf_table_ref: acquire a reference on the table.
 *
 * => Table must be locked.
 */
void
npf_table_ref(npf_table_t *t)
{

	KASSERT(rw_lock_held(&t->t_lock));
	atomic_inc_uint(&t->t_refcnt);
}

/*
 * npf_table_unref: drop a reference from the table and destroy the
 * table if it was the last one.
 */
void
npf_table_unref(npf_table_t *t)
{

	if (atomic_dec_uint_nv(&t->t_refcnt) != 0) {
		return;
	}
	npf_table_destroy(t);
}

/*
 * npf_table_get: find the table according to ID and "get it" by locking it.
 */
npf_table_t *
npf_table_get(npf_tableset_t *tset, u_int tid)
{
	npf_table_t *t;

	if ((u_int)tid >= NPF_TABLE_SLOTS) {
		return NULL;
	}
	if (tset) {
		t = tset[tid];
		if (t != NULL) {
			rw_enter(&t->t_lock, RW_READER);
		}
		return t;
	}
	rw_enter(&table_lock, RW_READER);
	t = table_array[tid];
	if (t != NULL) {
		rw_enter(&t->t_lock, RW_READER);
	}
	rw_exit(&table_lock);
	return t;
}

/*
 * npf_table_put: "put the table back" by unlocking it.
 */
void
npf_table_put(npf_table_t *t)
{

	rw_exit(&t->t_lock);
}

/*
 * npf_table_check: validate ID and type.
 */
int
npf_table_check(npf_tableset_t *tset, u_int tid, int type)
{

	if ((u_int)tid >= NPF_TABLE_SLOTS) {
		return EINVAL;
	}
	if (tset[tid] != NULL) {
		return EEXIST;
	}
	if (type != NPF_TABLE_RBTREE && type != NPF_TABLE_HASH) {
		return EINVAL;
	}
	return 0;
}
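/*
 * Usage sketch (illustrative, error handling abbreviated): validate,
 * create and publish a hash table with 1024 buckets in a tableset:
 *
 *	if (npf_table_check(tset, tid, NPF_TABLE_HASH) == 0) {
 *		npf_table_t *t = npf_table_create(tid, NPF_TABLE_HASH, 1024);
 *		if (t != NULL && npf_tableset_insert(tset, t) != 0)
 *			npf_table_destroy(t);
 *	}
 */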
/*
 * npf_table_add_v4cidr: add an IPv4 CIDR block into the table.
 */
int
npf_table_add_v4cidr(npf_tableset_t *tset, u_int tid,
    in_addr_t addr, in_addr_t mask)
{
	struct npf_hashl *htbl;
	npf_tblent_t *e, *it;
	npf_table_t *t;
	in_addr_t val;
	int error = 0;

	/* Allocate and setup the entry. */
	e = pool_cache_get(tblent_cache, PR_WAITOK);
	if (e == NULL) {
		return ENOMEM;
	}
	e->te_addr = addr;
	e->te_mask = mask;

	/* Lock the table. */
	t = npf_table_get(tset, tid);
	if (__predict_false(t == NULL)) {
		pool_cache_put(tblent_cache, e);
		return EINVAL;
	}
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		/* Compute the hash key from (address & mask). */
		val = addr & mask;
		htbl = table_hash_bucket(t, &val, sizeof(in_addr_t));
		/* Lookup to check for duplicates. */
		LIST_FOREACH(it, htbl, te_entry.hashq) {
			if (it->te_addr == addr && it->te_mask == mask)
				break;
		}
		/* If there is no duplicate, insert the entry. */
		if (__predict_true(it == NULL)) {
			LIST_INSERT_HEAD(htbl, e, te_entry.hashq);
		} else {
			error = EEXIST;
		}
		break;
	case NPF_TABLE_RBTREE:
		/* Insert the entry.  Insertion returns false on a duplicate. */
		if (!rb_tree_insert_node(&t->t_rbtree, &e->te_entry.rbnode)) {
			error = EEXIST;
		}
		break;
	default:
		KASSERT(false);
	}
	npf_table_put(t);

	if (__predict_false(error)) {
		pool_cache_put(tblent_cache, e);
	}
	return error;
}

/*
 * npf_table_rem_v4cidr: remove an IPv4 CIDR block from the table.
 */
int
npf_table_rem_v4cidr(npf_tableset_t *tset, u_int tid,
    in_addr_t addr, in_addr_t mask)
{
	struct npf_hashl *htbl;
	struct rb_node *nd;
	npf_tblent_t *e;
	npf_table_t *t;
	in_addr_t val;
	int error = 0;

	e = NULL;

	/* Lock the table. */
	t = npf_table_get(tset, tid);
	if (__predict_false(t == NULL)) {
		return EINVAL;
	}
	/* Lookup & remove. */
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		/* Compute the hash key from (address & mask). */
		val = addr & mask;
		htbl = table_hash_bucket(t, &val, sizeof(in_addr_t));
		LIST_FOREACH(e, htbl, te_entry.hashq) {
			if (e->te_addr == addr && e->te_mask == mask)
				break;
		}
		if (__predict_true(e != NULL)) {
			LIST_REMOVE(e, te_entry.hashq);
		} else {
			error = ESRCH;
		}
		break;
	case NPF_TABLE_RBTREE:
		/* Key: (address & mask). */
		val = addr & mask;
		nd = rb_tree_find_node(&t->t_rbtree, &val);
		if (__predict_true(nd != NULL)) {
			e = NPF_RBN2TBLENT(nd);
			rb_tree_remove_node(&t->t_rbtree, &e->te_entry.rbnode);
		} else {
			error = ESRCH;
		}
		break;
	default:
		KASSERT(false);
	}
	npf_table_put(t);

	/* Free the table entry. */
	if (__predict_true(e != NULL)) {
		pool_cache_put(tblent_cache, e);
	}
	return error;
}

/*
 * npf_table_match_v4addr: find the table according to ID, lookup and
 * match the contents with the specified IPv4 address.
 */
int
npf_table_match_v4addr(u_int tid, in_addr_t ip4addr)
{
	struct npf_hashl *htbl;
	struct rb_node *nd;
	npf_tblent_t *e;
	npf_table_t *t;

	e = NULL;

	/* Lock the table. */
	t = npf_table_get(NULL, tid);
	if (__predict_false(t == NULL)) {
		return EINVAL;
	}
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		htbl = table_hash_bucket(t, &ip4addr, sizeof(in_addr_t));
		LIST_FOREACH(e, htbl, te_entry.hashq) {
			if ((ip4addr & e->te_mask) == e->te_addr) {
				break;
			}
		}
		break;
	case NPF_TABLE_RBTREE:
		nd = rb_tree_find_node(&t->t_rbtree, &ip4addr);
		/* Guard against a failed lookup before converting the node. */
		e = nd ? NPF_RBN2TBLENT(nd) : NULL;
		KASSERT(e == NULL || (ip4addr & e->te_mask) == e->te_addr);
		break;
	default:
		KASSERT(false);
	}
	npf_table_put(t);

	return e ? 0 : -1;
}
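/*
 * Usage sketch (illustrative): test whether an address is present in
 * table 1; npf_table_match_v4addr() returns 0 only on a match:
 *
 *	if (npf_table_match_v4addr(1, ip4addr) == 0) {
 *		... the address matched an entry in the table ...
 *	}
 */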