1 /* $NetBSD: npf_tableset.c,v 1.10 2012/02/20 00:18:20 rmind Exp $ */ 2 3 /*- 4 * Copyright (c) 2009-2012 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This material is based upon work partially supported by The 8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * NPF tableset module. 34 * 35 * TODO: 36 * - Currently, code is modeled to handle IPv4 CIDR blocks. 37 * - Dynamic hash growing/shrinking (i.e. re-hash functionality), maybe? 38 * - Dynamic array resize. 39 */ 40 41 #include <sys/cdefs.h> 42 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.10 2012/02/20 00:18:20 rmind Exp $"); 43 44 #include <sys/param.h> 45 #include <sys/types.h> 46 47 #include <sys/atomic.h> 48 #include <sys/hash.h> 49 #include <sys/kmem.h> 50 #include <sys/pool.h> 51 #include <sys/queue.h> 52 #include <sys/rwlock.h> 53 #include <sys/systm.h> 54 #include <sys/types.h> 55 56 #include "npf_impl.h" 57 58 /* Table entry structure. */ 59 struct npf_tblent { 60 /* Hash/tree entry. */ 61 union { 62 LIST_ENTRY(npf_tblent) hashq; 63 rb_node_t rbnode; 64 } te_entry; 65 /* IPv4 CIDR block. */ 66 npf_addr_t te_addr; 67 npf_netmask_t te_mask; 68 }; 69 70 LIST_HEAD(npf_hashl, npf_tblent); 71 72 /* Table structure. */ 73 struct npf_table { 74 char t_name[16]; 75 /* Lock and reference count. */ 76 krwlock_t t_lock; 77 u_int t_refcnt; 78 /* Table ID. */ 79 u_int t_id; 80 /* The storage type can be: 1. Hash 2. RB-tree. */ 81 int t_type; 82 struct npf_hashl * t_hashl; 83 u_long t_hashmask; 84 rb_tree_t t_rbtree; 85 }; 86 87 static pool_cache_t tblent_cache __read_mostly; 88 89 /* 90 * npf_table_sysinit: initialise tableset structures. 91 */ 92 void 93 npf_tableset_sysinit(void) 94 { 95 96 tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit, 97 0, 0, "npftenpl", NULL, IPL_NONE, NULL, NULL, NULL); 98 } 99 100 void 101 npf_tableset_sysfini(void) 102 { 103 104 pool_cache_destroy(tblent_cache); 105 } 106 107 npf_tableset_t * 108 npf_tableset_create(void) 109 { 110 const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *); 111 112 return kmem_zalloc(sz, KM_SLEEP); 113 } 114 115 void 116 npf_tableset_destroy(npf_tableset_t *tblset) 117 { 118 const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *); 119 npf_table_t *t; 120 u_int tid; 121 122 /* 123 * Destroy all tables (no references should be held, as ruleset 124 * should be destroyed before). 125 */ 126 for (tid = 0; tid < NPF_TABLE_SLOTS; tid++) { 127 t = tblset[tid]; 128 if (t != NULL) { 129 npf_table_destroy(t); 130 } 131 } 132 kmem_free(tblset, sz); 133 } 134 135 /* 136 * npf_tableset_insert: insert the table into the specified tableset. 137 * 138 * => Returns 0 on success, fails and returns errno if ID is already used. 139 */ 140 int 141 npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t) 142 { 143 const u_int tid = t->t_id; 144 int error; 145 146 KASSERT((u_int)tid < NPF_TABLE_SLOTS); 147 148 if (tblset[tid] == NULL) { 149 tblset[tid] = t; 150 error = 0; 151 } else { 152 error = EEXIST; 153 } 154 return error; 155 } 156 157 /* 158 * Red-black tree storage. 159 */ 160 161 static signed int 162 table_rbtree_cmp_nodes(void *ctx, const void *n1, const void *n2) 163 { 164 const npf_tblent_t * const te1 = n1; 165 const npf_tblent_t * const te2 = n2; 166 167 return npf_compare_cidr(&te1->te_addr, te1->te_mask, 168 &te2->te_addr, te2->te_mask); 169 } 170 171 static signed int 172 table_rbtree_cmp_key(void *ctx, const void *n1, const void *key) 173 { 174 const npf_tblent_t * const te = n1; 175 const npf_addr_t *t2 = key; 176 177 return npf_compare_cidr(&te->te_addr, te->te_mask, t2, NPF_NO_NETMASK); 178 } 179 180 static const rb_tree_ops_t table_rbtree_ops = { 181 .rbto_compare_nodes = table_rbtree_cmp_nodes, 182 .rbto_compare_key = table_rbtree_cmp_key, 183 .rbto_node_offset = offsetof(npf_tblent_t, te_entry.rbnode), 184 .rbto_context = NULL 185 }; 186 187 /* 188 * Hash helper routine. 189 */ 190 191 static inline struct npf_hashl * 192 table_hash_bucket(npf_table_t *t, const void *buf, size_t sz) 193 { 194 const uint32_t hidx = hash32_buf(buf, sz, HASH32_BUF_INIT); 195 196 return &t->t_hashl[hidx & t->t_hashmask]; 197 } 198 199 /* 200 * npf_table_create: create table with a specified ID. 201 */ 202 npf_table_t * 203 npf_table_create(u_int tid, int type, size_t hsize) 204 { 205 npf_table_t *t; 206 207 KASSERT((u_int)tid < NPF_TABLE_SLOTS); 208 209 t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP); 210 switch (type) { 211 case NPF_TABLE_TREE: 212 rb_tree_init(&t->t_rbtree, &table_rbtree_ops); 213 break; 214 case NPF_TABLE_HASH: 215 t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask); 216 if (t->t_hashl == NULL) { 217 kmem_free(t, sizeof(npf_table_t)); 218 return NULL; 219 } 220 break; 221 default: 222 KASSERT(false); 223 } 224 rw_init(&t->t_lock); 225 t->t_type = type; 226 t->t_refcnt = 1; 227 t->t_id = tid; 228 return t; 229 } 230 231 /* 232 * npf_table_destroy: free all table entries and table itself. 233 */ 234 void 235 npf_table_destroy(npf_table_t *t) 236 { 237 npf_tblent_t *e; 238 u_int n; 239 240 switch (t->t_type) { 241 case NPF_TABLE_HASH: 242 for (n = 0; n <= t->t_hashmask; n++) { 243 while ((e = LIST_FIRST(&t->t_hashl[n])) != NULL) { 244 LIST_REMOVE(e, te_entry.hashq); 245 pool_cache_put(tblent_cache, e); 246 } 247 } 248 hashdone(t->t_hashl, HASH_LIST, t->t_hashmask); 249 break; 250 case NPF_TABLE_TREE: 251 while ((e = rb_tree_iterate(&t->t_rbtree, NULL, 252 RB_DIR_LEFT)) != NULL) { 253 rb_tree_remove_node(&t->t_rbtree, e); 254 pool_cache_put(tblent_cache, e); 255 } 256 break; 257 default: 258 KASSERT(false); 259 } 260 rw_destroy(&t->t_lock); 261 kmem_free(t, sizeof(npf_table_t)); 262 } 263 264 /* 265 * npf_table_ref: holds the reference on table. 266 * 267 * => Table must be locked. 268 */ 269 void 270 npf_table_ref(npf_table_t *t) 271 { 272 273 KASSERT(rw_lock_held(&t->t_lock)); 274 atomic_inc_uint(&t->t_refcnt); 275 } 276 277 /* 278 * npf_table_unref: drop reference from the table and destroy the table if 279 * it is the last reference. 280 */ 281 void 282 npf_table_unref(npf_table_t *t) 283 { 284 285 if (atomic_dec_uint_nv(&t->t_refcnt) != 0) { 286 return; 287 } 288 npf_table_destroy(t); 289 } 290 291 /* 292 * npf_table_get: find the table according to ID and "get it" by locking it. 293 */ 294 npf_table_t * 295 npf_table_get(npf_tableset_t *tset, u_int tid) 296 { 297 npf_table_t *t; 298 299 KASSERT(tset != NULL); 300 301 if ((u_int)tid >= NPF_TABLE_SLOTS) { 302 return NULL; 303 } 304 t = tset[tid]; 305 if (t != NULL) { 306 rw_enter(&t->t_lock, RW_READER); 307 } 308 return t; 309 } 310 311 /* 312 * npf_table_put: "put table back" by unlocking it. 313 */ 314 void 315 npf_table_put(npf_table_t *t) 316 { 317 318 rw_exit(&t->t_lock); 319 } 320 321 /* 322 * npf_table_check: validate ID and type. 323 * */ 324 int 325 npf_table_check(npf_tableset_t *tset, u_int tid, int type) 326 { 327 328 if ((u_int)tid >= NPF_TABLE_SLOTS) { 329 return EINVAL; 330 } 331 if (tset[tid] != NULL) { 332 return EEXIST; 333 } 334 if (type != NPF_TABLE_TREE && type != NPF_TABLE_HASH) { 335 return EINVAL; 336 } 337 return 0; 338 } 339 340 /* 341 * npf_table_add_cidr: add an IPv4 or IPv6 CIDR into the table. 342 */ 343 int 344 npf_table_add_cidr(npf_tableset_t *tset, u_int tid, 345 const npf_addr_t *addr, const npf_netmask_t mask) 346 { 347 struct npf_hashl *htbl; 348 npf_tblent_t *e, *it; 349 npf_table_t *t; 350 npf_addr_t val; 351 int error = 0; 352 353 if (mask > NPF_MAX_NETMASK) { 354 return EINVAL; 355 } 356 e = pool_cache_get(tblent_cache, PR_WAITOK); 357 memcpy(&e->te_addr, addr, sizeof(npf_addr_t)); 358 e->te_mask = mask; 359 360 /* Get the table (acquire the lock). */ 361 t = npf_table_get(tset, tid); 362 if (t == NULL) { 363 pool_cache_put(tblent_cache, e); 364 return EINVAL; 365 } 366 switch (t->t_type) { 367 case NPF_TABLE_HASH: 368 /* Generate hash value from: address & mask. */ 369 npf_calculate_masked_addr(&val, addr, mask); 370 htbl = table_hash_bucket(t, &val, sizeof(npf_addr_t)); 371 /* Lookup to check for duplicates. */ 372 LIST_FOREACH(it, htbl, te_entry.hashq) { 373 if (it->te_mask != mask) { 374 continue; 375 } 376 if (!memcmp(&it->te_addr, addr, sizeof(npf_addr_t))) { 377 break; 378 } 379 } 380 /* If no duplicate - insert entry. */ 381 if (__predict_true(it == NULL)) { 382 LIST_INSERT_HEAD(htbl, e, te_entry.hashq); 383 } else { 384 error = EEXIST; 385 } 386 break; 387 case NPF_TABLE_TREE: 388 /* Insert entry. Returns false, if duplicate. */ 389 if (rb_tree_insert_node(&t->t_rbtree, e) != e) { 390 error = EEXIST; 391 } 392 break; 393 default: 394 KASSERT(false); 395 } 396 npf_table_put(t); 397 398 if (error) { 399 pool_cache_put(tblent_cache, e); 400 } 401 return error; 402 } 403 404 /* 405 * npf_table_rem_v4cidr: remove an IPv4 CIDR from the table. 406 */ 407 int 408 npf_table_rem_cidr(npf_tableset_t *tset, u_int tid, 409 const npf_addr_t *addr, const npf_netmask_t mask) 410 { 411 struct npf_hashl *htbl; 412 npf_tblent_t *e; 413 npf_table_t *t; 414 npf_addr_t val; 415 int error; 416 417 if (mask > NPF_MAX_NETMASK) { 418 return EINVAL; 419 } 420 421 /* Get the table (acquire the lock). */ 422 t = npf_table_get(tset, tid); 423 if (__predict_false(t == NULL)) { 424 return EINVAL; 425 } 426 e = NULL; 427 428 switch (t->t_type) { 429 case NPF_TABLE_HASH: 430 /* Generate hash value from: (address & mask). */ 431 npf_calculate_masked_addr(&val, addr, mask); 432 htbl = table_hash_bucket(t, &val, sizeof(npf_addr_t)); 433 LIST_FOREACH(e, htbl, te_entry.hashq) { 434 if (e->te_mask != mask) { 435 continue; 436 } 437 if (!memcmp(&e->te_addr, addr, sizeof(npf_addr_t))) { 438 break; 439 } 440 } 441 if (__predict_true(e != NULL)) { 442 LIST_REMOVE(e, te_entry.hashq); 443 } else { 444 error = ESRCH; 445 } 446 break; 447 case NPF_TABLE_TREE: 448 /* Key: (address & mask). */ 449 npf_calculate_masked_addr(&val, addr, mask); 450 e = rb_tree_find_node(&t->t_rbtree, &val); 451 if (__predict_true(e != NULL)) { 452 rb_tree_remove_node(&t->t_rbtree, e); 453 } else { 454 error = ESRCH; 455 } 456 break; 457 default: 458 KASSERT(false); 459 } 460 npf_table_put(t); 461 462 if (e == NULL) { 463 return ENOENT; 464 } 465 pool_cache_put(tblent_cache, e); 466 return 0; 467 } 468 469 /* 470 * npf_table_match_addr: find the table according to ID, lookup and 471 * match the contents with specified IPv4 address. 472 */ 473 int 474 npf_table_match_addr(npf_tableset_t *tset, u_int tid, const npf_addr_t *addr) 475 { 476 struct npf_hashl *htbl; 477 npf_tblent_t *e = NULL; 478 npf_table_t *t; 479 480 /* Get the table (acquire the lock). */ 481 t = npf_table_get(tset, tid); 482 if (__predict_false(t == NULL)) { 483 return EINVAL; 484 } 485 switch (t->t_type) { 486 case NPF_TABLE_HASH: 487 htbl = table_hash_bucket(t, addr, sizeof(npf_addr_t)); 488 LIST_FOREACH(e, htbl, te_entry.hashq) { 489 if (npf_compare_cidr(addr, e->te_mask, &e->te_addr, 490 NPF_NO_NETMASK) == 0) 491 break; 492 } 493 break; 494 case NPF_TABLE_TREE: 495 e = rb_tree_find_node(&t->t_rbtree, addr); 496 KASSERT(e && npf_compare_cidr(addr, e->te_mask, &e->te_addr, 497 NPF_NO_NETMASK) == 0); 498 break; 499 default: 500 KASSERT(false); 501 } 502 npf_table_put(t); 503 504 return e ? 0 : ENOENT; 505 } 506