/*	$NetBSD: npf_tableset.c,v 1.18 2013/05/19 20:45:34 rmind Exp $	*/

/*-
 * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NPF tableset module.
 *
 * Notes
 *
 *	The tableset is an array of tables.  After creation, the array
 *	is immutable.  The caller is responsible for synchronising access
 *	to the tableset.  Each table can be organised either as a hash
 *	or a tree.  Table entries are protected by a read-write lock.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.18 2013/05/19 20:45:34 rmind Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/atomic.h>
#include <sys/hash.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/systm.h>

#include "npf_impl.h"

/*
 * Table structures.
 */

typedef struct npf_tblent {
	union {
		LIST_ENTRY(npf_tblent) hashq;
		pt_node_t	node;
	} te_entry;
	int			te_alen;
	npf_addr_t		te_addr;
} npf_tblent_t;

LIST_HEAD(npf_hashl, npf_tblent);

struct npf_table {
	char			t_name[16];
	/* Lock and reference count. */
	krwlock_t		t_lock;
	u_int			t_refcnt;
	/* Total number of items. */
	u_int			t_nitems;
	/* Table ID. */
	u_int			t_id;
	/* The storage type can be: a) hash b) tree. */
	int			t_type;
	struct npf_hashl *	t_hashl;
	u_long			t_hashmask;
	/* Separate trees for IPv4 and IPv6. */
	pt_tree_t		t_tree[2];
};

/* Map the address length (4 or 16 bytes) to the tree index (0 or 1). */
#define	NPF_ADDRLEN2TREE(alen)	((alen) >> 4)

static pool_cache_t		tblent_cache	__read_mostly;
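/*
 * Illustrative sketch (not part of the original module): how a hypothetical
 * caller might wire a table into a tableset using the routines below.  The
 * table ID and hash size are arbitrary, and error handling is abbreviated.
 */
#if 0
static npf_tableset_t *
npf_tableset_example(void)
{
	npf_tableset_t *tblset;
	npf_table_t *t;

	npf_tableset_sysinit();
	tblset = npf_tableset_create();

	/* Create a hash table in slot 0 and insert it into the set. */
	t = npf_table_create(0, NPF_TABLE_HASH, 1024);
	if (t == NULL || npf_tableset_insert(tblset, t) != 0) {
		npf_tableset_destroy(tblset);
		return NULL;
	}
	return tblset;
}
#endif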
/*
 * npf_tableset_sysinit: initialise tableset structures.
 */
void
npf_tableset_sysinit(void)
{

	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
	    0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
}

/*
 * npf_tableset_sysfini: finalise tableset structures.
 */
void
npf_tableset_sysfini(void)
{

	pool_cache_destroy(tblent_cache);
}

/*
 * npf_tableset_create: allocate and zero a new (empty) tableset.
 */
npf_tableset_t *
npf_tableset_create(void)
{
	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);

	return kmem_zalloc(sz, KM_SLEEP);
}

/*
 * npf_tableset_destroy: drop the table references and free the tableset.
 */
void
npf_tableset_destroy(npf_tableset_t *tblset)
{
	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
	npf_table_t *t;
	u_int tid;

	/*
	 * Destroy all tables.  No external references should be held at
	 * this point, since the ruleset should have been destroyed first.
	 */
	for (tid = 0; tid < NPF_TABLE_SLOTS; tid++) {
		t = tblset[tid];
		if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) {
			npf_table_destroy(t);
		}
	}
	kmem_free(tblset, sz);
}

/*
 * npf_tableset_insert: insert the table into the specified tableset.
 *
 * => Returns 0 on success; fails with EEXIST if the ID is already used.
 */
int
npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t)
{
	const u_int tid = t->t_id;
	int error;

	KASSERT((u_int)tid < NPF_TABLE_SLOTS);

	if (tblset[tid] == NULL) {
		atomic_inc_uint(&t->t_refcnt);
		tblset[tid] = t;
		error = 0;
	} else {
		error = EEXIST;
	}
	return error;
}

/*
 * npf_tableset_reload: iterate over all tables and, if a new table is of
 * the same type as the old one and has no items, preserve the old table
 * and its entries.
 *
 * => The caller is responsible for providing synchronisation.
 */
void
npf_tableset_reload(npf_tableset_t *ntset, npf_tableset_t *otset)
{
	for (int i = 0; i < NPF_TABLE_SLOTS; i++) {
		npf_table_t *t = ntset[i], *ot = otset[i];

		if (t == NULL || ot == NULL) {
			continue;
		}
		if (t->t_nitems || t->t_type != ot->t_type) {
			continue;
		}

		/*
		 * Acquire a reference, since the table has to be kept
		 * in the old tableset as well.
		 */
		atomic_inc_uint(&ot->t_refcnt);
		ntset[i] = ot;

		/* Drop the only reference; the table was never visible. */
		t->t_refcnt--;
		npf_table_destroy(t);
	}
}
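/*
 * Illustrative sketch (hypothetical, not part of the original module): a
 * reconfiguration path would typically build a fresh tableset, let
 * npf_tableset_reload() carry over still-empty tables of the same type
 * from the active set, and then retire the old set.
 */
#if 0
static npf_tableset_t *
npf_config_reload_example(npf_tableset_t *active)
{
	npf_tableset_t *pending = npf_tableset_create();

	/* ... create and insert the new tables into 'pending' ... */
	npf_tableset_reload(pending, active);
	npf_tableset_destroy(active);
	return pending;
}
#endif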
/*
 * A few helper routines.
 */

static npf_tblent_t *
table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr,
    const int alen, struct npf_hashl **rhtbl)
{
	const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT);
	struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask];
	npf_tblent_t *ent;

	/*
	 * Lookup the hash table and check for duplicates.
	 * Note: the mask is ignored for the hash storage.
	 */
	LIST_FOREACH(ent, htbl, te_entry.hashq) {
		if (ent->te_alen != alen) {
			continue;
		}
		if (memcmp(&ent->te_addr, addr, alen) == 0) {
			break;
		}
	}
	*rhtbl = htbl;
	return ent;
}

static void
table_hash_destroy(npf_table_t *t)
{
	for (unsigned n = 0; n <= t->t_hashmask; n++) {
		npf_tblent_t *ent;

		while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
			LIST_REMOVE(ent, te_entry.hashq);
			pool_cache_put(tblent_cache, ent);
		}
	}
}

static void
table_tree_destroy(pt_tree_t *tree)
{
	npf_tblent_t *ent;

	while ((ent = ptree_iterate(tree, NULL, PT_ASCENDING)) != NULL) {
		ptree_remove_node(tree, ent);
		pool_cache_put(tblent_cache, ent);
	}
}

/*
 * npf_table_create: create a table with the specified ID.
 */
npf_table_t *
npf_table_create(u_int tid, int type, size_t hsize)
{
	npf_table_t *t;

	KASSERT((u_int)tid < NPF_TABLE_SLOTS);

	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
	switch (type) {
	case NPF_TABLE_TREE:
		ptree_init(&t->t_tree[0], &npf_table_ptree_ops,
		    (void *)(sizeof(struct in_addr) / sizeof(uint32_t)),
		    offsetof(npf_tblent_t, te_entry.node),
		    offsetof(npf_tblent_t, te_addr));
		ptree_init(&t->t_tree[1], &npf_table_ptree_ops,
		    (void *)(sizeof(struct in6_addr) / sizeof(uint32_t)),
		    offsetof(npf_tblent_t, te_entry.node),
		    offsetof(npf_tblent_t, te_addr));
		break;
	case NPF_TABLE_HASH:
		t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
		if (t->t_hashl == NULL) {
			kmem_free(t, sizeof(npf_table_t));
			return NULL;
		}
		break;
	default:
		KASSERT(false);
	}
	rw_init(&t->t_lock);
	t->t_type = type;
	t->t_id = tid;

	return t;
}

/*
 * npf_table_destroy: free all table entries and the table itself.
 */
void
npf_table_destroy(npf_table_t *t)
{
	KASSERT(t->t_refcnt == 0);

	switch (t->t_type) {
	case NPF_TABLE_HASH:
		table_hash_destroy(t);
		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
		break;
	case NPF_TABLE_TREE:
		table_tree_destroy(&t->t_tree[0]);
		table_tree_destroy(&t->t_tree[1]);
		break;
	default:
		KASSERT(false);
	}
	rw_destroy(&t->t_lock);
	kmem_free(t, sizeof(npf_table_t));
}

/*
 * npf_table_check: validate the ID and type of a table to be created.
 */
int
npf_table_check(const npf_tableset_t *tset, u_int tid, int type)
{

	if ((u_int)tid >= NPF_TABLE_SLOTS) {
		return EINVAL;
	}
	if (tset[tid] != NULL) {
		return EEXIST;
	}
	if (type != NPF_TABLE_TREE && type != NPF_TABLE_HASH) {
		return EINVAL;
	}
	return 0;
}

static int
table_cidr_check(const u_int aidx, const npf_addr_t *addr,
    const npf_netmask_t mask)
{

	if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) {
		return EINVAL;
	}
	if (aidx > 1) {
		return EINVAL;
	}

	/*
	 * The mask must be shorter than the address length in bits:
	 * 32 for IPv4 (aidx = 0) and 128 for IPv6 (aidx = 1).  A single
	 * host entry shall use NPF_NO_NETMASK instead.
	 */
	if (mask >= (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) {
		return EINVAL;
	}
	return 0;
}
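/*
 * Illustrative examples of the CIDR validation above (hypothetical values,
 * showing how the checks in table_cidr_check() behave):
 *
 *	table_cidr_check(0, addr, 24);             -> 0 (IPv4 /24)
 *	table_cidr_check(0, addr, 32);             -> EINVAL (host: use NPF_NO_NETMASK)
 *	table_cidr_check(0, addr, NPF_NO_NETMASK); -> 0 (single IPv4 host)
 *	table_cidr_check(1, addr, 96);             -> 0 (IPv6 /96)
 *	table_cidr_check(2, addr, 24);             -> EINVAL (bad address index)
 */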
/*
 * npf_table_insert: add an IP CIDR entry into the table.
 */
int
npf_table_insert(npf_tableset_t *tset, u_int tid, const int alen,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent;
	npf_table_t *t;
	int error;

	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
		return EINVAL;
	}

	error = table_cidr_check(aidx, addr, mask);
	if (error) {
		return error;
	}
	ent = pool_cache_get(tblent_cache, PR_WAITOK);
	memcpy(&ent->te_addr, addr, alen);
	ent->te_alen = alen;

	/*
	 * Insert the entry.  Return an error on duplicate.
	 */
	rw_enter(&t->t_lock, RW_WRITER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;

		/*
		 * By design, the hash storage supports only single
		 * addresses, i.e. no masks.
		 */
		if (mask != NPF_NO_NETMASK) {
			error = EINVAL;
			break;
		}
		if (!table_hash_lookup(t, addr, alen, &htbl)) {
			LIST_INSERT_HEAD(htbl, ent, te_entry.hashq);
			t->t_nitems++;
		} else {
			error = EEXIST;
		}
		break;
	}
	case NPF_TABLE_TREE: {
		pt_tree_t *tree = &t->t_tree[aidx];
		bool ok;

		/*
		 * If no mask is specified, insert a regular node, which
		 * is equivalent to using the maximum mask.
		 */
		ok = (mask != NPF_NO_NETMASK) ?
		    ptree_insert_mask_node(tree, ent, mask) :
		    ptree_insert_node(tree, ent);
		if (ok) {
			t->t_nitems++;
			error = 0;
		} else {
			error = EEXIST;
		}
		break;
	}
	default:
		KASSERT(false);
	}
	rw_exit(&t->t_lock);

	if (error) {
		pool_cache_put(tblent_cache, ent);
	}
	return error;
}

/*
 * npf_table_remove: remove the IP CIDR entry from the table.
 */
int
npf_table_remove(npf_tableset_t *tset, u_int tid, const int alen,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent;
	npf_table_t *t;
	int error;

	error = table_cidr_check(aidx, addr, mask);
	if (error) {
		return error;
	}

	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
		return EINVAL;
	}

	rw_enter(&t->t_lock, RW_WRITER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;

		ent = table_hash_lookup(t, addr, alen, &htbl);
		if (__predict_true(ent != NULL)) {
			LIST_REMOVE(ent, te_entry.hashq);
			t->t_nitems--;
		}
		break;
	}
	case NPF_TABLE_TREE: {
		pt_tree_t *tree = &t->t_tree[aidx];

		ent = ptree_find_node(tree, addr);
		if (__predict_true(ent != NULL)) {
			ptree_remove_node(tree, ent);
			t->t_nitems--;
		}
		break;
	}
	default:
		KASSERT(false);
		ent = NULL;
	}
	rw_exit(&t->t_lock);

	if (ent == NULL) {
		return ENOENT;
	}
	pool_cache_put(tblent_cache, ent);
	return 0;
}
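/*
 * Illustrative round trip (hypothetical caller, not part of the original
 * module): insert a host address, match it, then remove it again.  The
 * table in slot 0 is assumed to exist and the error checks are elided.
 */
#if 0
static void
npf_table_roundtrip_example(npf_tableset_t *tset, const npf_addr_t *addr)
{
	const int alen = sizeof(struct in_addr);	/* IPv4 address */

	(void)npf_table_insert(tset, 0, alen, addr, NPF_NO_NETMASK);
	KASSERT(npf_table_lookup(tset, 0, alen, addr) == 0);
	(void)npf_table_remove(tset, 0, alen, addr, NPF_NO_NETMASK);
}
#endif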
/*
 * npf_table_lookup: find the table by ID and look up the specified
 * IP address in it.
 */
int
npf_table_lookup(npf_tableset_t *tset, u_int tid,
    const int alen, const npf_addr_t *addr)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent;
	npf_table_t *t;

	if (__predict_false(aidx > 1)) {
		return EINVAL;
	}

	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
		return EINVAL;
	}

	rw_enter(&t->t_lock, RW_READER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;
		ent = table_hash_lookup(t, addr, alen, &htbl);
		break;
	}
	case NPF_TABLE_TREE: {
		ent = ptree_find_node(&t->t_tree[aidx], addr);
		break;
	}
	default:
		KASSERT(false);
		ent = NULL;
	}
	rw_exit(&t->t_lock);

	return ent ? 0 : ENOENT;
}

static int
table_ent_copyout(npf_tblent_t *ent, npf_netmask_t mask,
    void *ubuf, size_t len, size_t *off)
{
	void *ubufp = (uint8_t *)ubuf + *off;
	npf_ioctl_ent_t uent;

	if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
		return ENOMEM;
	}
	uent.alen = ent->te_alen;
	memcpy(&uent.addr, &ent->te_addr, sizeof(npf_addr_t));
	uent.mask = mask;

	return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
}

static int
table_tree_list(pt_tree_t *tree, npf_netmask_t maxmask, void *ubuf,
    size_t len, size_t *off)
{
	npf_tblent_t *ent = NULL;
	int error = 0;

	while ((ent = ptree_iterate(tree, ent, PT_ASCENDING)) != NULL) {
		pt_bitlen_t blen;

		/* Plain nodes have no stored mask; report the maximum. */
		if (!ptree_mask_node_p(tree, ent, &blen)) {
			blen = maxmask;
		}
		error = table_ent_copyout(ent, blen, ubuf, len, off);
		if (error)
			break;
	}
	return error;
}

/*
 * npf_table_list: copy a list of all table entries into a userspace buffer.
 */
int
npf_table_list(npf_tableset_t *tset, u_int tid, void *ubuf, size_t len)
{
	npf_table_t *t;
	size_t off = 0;
	int error = 0;

	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
		return EINVAL;
	}

	rw_enter(&t->t_lock, RW_READER);
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		for (unsigned n = 0; n <= t->t_hashmask; n++) {
			npf_tblent_t *ent;

			LIST_FOREACH(ent, &t->t_hashl[n], te_entry.hashq)
				if ((error = table_ent_copyout(ent, 0, ubuf,
				    len, &off)) != 0)
					break;
			/* Stop on the first error, so it is not masked. */
			if (error)
				break;
		}
		break;
	case NPF_TABLE_TREE:
		error = table_tree_list(&t->t_tree[0], 32, ubuf, len, &off);
		if (error)
			break;
		error = table_tree_list(&t->t_tree[1], 128, ubuf, len, &off);
		break;
	default:
		KASSERT(false);
	}
	rw_exit(&t->t_lock);

	return error;
}

/*
 * npf_table_flush: remove all table entries.
 */
int
npf_table_flush(npf_tableset_t *tset, u_int tid)
{
	npf_table_t *t;

	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
		return EINVAL;
	}

	rw_enter(&t->t_lock, RW_WRITER);
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		table_hash_destroy(t);
		t->t_nitems = 0;
		break;
	case NPF_TABLE_TREE:
		table_tree_destroy(&t->t_tree[0]);
		table_tree_destroy(&t->t_tree[1]);
		t->t_nitems = 0;
		break;
	default:
		KASSERT(false);
	}
	rw_exit(&t->t_lock);

	return 0;
}
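/*
 * Illustrative sketch (hypothetical, not part of the original module): the
 * kernel side of an ioctl would pass a user buffer to npf_table_list(), and
 * the user process would then walk the resulting array of npf_ioctl_ent_t
 * records, sized as below.
 */
#if 0
static int
npf_table_list_example(npf_tableset_t *tset, void *ubuf, size_t nitems)
{
	const size_t len = nitems * sizeof(npf_ioctl_ent_t);

	/* Entries are copied out back-to-back; ENOMEM if the buffer is short. */
	return npf_table_list(tset, 0, ubuf, len);
}
#endif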