1 /* $NetBSD: npf_tableset.c,v 1.22 2014/08/11 01:54:12 rmind Exp $ */ 2 3 /*- 4 * Copyright (c) 2009-2014 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This material is based upon work partially supported by The 8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * NPF tableset module. 34 * 35 * Notes 36 * 37 * The tableset is an array of tables. After the creation, the array 38 * is immutable. The caller is responsible to synchronise the access 39 * to the tableset. The table can either be a hash or a tree. Its 40 * entries are protected by a read-write lock. 41 */ 42 43 #include <sys/cdefs.h> 44 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.22 2014/08/11 01:54:12 rmind Exp $"); 45 46 #include <sys/param.h> 47 #include <sys/types.h> 48 49 #include <sys/atomic.h> 50 #include <sys/hash.h> 51 #include <sys/cdbr.h> 52 #include <sys/kmem.h> 53 #include <sys/malloc.h> 54 #include <sys/pool.h> 55 #include <sys/queue.h> 56 #include <sys/rwlock.h> 57 #include <sys/systm.h> 58 #include <sys/types.h> 59 60 #include "npf_impl.h" 61 62 typedef struct npf_tblent { 63 union { 64 LIST_ENTRY(npf_tblent) te_hashent; 65 pt_node_t te_node; 66 } /* C11 */; 67 int te_alen; 68 npf_addr_t te_addr; 69 } npf_tblent_t; 70 71 LIST_HEAD(npf_hashl, npf_tblent); 72 73 struct npf_table { 74 /* 75 * The storage type can be: a) hash b) tree c) cdb. 76 * There are separate trees for IPv4 and IPv6. 77 */ 78 union { 79 struct { 80 struct npf_hashl *t_hashl; 81 u_long t_hashmask; 82 }; 83 struct { 84 pt_tree_t t_tree[2]; 85 }; 86 struct { 87 void * t_blob; 88 size_t t_bsize; 89 struct cdbr * t_cdb; 90 }; 91 } /* C11 */; 92 93 /* 94 * Table ID, type and lock. The ID may change during the 95 * config reload, it is protected by the npf_config_lock. 96 */ 97 int t_type; 98 u_int t_id; 99 krwlock_t t_lock; 100 101 /* The number of items, reference count and table name. */ 102 u_int t_nitems; 103 u_int t_refcnt; 104 char t_name[NPF_TABLE_MAXNAMELEN]; 105 }; 106 107 struct npf_tableset { 108 u_int ts_nitems; 109 npf_table_t * ts_map[]; 110 }; 111 112 #define NPF_TABLESET_SIZE(n) \ 113 (offsetof(npf_tableset_t, ts_map[n]) * sizeof(npf_table_t *)) 114 115 #define NPF_ADDRLEN2TREE(alen) ((alen) >> 4) 116 117 static pool_cache_t tblent_cache __read_mostly; 118 119 /* 120 * npf_table_sysinit: initialise tableset structures. 121 */ 122 void 123 npf_tableset_sysinit(void) 124 { 125 tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit, 126 0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL); 127 } 128 129 void 130 npf_tableset_sysfini(void) 131 { 132 pool_cache_destroy(tblent_cache); 133 } 134 135 npf_tableset_t * 136 npf_tableset_create(u_int nitems) 137 { 138 npf_tableset_t *ts = kmem_zalloc(NPF_TABLESET_SIZE(nitems), KM_SLEEP); 139 ts->ts_nitems = nitems; 140 return ts; 141 } 142 143 void 144 npf_tableset_destroy(npf_tableset_t *ts) 145 { 146 /* 147 * Destroy all tables (no references should be held, since the 148 * ruleset should be destroyed before). 149 */ 150 for (u_int tid = 0; tid < ts->ts_nitems; tid++) { 151 npf_table_t *t = ts->ts_map[tid]; 152 153 if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) { 154 npf_table_destroy(t); 155 } 156 } 157 kmem_free(ts, NPF_TABLESET_SIZE(ts->ts_nitems)); 158 } 159 160 /* 161 * npf_tableset_insert: insert the table into the specified tableset. 162 * 163 * => Returns 0 on success. Fails and returns error if ID is already used. 164 */ 165 int 166 npf_tableset_insert(npf_tableset_t *ts, npf_table_t *t) 167 { 168 const u_int tid = t->t_id; 169 int error; 170 171 KASSERT((u_int)tid < ts->ts_nitems); 172 173 if (ts->ts_map[tid] == NULL) { 174 atomic_inc_uint(&t->t_refcnt); 175 ts->ts_map[tid] = t; 176 error = 0; 177 } else { 178 error = EEXIST; 179 } 180 return error; 181 } 182 183 /* 184 * npf_tableset_getbyname: look for a table in the set given the name. 185 */ 186 npf_table_t * 187 npf_tableset_getbyname(npf_tableset_t *ts, const char *name) 188 { 189 npf_table_t *t; 190 191 for (u_int tid = 0; tid < ts->ts_nitems; tid++) { 192 if ((t = ts->ts_map[tid]) == NULL) 193 continue; 194 if (strcmp(name, t->t_name) == 0) 195 return t; 196 } 197 return NULL; 198 } 199 200 npf_table_t * 201 npf_tableset_getbyid(npf_tableset_t *ts, u_int tid) 202 { 203 if (__predict_true(tid < ts->ts_nitems)) { 204 return ts->ts_map[tid]; 205 } 206 return NULL; 207 } 208 209 /* 210 * npf_tableset_reload: iterate all tables and if the new table is of the 211 * same type and has no items, then we preserve the old one and its entries. 212 * 213 * => The caller is responsible for providing synchronisation. 214 */ 215 void 216 npf_tableset_reload(npf_tableset_t *nts, npf_tableset_t *ots) 217 { 218 for (u_int tid = 0; tid < nts->ts_nitems; tid++) { 219 npf_table_t *t, *ot; 220 221 if ((t = nts->ts_map[tid]) == NULL) { 222 continue; 223 } 224 225 /* If our table has entries, just load it. */ 226 if (t->t_nitems) { 227 continue; 228 } 229 230 /* Look for a currently existing table with such name. */ 231 ot = npf_tableset_getbyname(ots, t->t_name); 232 if (ot == NULL) { 233 /* Not found: we have a new table. */ 234 continue; 235 } 236 237 /* Found. Did the type change? */ 238 if (t->t_type != ot->t_type) { 239 /* Yes, load the new. */ 240 continue; 241 } 242 243 /* 244 * Preserve the current table. Acquire a reference since 245 * we are keeping it in the old table set. Update its ID. 246 */ 247 atomic_inc_uint(&ot->t_refcnt); 248 nts->ts_map[tid] = ot; 249 250 KASSERT(npf_config_locked_p()); 251 ot->t_id = tid; 252 253 /* Destroy the new table (we hold the only reference). */ 254 t->t_refcnt--; 255 npf_table_destroy(t); 256 } 257 } 258 259 int 260 npf_tableset_export(const npf_tableset_t *ts, prop_array_t tables) 261 { 262 const npf_table_t *t; 263 264 KASSERT(npf_config_locked_p()); 265 266 for (u_int tid = 0; tid < ts->ts_nitems; tid++) { 267 if ((t = ts->ts_map[tid]) == NULL) { 268 continue; 269 } 270 prop_dictionary_t tdict = prop_dictionary_create(); 271 prop_dictionary_set_cstring(tdict, "name", t->t_name); 272 prop_dictionary_set_uint32(tdict, "type", t->t_type); 273 prop_dictionary_set_uint32(tdict, "id", tid); 274 275 prop_array_add(tables, tdict); 276 prop_object_release(tdict); 277 } 278 return 0; 279 } 280 281 /* 282 * Few helper routines. 283 */ 284 285 static npf_tblent_t * 286 table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr, 287 const int alen, struct npf_hashl **rhtbl) 288 { 289 const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT); 290 struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask]; 291 npf_tblent_t *ent; 292 293 /* 294 * Lookup the hash table and check for duplicates. 295 * Note: mask is ignored for the hash storage. 296 */ 297 LIST_FOREACH(ent, htbl, te_hashent) { 298 if (ent->te_alen != alen) { 299 continue; 300 } 301 if (memcmp(&ent->te_addr, addr, alen) == 0) { 302 break; 303 } 304 } 305 *rhtbl = htbl; 306 return ent; 307 } 308 309 static void 310 table_hash_destroy(npf_table_t *t) 311 { 312 for (unsigned n = 0; n <= t->t_hashmask; n++) { 313 npf_tblent_t *ent; 314 315 while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) { 316 LIST_REMOVE(ent, te_hashent); 317 pool_cache_put(tblent_cache, ent); 318 } 319 } 320 } 321 322 static void 323 table_tree_destroy(pt_tree_t *tree) 324 { 325 npf_tblent_t *ent; 326 327 while ((ent = ptree_iterate(tree, NULL, PT_ASCENDING)) != NULL) { 328 ptree_remove_node(tree, ent); 329 pool_cache_put(tblent_cache, ent); 330 } 331 } 332 333 /* 334 * npf_table_create: create table with a specified ID. 335 */ 336 npf_table_t * 337 npf_table_create(const char *name, u_int tid, int type, 338 void *blob, size_t size) 339 { 340 npf_table_t *t; 341 342 t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP); 343 strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN); 344 345 switch (type) { 346 case NPF_TABLE_TREE: 347 ptree_init(&t->t_tree[0], &npf_table_ptree_ops, 348 (void *)(sizeof(struct in_addr) / sizeof(uint32_t)), 349 offsetof(npf_tblent_t, te_node), 350 offsetof(npf_tblent_t, te_addr)); 351 ptree_init(&t->t_tree[1], &npf_table_ptree_ops, 352 (void *)(sizeof(struct in6_addr) / sizeof(uint32_t)), 353 offsetof(npf_tblent_t, te_node), 354 offsetof(npf_tblent_t, te_addr)); 355 break; 356 case NPF_TABLE_HASH: 357 t->t_hashl = hashinit(1024, HASH_LIST, true, &t->t_hashmask); 358 if (t->t_hashl == NULL) { 359 kmem_free(t, sizeof(npf_table_t)); 360 return NULL; 361 } 362 break; 363 case NPF_TABLE_CDB: 364 t->t_blob = blob; 365 t->t_bsize = size; 366 t->t_cdb = cdbr_open_mem(blob, size, CDBR_DEFAULT, NULL, NULL); 367 if (t->t_cdb == NULL) { 368 kmem_free(t, sizeof(npf_table_t)); 369 free(blob, M_TEMP); 370 return NULL; 371 } 372 t->t_nitems = cdbr_entries(t->t_cdb); 373 break; 374 default: 375 KASSERT(false); 376 } 377 rw_init(&t->t_lock); 378 t->t_type = type; 379 t->t_id = tid; 380 381 return t; 382 } 383 384 /* 385 * npf_table_destroy: free all table entries and table itself. 386 */ 387 void 388 npf_table_destroy(npf_table_t *t) 389 { 390 KASSERT(t->t_refcnt == 0); 391 392 switch (t->t_type) { 393 case NPF_TABLE_HASH: 394 table_hash_destroy(t); 395 hashdone(t->t_hashl, HASH_LIST, t->t_hashmask); 396 break; 397 case NPF_TABLE_TREE: 398 table_tree_destroy(&t->t_tree[0]); 399 table_tree_destroy(&t->t_tree[1]); 400 break; 401 case NPF_TABLE_CDB: 402 cdbr_close(t->t_cdb); 403 free(t->t_blob, M_TEMP); 404 break; 405 default: 406 KASSERT(false); 407 } 408 rw_destroy(&t->t_lock); 409 kmem_free(t, sizeof(npf_table_t)); 410 } 411 412 /* 413 * npf_table_check: validate the name, ID and type. 414 */ 415 int 416 npf_table_check(npf_tableset_t *ts, const char *name, u_int tid, int type) 417 { 418 if ((u_int)tid >= ts->ts_nitems) { 419 return EINVAL; 420 } 421 if (ts->ts_map[tid] != NULL) { 422 return EEXIST; 423 } 424 switch (type) { 425 case NPF_TABLE_TREE: 426 case NPF_TABLE_HASH: 427 case NPF_TABLE_CDB: 428 break; 429 default: 430 return EINVAL; 431 } 432 if (strlen(name) >= NPF_TABLE_MAXNAMELEN) { 433 return ENAMETOOLONG; 434 } 435 if (npf_tableset_getbyname(ts, name)) { 436 return EEXIST; 437 } 438 return 0; 439 } 440 441 static int 442 table_cidr_check(const u_int aidx, const npf_addr_t *addr, 443 const npf_netmask_t mask) 444 { 445 if (aidx > 1) { 446 return EINVAL; 447 } 448 if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) { 449 return EINVAL; 450 } 451 452 /* 453 * For IPv4 (aidx = 0) - 32 and for IPv6 (aidx = 1) - 128. 454 * If it is a host - shall use NPF_NO_NETMASK. 455 */ 456 if (mask >= (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) { 457 return EINVAL; 458 } 459 return 0; 460 } 461 462 /* 463 * npf_table_insert: add an IP CIDR entry into the table. 464 */ 465 int 466 npf_table_insert(npf_table_t *t, const int alen, 467 const npf_addr_t *addr, const npf_netmask_t mask) 468 { 469 const u_int aidx = NPF_ADDRLEN2TREE(alen); 470 npf_tblent_t *ent; 471 int error; 472 473 error = table_cidr_check(aidx, addr, mask); 474 if (error) { 475 return error; 476 } 477 ent = pool_cache_get(tblent_cache, PR_WAITOK); 478 memcpy(&ent->te_addr, addr, alen); 479 ent->te_alen = alen; 480 481 /* 482 * Insert the entry. Return an error on duplicate. 483 */ 484 rw_enter(&t->t_lock, RW_WRITER); 485 switch (t->t_type) { 486 case NPF_TABLE_HASH: { 487 struct npf_hashl *htbl; 488 489 /* 490 * Hash tables by the concept support only IPs. 491 */ 492 if (mask != NPF_NO_NETMASK) { 493 error = EINVAL; 494 break; 495 } 496 if (!table_hash_lookup(t, addr, alen, &htbl)) { 497 LIST_INSERT_HEAD(htbl, ent, te_hashent); 498 t->t_nitems++; 499 } else { 500 error = EEXIST; 501 } 502 break; 503 } 504 case NPF_TABLE_TREE: { 505 pt_tree_t *tree = &t->t_tree[aidx]; 506 bool ok; 507 508 /* 509 * If no mask specified, use maximum mask. 510 */ 511 ok = (mask != NPF_NO_NETMASK) ? 512 ptree_insert_mask_node(tree, ent, mask) : 513 ptree_insert_node(tree, ent); 514 if (ok) { 515 t->t_nitems++; 516 error = 0; 517 } else { 518 error = EEXIST; 519 } 520 break; 521 } 522 case NPF_TABLE_CDB: 523 error = EINVAL; 524 break; 525 default: 526 KASSERT(false); 527 } 528 rw_exit(&t->t_lock); 529 530 if (error) { 531 pool_cache_put(tblent_cache, ent); 532 } 533 return error; 534 } 535 536 /* 537 * npf_table_remove: remove the IP CIDR entry from the table. 538 */ 539 int 540 npf_table_remove(npf_table_t *t, const int alen, 541 const npf_addr_t *addr, const npf_netmask_t mask) 542 { 543 const u_int aidx = NPF_ADDRLEN2TREE(alen); 544 npf_tblent_t *ent = NULL; 545 int error = ENOENT; 546 547 error = table_cidr_check(aidx, addr, mask); 548 if (error) { 549 return error; 550 } 551 552 rw_enter(&t->t_lock, RW_WRITER); 553 switch (t->t_type) { 554 case NPF_TABLE_HASH: { 555 struct npf_hashl *htbl; 556 557 ent = table_hash_lookup(t, addr, alen, &htbl); 558 if (__predict_true(ent != NULL)) { 559 LIST_REMOVE(ent, te_hashent); 560 t->t_nitems--; 561 } 562 break; 563 } 564 case NPF_TABLE_TREE: { 565 pt_tree_t *tree = &t->t_tree[aidx]; 566 567 ent = ptree_find_node(tree, addr); 568 if (__predict_true(ent != NULL)) { 569 ptree_remove_node(tree, ent); 570 t->t_nitems--; 571 } 572 break; 573 } 574 case NPF_TABLE_CDB: 575 error = EINVAL; 576 break; 577 default: 578 KASSERT(false); 579 ent = NULL; 580 } 581 rw_exit(&t->t_lock); 582 583 if (ent) { 584 pool_cache_put(tblent_cache, ent); 585 } 586 return error; 587 } 588 589 /* 590 * npf_table_lookup: find the table according to ID, lookup and match 591 * the contents with the specified IP address. 592 */ 593 int 594 npf_table_lookup(npf_table_t *t, const int alen, const npf_addr_t *addr) 595 { 596 const u_int aidx = NPF_ADDRLEN2TREE(alen); 597 struct npf_hashl *htbl; 598 const void *data; 599 size_t dlen; 600 bool found; 601 602 if (__predict_false(aidx > 1)) { 603 return EINVAL; 604 } 605 606 switch (t->t_type) { 607 case NPF_TABLE_HASH: 608 rw_enter(&t->t_lock, RW_READER); 609 found = table_hash_lookup(t, addr, alen, &htbl) != NULL; 610 rw_exit(&t->t_lock); 611 break; 612 case NPF_TABLE_TREE: 613 rw_enter(&t->t_lock, RW_READER); 614 found = ptree_find_node(&t->t_tree[aidx], addr) != NULL; 615 rw_exit(&t->t_lock); 616 break; 617 case NPF_TABLE_CDB: 618 if (cdbr_find(t->t_cdb, addr, alen, &data, &dlen) == 0) { 619 found = dlen == alen && memcmp(addr, data, dlen) == 0; 620 } else { 621 found = false; 622 } 623 break; 624 default: 625 KASSERT(false); 626 found = false; 627 } 628 629 return found ? 0 : ENOENT; 630 } 631 632 static int 633 table_ent_copyout(const npf_addr_t *addr, const int alen, npf_netmask_t mask, 634 void *ubuf, size_t len, size_t *off) 635 { 636 void *ubufp = (uint8_t *)ubuf + *off; 637 npf_ioctl_ent_t uent; 638 639 if ((*off += sizeof(npf_ioctl_ent_t)) > len) { 640 return ENOMEM; 641 } 642 uent.alen = alen; 643 memcpy(&uent.addr, addr, sizeof(npf_addr_t)); 644 uent.mask = mask; 645 646 return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t)); 647 } 648 649 static int 650 table_hash_list(const npf_table_t *t, void *ubuf, size_t len) 651 { 652 size_t off = 0; 653 int error = 0; 654 655 for (unsigned n = 0; n <= t->t_hashmask; n++) { 656 npf_tblent_t *ent; 657 658 LIST_FOREACH(ent, &t->t_hashl[n], te_hashent) { 659 error = table_ent_copyout(&ent->te_addr, 660 ent->te_alen, 0, ubuf, len, &off); 661 if (error) 662 break; 663 } 664 } 665 return error; 666 } 667 668 static int 669 table_tree_list(pt_tree_t *tree, npf_netmask_t maxmask, void *ubuf, 670 size_t len, size_t *off) 671 { 672 npf_tblent_t *ent = NULL; 673 int error = 0; 674 675 while ((ent = ptree_iterate(tree, ent, PT_ASCENDING)) != NULL) { 676 pt_bitlen_t blen; 677 678 if (!ptree_mask_node_p(tree, ent, &blen)) { 679 blen = maxmask; 680 } 681 error = table_ent_copyout(&ent->te_addr, ent->te_alen, 682 blen, ubuf, len, off); 683 if (error) 684 break; 685 } 686 return error; 687 } 688 689 static int 690 table_cdb_list(npf_table_t *t, void *ubuf, size_t len) 691 { 692 size_t off = 0, dlen; 693 const void *data; 694 int error = 0; 695 696 for (size_t i = 0; i < t->t_nitems; i++) { 697 if (cdbr_get(t->t_cdb, i, &data, &dlen) != 0) { 698 return EINVAL; 699 } 700 error = table_ent_copyout(data, dlen, 0, ubuf, len, &off); 701 if (error) 702 break; 703 } 704 return error; 705 } 706 707 /* 708 * npf_table_list: copy a list of all table entries into a userspace buffer. 709 */ 710 int 711 npf_table_list(npf_table_t *t, void *ubuf, size_t len) 712 { 713 size_t off = 0; 714 int error = 0; 715 716 rw_enter(&t->t_lock, RW_READER); 717 switch (t->t_type) { 718 case NPF_TABLE_HASH: 719 error = table_hash_list(t, ubuf, len); 720 break; 721 case NPF_TABLE_TREE: 722 error = table_tree_list(&t->t_tree[0], 32, ubuf, len, &off); 723 if (error) 724 break; 725 error = table_tree_list(&t->t_tree[1], 128, ubuf, len, &off); 726 break; 727 case NPF_TABLE_CDB: 728 error = table_cdb_list(t, ubuf, len); 729 break; 730 default: 731 KASSERT(false); 732 } 733 rw_exit(&t->t_lock); 734 735 return error; 736 } 737 738 /* 739 * npf_table_flush: remove all table entries. 740 */ 741 int 742 npf_table_flush(npf_table_t *t) 743 { 744 int error = 0; 745 746 rw_enter(&t->t_lock, RW_WRITER); 747 switch (t->t_type) { 748 case NPF_TABLE_HASH: 749 table_hash_destroy(t); 750 t->t_nitems = 0; 751 break; 752 case NPF_TABLE_TREE: 753 table_tree_destroy(&t->t_tree[0]); 754 table_tree_destroy(&t->t_tree[1]); 755 t->t_nitems = 0; 756 break; 757 case NPF_TABLE_CDB: 758 error = EINVAL; 759 break; 760 default: 761 KASSERT(false); 762 } 763 rw_exit(&t->t_lock); 764 return error; 765 } 766