/*	$NetBSD: npf_tableset.c,v 1.24 2016/12/09 02:40:38 christos Exp $	*/

/*-
 * Copyright (c) 2009-2016 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NPF tableset module.
 *
 * Notes
 *
 *	The tableset is an array of tables.  After creation, the array
 *	is immutable.  The caller is responsible for synchronising access
 *	to the tableset.  A table can be a hash, a tree (LPM) or a
 *	constant database (cdb).  Table entries are protected by a
 *	read-write lock.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.24 2016/12/09 02:40:38 christos Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/atomic.h>
#include <sys/hash.h>
#include <sys/cdbr.h>
#include <sys/kmem.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/systm.h>

#include "npf_impl.h"
#include "lpm.h"

typedef struct npf_tblent {
	LIST_ENTRY(npf_tblent)	te_listent;
	uint16_t		te_preflen;
	uint16_t		te_alen;
	npf_addr_t		te_addr;
} npf_tblent_t;

LIST_HEAD(npf_hashl, npf_tblent);

struct npf_table {
	/*
	 * The storage type can be: a) hash, b) tree, c) cdb.
	 * The tree storage is a single LPM handling both IPv4 and IPv6.
	 */
	union {
		struct {
			struct npf_hashl *	t_hashl;
			u_long			t_hashmask;
		};
		struct {
			lpm_t *			t_lpm;
			LIST_HEAD(, npf_tblent)	t_list;
		};
		struct {
			void *			t_blob;
			size_t			t_bsize;
			struct cdbr *		t_cdb;
		};
	} /* C11 */;

	/*
	 * Table ID, type and lock.  The ID may change during a config
	 * reload; it is protected by the npf_config_lock.
	 */
	int			t_type;
	u_int			t_id;
	krwlock_t		t_lock;

	/* The number of items, reference count and table name. */
	u_int			t_nitems;
	u_int			t_refcnt;
	char			t_name[NPF_TABLE_MAXNAMELEN];
};

struct npf_tableset {
	u_int			ts_nitems;
	npf_table_t *		ts_map[];
};

#define	NPF_TABLESET_SIZE(n)	\
    (offsetof(npf_tableset_t, ts_map[n]) * sizeof(npf_table_t *))

#define	NPF_ADDRLEN2TREE(alen)	((alen) >> 4)
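/*
 * Typical use of this module (illustrative sketch only; error handling
 * is omitted and the address value is a placeholder):
 *
 *	npf_tableset_t *ts = npf_tableset_create(1);
 *	npf_table_t *t = npf_table_create("blocklist", 0, NPF_TABLE_TREE,
 *	    NULL, 0);
 *	npf_tableset_insert(ts, t);
 *
 *	npf_addr_t addr;			// some IPv4 address, alen = 4
 *	npf_table_insert(t, 4, &addr, 24);	// add a /24 prefix
 *	npf_table_lookup(t, 4, &addr);		// returns 0 if matched
 *
 *	npf_tableset_destroy(ts);		// drops the reference and
 *						// destroys the table
 */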
static pool_cache_t		tblent_cache	__read_mostly;

/*
 * npf_tableset_sysinit: initialise tableset structures.
 */
void
npf_tableset_sysinit(void)
{
	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
	    0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
}

void
npf_tableset_sysfini(void)
{
	pool_cache_destroy(tblent_cache);
}

npf_tableset_t *
npf_tableset_create(u_int nitems)
{
	npf_tableset_t *ts = kmem_zalloc(NPF_TABLESET_SIZE(nitems), KM_SLEEP);
	ts->ts_nitems = nitems;
	return ts;
}

void
npf_tableset_destroy(npf_tableset_t *ts)
{
	/*
	 * Destroy all tables (no references should be held, since the
	 * ruleset should have been destroyed first).
	 */
	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
		npf_table_t *t = ts->ts_map[tid];

		if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) {
			npf_table_destroy(t);
		}
	}
	kmem_free(ts, NPF_TABLESET_SIZE(ts->ts_nitems));
}

/*
 * npf_tableset_insert: insert the table into the specified tableset.
 *
 * => Returns 0 on success; fails with EEXIST if the ID is already used.
 */
int
npf_tableset_insert(npf_tableset_t *ts, npf_table_t *t)
{
	const u_int tid = t->t_id;
	int error;

	KASSERT((u_int)tid < ts->ts_nitems);

	if (ts->ts_map[tid] == NULL) {
		atomic_inc_uint(&t->t_refcnt);
		ts->ts_map[tid] = t;
		error = 0;
	} else {
		error = EEXIST;
	}
	return error;
}

/*
 * npf_tableset_getbyname: look up a table in the set by its name.
 */
npf_table_t *
npf_tableset_getbyname(npf_tableset_t *ts, const char *name)
{
	npf_table_t *t;

	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
		if ((t = ts->ts_map[tid]) == NULL)
			continue;
		if (strcmp(name, t->t_name) == 0)
			return t;
	}
	return NULL;
}

npf_table_t *
npf_tableset_getbyid(npf_tableset_t *ts, u_int tid)
{
	if (__predict_true(tid < ts->ts_nitems)) {
		return ts->ts_map[tid];
	}
	return NULL;
}

/*
 * npf_tableset_reload: iterate the tables in the new set and, if a new
 * table is of the same type as the existing one and carries no items,
 * preserve the existing table and its entries.
 *
 * => The caller is responsible for providing synchronisation.
 */
void
npf_tableset_reload(npf_tableset_t *nts, npf_tableset_t *ots)
{
	for (u_int tid = 0; tid < nts->ts_nitems; tid++) {
		npf_table_t *t, *ot;

		if ((t = nts->ts_map[tid]) == NULL) {
			continue;
		}

		/* If our table has entries, just load it. */
		if (t->t_nitems) {
			continue;
		}

		/* Look for an existing table with the same name. */
		ot = npf_tableset_getbyname(ots, t->t_name);
		if (ot == NULL) {
			/* Not found: we have a new table. */
			continue;
		}

		/* Found.  Did the type change? */
		if (t->t_type != ot->t_type) {
			/* Yes: load the new table. */
			continue;
		}

		/*
		 * Preserve the existing table.  Acquire a reference since
		 * we are keeping it in the new tableset.  Update its ID.
		 */
		atomic_inc_uint(&ot->t_refcnt);
		nts->ts_map[tid] = ot;

		KASSERT(npf_config_locked_p());
		ot->t_id = tid;

		/* Destroy the new table (we hold the only reference). */
		t->t_refcnt--;
		npf_table_destroy(t);
	}
}
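/*
 * Summary of the reload rules implemented above (informal):
 *
 *	new table already has entries     -> use the new table as-is
 *	no existing table with that name  -> use the new (empty) table
 *	storage type changed              -> use the new (empty) table
 *	otherwise                         -> keep the existing table and
 *	                                     its entries under the new ID
 */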
int
npf_tableset_export(const npf_tableset_t *ts, prop_array_t tables)
{
	const npf_table_t *t;

	KASSERT(npf_config_locked_p());

	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
		if ((t = ts->ts_map[tid]) == NULL) {
			continue;
		}
		prop_dictionary_t tdict = prop_dictionary_create();
		prop_dictionary_set_cstring(tdict, "name", t->t_name);
		prop_dictionary_set_uint32(tdict, "type", t->t_type);
		prop_dictionary_set_uint32(tdict, "id", tid);

		prop_array_add(tables, tdict);
		prop_object_release(tdict);
	}
	return 0;
}

/*
 * A few helper routines.
 */

static npf_tblent_t *
table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr,
    const int alen, struct npf_hashl **rhtbl)
{
	const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT);
	struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask];
	npf_tblent_t *ent;

	/*
	 * Look up the hash bucket and check for duplicates.
	 * Note: the mask is ignored for hash storage.
	 */
	LIST_FOREACH(ent, htbl, te_listent) {
		if (ent->te_alen != alen) {
			continue;
		}
		if (memcmp(&ent->te_addr, addr, alen) == 0) {
			break;
		}
	}
	*rhtbl = htbl;
	return ent;
}

static void
table_hash_flush(npf_table_t *t)
{
	for (unsigned n = 0; n <= t->t_hashmask; n++) {
		npf_tblent_t *ent;

		while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
			LIST_REMOVE(ent, te_listent);
			pool_cache_put(tblent_cache, ent);
		}
	}
}

static void
table_tree_flush(npf_table_t *t)
{
	npf_tblent_t *ent;

	while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
		LIST_REMOVE(ent, te_listent);
		pool_cache_put(tblent_cache, ent);
	}
	lpm_clear(t->t_lpm, NULL, NULL);
}

/*
 * npf_table_create: create a table with the specified ID.
 */
npf_table_t *
npf_table_create(const char *name, u_int tid, int type,
    void *blob, size_t size)
{
	npf_table_t *t;

	t = kmem_zalloc(sizeof(*t), KM_SLEEP);
	strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN);

	switch (type) {
	case NPF_TABLE_TREE:
		if ((t->t_lpm = lpm_create()) == NULL)
			goto out;
		LIST_INIT(&t->t_list);
		break;
	case NPF_TABLE_HASH:
		t->t_hashl = hashinit(1024, HASH_LIST, true, &t->t_hashmask);
		if (t->t_hashl == NULL)
			goto out;
		break;
	case NPF_TABLE_CDB:
		t->t_blob = blob;
		t->t_bsize = size;
		t->t_cdb = cdbr_open_mem(blob, size, CDBR_DEFAULT, NULL, NULL);
		if (t->t_cdb == NULL) {
			free(blob, M_TEMP);
			goto out;
		}
		t->t_nitems = cdbr_entries(t->t_cdb);
		break;
	default:
		KASSERT(false);
	}
	rw_init(&t->t_lock);
	t->t_type = type;
	t->t_id = tid;

	return t;
out:
	kmem_free(t, sizeof(*t));
	return NULL;
}
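/*
 * Usage note for npf_table_create() above (illustrative sketch only;
 * the names and IDs are placeholders):
 *
 *	// tree and hash tables start empty; blob/size are unused
 *	t = npf_table_create("blocklist", 0, NPF_TABLE_TREE, NULL, 0);
 *
 *	// a cdb table takes ownership of the blob: it is released with
 *	// free(blob, M_TEMP) on construction failure and on destroy
 *	t = npf_table_create("static-set", 1, NPF_TABLE_CDB, blob, size);
 */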
/*
 * npf_table_destroy: free all table entries and the table itself.
 */
void
npf_table_destroy(npf_table_t *t)
{
	KASSERT(t->t_refcnt == 0);

	switch (t->t_type) {
	case NPF_TABLE_HASH:
		table_hash_flush(t);
		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
		break;
	case NPF_TABLE_TREE:
		table_tree_flush(t);
		lpm_destroy(t->t_lpm);
		break;
	case NPF_TABLE_CDB:
		cdbr_close(t->t_cdb);
		free(t->t_blob, M_TEMP);
		break;
	default:
		KASSERT(false);
	}
	rw_destroy(&t->t_lock);
	kmem_free(t, sizeof(*t));
}

/*
 * npf_table_check: validate the name, ID and type.
 */
int
npf_table_check(npf_tableset_t *ts, const char *name, u_int tid, int type)
{
	if ((u_int)tid >= ts->ts_nitems) {
		return EINVAL;
	}
	if (ts->ts_map[tid] != NULL) {
		return EEXIST;
	}
	switch (type) {
	case NPF_TABLE_TREE:
	case NPF_TABLE_HASH:
	case NPF_TABLE_CDB:
		break;
	default:
		return EINVAL;
	}
	if (strlen(name) >= NPF_TABLE_MAXNAMELEN) {
		return ENAMETOOLONG;
	}
	if (npf_tableset_getbyname(ts, name)) {
		return EEXIST;
	}
	return 0;
}

static int
table_cidr_check(const u_int aidx, const npf_addr_t *addr,
    const npf_netmask_t mask)
{
	if (aidx > 1) {
		return EINVAL;
	}
	if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) {
		return EINVAL;
	}

	/*
	 * The maximum mask is 32 for IPv4 (aidx = 0) and 128 for IPv6
	 * (aidx = 1).  A single host shall use NPF_NO_NETMASK.
	 */
	if (mask > (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) {
		return EINVAL;
	}
	return 0;
}

/*
 * npf_table_insert: add an IP CIDR entry into the table.
 */
int
npf_table_insert(npf_table_t *t, const int alen,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent;
	int error;

	error = table_cidr_check(aidx, addr, mask);
	if (error) {
		return error;
	}
	ent = pool_cache_get(tblent_cache, PR_WAITOK);
	memcpy(&ent->te_addr, addr, alen);
	ent->te_alen = alen;

	/*
	 * Insert the entry.  Return an error on duplicate.
	 */
	rw_enter(&t->t_lock, RW_WRITER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;

		/*
		 * Hash storage, by design, supports only single
		 * IP addresses (no masks).
		 */
		if (mask != NPF_NO_NETMASK) {
			error = EINVAL;
			break;
		}
		if (!table_hash_lookup(t, addr, alen, &htbl)) {
			LIST_INSERT_HEAD(htbl, ent, te_listent);
			t->t_nitems++;
		} else {
			error = EEXIST;
		}
		break;
	}
	case NPF_TABLE_TREE: {
		const unsigned preflen =
		    (mask == NPF_NO_NETMASK) ? (alen * 8) : mask;
		if (lpm_lookup(t->t_lpm, addr, alen) == NULL &&
		    lpm_insert(t->t_lpm, addr, alen, preflen, ent) == 0) {
			LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
			ent->te_preflen = preflen;
			t->t_nitems++;
			error = 0;
		} else {
			error = EEXIST;
		}
		break;
	}
	case NPF_TABLE_CDB:
		error = EINVAL;
		break;
	default:
		KASSERT(false);
	}
	rw_exit(&t->t_lock);

	if (error) {
		pool_cache_put(tblent_cache, ent);
	}
	return error;
}
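/*
 * Examples of the insert semantics above (illustrative only; "addr" is
 * a placeholder npf_addr_t holding an IPv4 address, hence alen = 4):
 *
 *	npf_table_insert(t, 4, &addr, 24);		// tree: a /24 prefix
 *	npf_table_insert(t, 4, &addr, NPF_NO_NETMASK);	// a single host
 *
 * Hash tables accept only single addresses (any other mask yields
 * EINVAL), and cdb tables are read-only, so inserting returns EINVAL.
 */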
/*
 * npf_table_remove: remove the IP CIDR entry from the table.
 */
int
npf_table_remove(npf_table_t *t, const int alen,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent = NULL;
	int error = ENOENT;

	error = table_cidr_check(aidx, addr, mask);
	if (error) {
		return error;
	}

	rw_enter(&t->t_lock, RW_WRITER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;

		ent = table_hash_lookup(t, addr, alen, &htbl);
		if (__predict_true(ent != NULL)) {
			LIST_REMOVE(ent, te_listent);
			t->t_nitems--;
		}
		break;
	}
	case NPF_TABLE_TREE: {
		ent = lpm_lookup(t->t_lpm, addr, alen);
		if (__predict_true(ent != NULL)) {
			LIST_REMOVE(ent, te_listent);
			lpm_remove(t->t_lpm, &ent->te_addr,
			    ent->te_alen, ent->te_preflen);
			t->t_nitems--;
		}
		break;
	}
	case NPF_TABLE_CDB:
		error = EINVAL;
		break;
	default:
		KASSERT(false);
		ent = NULL;
	}
	rw_exit(&t->t_lock);

	if (ent) {
		pool_cache_put(tblent_cache, ent);
	}
	return error;
}

/*
 * npf_table_lookup: look up and match the specified IP address in the
 * given table.
 */
int
npf_table_lookup(npf_table_t *t, const int alen, const npf_addr_t *addr)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	struct npf_hashl *htbl;
	const void *data;
	size_t dlen;
	bool found;

	if (__predict_false(aidx > 1)) {
		return EINVAL;
	}

	switch (t->t_type) {
	case NPF_TABLE_HASH:
		rw_enter(&t->t_lock, RW_READER);
		found = table_hash_lookup(t, addr, alen, &htbl) != NULL;
		rw_exit(&t->t_lock);
		break;
	case NPF_TABLE_TREE:
		rw_enter(&t->t_lock, RW_READER);
		found = lpm_lookup(t->t_lpm, addr, alen) != NULL;
		rw_exit(&t->t_lock);
		break;
	case NPF_TABLE_CDB:
		if (cdbr_find(t->t_cdb, addr, alen, &data, &dlen) == 0) {
			found = dlen == alen && memcmp(addr, data, dlen) == 0;
		} else {
			found = false;
		}
		break;
	default:
		KASSERT(false);
		found = false;
	}

	return found ? 0 : ENOENT;
}
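/*
 * Note on the match semantics above (informal): hash and cdb storage
 * match the address exactly, while tree storage performs a longest
 * prefix match via the LPM, e.g. looking up a host address matches an
 * entry inserted for its covering /24 prefix.
 */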
static int
table_ent_copyout(const npf_addr_t *addr, const int alen, npf_netmask_t mask,
    void *ubuf, size_t len, size_t *off)
{
	void *ubufp = (uint8_t *)ubuf + *off;
	npf_ioctl_ent_t uent;

	if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
		return ENOMEM;
	}
	uent.alen = alen;
	memcpy(&uent.addr, addr, sizeof(npf_addr_t));
	uent.mask = mask;

	return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
}

static int
table_hash_list(const npf_table_t *t, void *ubuf, size_t len)
{
	size_t off = 0;
	int error = 0;

	for (unsigned n = 0; n <= t->t_hashmask; n++) {
		npf_tblent_t *ent;

		LIST_FOREACH(ent, &t->t_hashl[n], te_listent) {
			error = table_ent_copyout(&ent->te_addr,
			    ent->te_alen, 0, ubuf, len, &off);
			if (error)
				break;
		}
	}
	return error;
}

static int
table_tree_list(const npf_table_t *t, void *ubuf, size_t len)
{
	npf_tblent_t *ent;
	size_t off = 0;
	int error = 0;

	LIST_FOREACH(ent, &t->t_list, te_listent) {
		error = table_ent_copyout(&ent->te_addr,
		    ent->te_alen, 0, ubuf, len, &off);
		if (error)
			break;
	}
	return error;
}

static int
table_cdb_list(npf_table_t *t, void *ubuf, size_t len)
{
	size_t off = 0, dlen;
	const void *data;
	int error = 0;

	for (size_t i = 0; i < t->t_nitems; i++) {
		if (cdbr_get(t->t_cdb, i, &data, &dlen) != 0) {
			return EINVAL;
		}
		error = table_ent_copyout(data, dlen, 0, ubuf, len, &off);
		if (error)
			break;
	}
	return error;
}

/*
 * npf_table_list: copy a list of all table entries into a userspace buffer.
 */
int
npf_table_list(npf_table_t *t, void *ubuf, size_t len)
{
	int error = 0;

	rw_enter(&t->t_lock, RW_READER);
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		error = table_hash_list(t, ubuf, len);
		break;
	case NPF_TABLE_TREE:
		error = table_tree_list(t, ubuf, len);
		break;
	case NPF_TABLE_CDB:
		error = table_cdb_list(t, ubuf, len);
		break;
	default:
		KASSERT(false);
	}
	rw_exit(&t->t_lock);

	return error;
}

/*
 * npf_table_flush: remove all table entries.
 */
int
npf_table_flush(npf_table_t *t)
{
	int error = 0;

	rw_enter(&t->t_lock, RW_WRITER);
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		table_hash_flush(t);
		t->t_nitems = 0;
		break;
	case NPF_TABLE_TREE:
		table_tree_flush(t);
		t->t_nitems = 0;
		break;
	case NPF_TABLE_CDB:
		error = EINVAL;
		break;
	default:
		KASSERT(false);
	}
	rw_exit(&t->t_lock);
	return error;
}
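/*
 * Sizing note for npf_table_list() above (illustrative): every entry is
 * copied out as a fixed-size npf_ioctl_ent_t record, so the userspace
 * buffer must provide at least t_nitems * sizeof(npf_ioctl_ent_t)
 * bytes; otherwise table_ent_copyout() fails with ENOMEM part-way
 * through the listing.
 */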