1 /* $NetBSD: npf_tableset.c,v 1.26 2017/01/02 21:49:51 rmind Exp $ */ 2 3 /*- 4 * Copyright (c) 2009-2016 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This material is based upon work partially supported by The 8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * NPF tableset module. 34 * 35 * Notes 36 * 37 * The tableset is an array of tables. After the creation, the array 38 * is immutable. The caller is responsible to synchronise the access 39 * to the tableset. The table can either be a hash or a tree. Its 40 * entries are protected by a read-write lock. 41 */ 42 43 #ifdef _KERNEL 44 #include <sys/cdefs.h> 45 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.26 2017/01/02 21:49:51 rmind Exp $"); 46 47 #include <sys/param.h> 48 #include <sys/types.h> 49 50 #include <sys/atomic.h> 51 #include <sys/hash.h> 52 #include <sys/cdbr.h> 53 #include <sys/kmem.h> 54 #include <sys/malloc.h> 55 #include <sys/pool.h> 56 #include <sys/queue.h> 57 #include <sys/rwlock.h> 58 #include <sys/systm.h> 59 #include <sys/types.h> 60 61 #include "lpm.h" 62 #endif 63 64 #include "npf_impl.h" 65 66 typedef struct npf_tblent { 67 LIST_ENTRY(npf_tblent) te_listent; 68 uint16_t te_preflen; 69 uint16_t te_alen; 70 npf_addr_t te_addr; 71 } npf_tblent_t; 72 73 LIST_HEAD(npf_hashl, npf_tblent); 74 75 struct npf_table { 76 /* 77 * The storage type can be: a) hash b) tree c) cdb. 78 * There are separate trees for IPv4 and IPv6. 79 */ 80 union { 81 struct { 82 struct npf_hashl *t_hashl; 83 u_long t_hashmask; 84 }; 85 struct { 86 lpm_t * t_lpm; 87 LIST_HEAD(, npf_tblent) t_list; 88 }; 89 struct { 90 void * t_blob; 91 size_t t_bsize; 92 struct cdbr * t_cdb; 93 }; 94 } /* C11 */; 95 96 /* 97 * Table ID, type and lock. The ID may change during the 98 * config reload, it is protected by the npf_config_lock. 99 */ 100 int t_type; 101 u_int t_id; 102 krwlock_t t_lock; 103 104 /* The number of items, reference count and table name. */ 105 u_int t_nitems; 106 u_int t_refcnt; 107 char t_name[NPF_TABLE_MAXNAMELEN]; 108 }; 109 110 struct npf_tableset { 111 u_int ts_nitems; 112 npf_table_t * ts_map[]; 113 }; 114 115 #define NPF_TABLESET_SIZE(n) \ 116 (offsetof(npf_tableset_t, ts_map[n]) * sizeof(npf_table_t *)) 117 118 #define NPF_ADDRLEN2TREE(alen) ((alen) >> 4) 119 120 static pool_cache_t tblent_cache __read_mostly; 121 122 /* 123 * npf_table_sysinit: initialise tableset structures. 124 */ 125 void 126 npf_tableset_sysinit(void) 127 { 128 tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit, 129 0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL); 130 } 131 132 void 133 npf_tableset_sysfini(void) 134 { 135 pool_cache_destroy(tblent_cache); 136 } 137 138 npf_tableset_t * 139 npf_tableset_create(u_int nitems) 140 { 141 npf_tableset_t *ts = kmem_zalloc(NPF_TABLESET_SIZE(nitems), KM_SLEEP); 142 ts->ts_nitems = nitems; 143 return ts; 144 } 145 146 void 147 npf_tableset_destroy(npf_tableset_t *ts) 148 { 149 /* 150 * Destroy all tables (no references should be held, since the 151 * ruleset should be destroyed before). 152 */ 153 for (u_int tid = 0; tid < ts->ts_nitems; tid++) { 154 npf_table_t *t = ts->ts_map[tid]; 155 156 if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) { 157 npf_table_destroy(t); 158 } 159 } 160 kmem_free(ts, NPF_TABLESET_SIZE(ts->ts_nitems)); 161 } 162 163 /* 164 * npf_tableset_insert: insert the table into the specified tableset. 165 * 166 * => Returns 0 on success. Fails and returns error if ID is already used. 167 */ 168 int 169 npf_tableset_insert(npf_tableset_t *ts, npf_table_t *t) 170 { 171 const u_int tid = t->t_id; 172 int error; 173 174 KASSERT((u_int)tid < ts->ts_nitems); 175 176 if (ts->ts_map[tid] == NULL) { 177 atomic_inc_uint(&t->t_refcnt); 178 ts->ts_map[tid] = t; 179 error = 0; 180 } else { 181 error = EEXIST; 182 } 183 return error; 184 } 185 186 npf_table_t * 187 npf_tableset_swap(npf_tableset_t *ts, npf_table_t *newt) 188 { 189 const u_int tid = newt->t_id; 190 npf_table_t *oldt = ts->ts_map[tid]; 191 192 KASSERT(tid < ts->ts_nitems); 193 KASSERT(oldt->t_id == newt->t_id); 194 195 newt->t_refcnt = oldt->t_refcnt; 196 oldt->t_refcnt = 0; 197 198 return atomic_swap_ptr(&ts->ts_map[tid], newt); 199 } 200 201 /* 202 * npf_tableset_getbyname: look for a table in the set given the name. 203 */ 204 npf_table_t * 205 npf_tableset_getbyname(npf_tableset_t *ts, const char *name) 206 { 207 npf_table_t *t; 208 209 for (u_int tid = 0; tid < ts->ts_nitems; tid++) { 210 if ((t = ts->ts_map[tid]) == NULL) 211 continue; 212 if (strcmp(name, t->t_name) == 0) 213 return t; 214 } 215 return NULL; 216 } 217 218 npf_table_t * 219 npf_tableset_getbyid(npf_tableset_t *ts, u_int tid) 220 { 221 if (__predict_true(tid < ts->ts_nitems)) { 222 return ts->ts_map[tid]; 223 } 224 return NULL; 225 } 226 227 /* 228 * npf_tableset_reload: iterate all tables and if the new table is of the 229 * same type and has no items, then we preserve the old one and its entries. 230 * 231 * => The caller is responsible for providing synchronisation. 232 */ 233 void 234 npf_tableset_reload(npf_t *npf, npf_tableset_t *nts, npf_tableset_t *ots) 235 { 236 for (u_int tid = 0; tid < nts->ts_nitems; tid++) { 237 npf_table_t *t, *ot; 238 239 if ((t = nts->ts_map[tid]) == NULL) { 240 continue; 241 } 242 243 /* If our table has entries, just load it. */ 244 if (t->t_nitems) { 245 continue; 246 } 247 248 /* Look for a currently existing table with such name. */ 249 ot = npf_tableset_getbyname(ots, t->t_name); 250 if (ot == NULL) { 251 /* Not found: we have a new table. */ 252 continue; 253 } 254 255 /* Found. Did the type change? */ 256 if (t->t_type != ot->t_type) { 257 /* Yes, load the new. */ 258 continue; 259 } 260 261 /* 262 * Preserve the current table. Acquire a reference since 263 * we are keeping it in the old table set. Update its ID. 264 */ 265 atomic_inc_uint(&ot->t_refcnt); 266 nts->ts_map[tid] = ot; 267 268 KASSERT(npf_config_locked_p(npf)); 269 ot->t_id = tid; 270 271 /* Destroy the new table (we hold the only reference). */ 272 t->t_refcnt--; 273 npf_table_destroy(t); 274 } 275 } 276 277 int 278 npf_tableset_export(npf_t *npf, const npf_tableset_t *ts, prop_array_t tables) 279 { 280 const npf_table_t *t; 281 282 KASSERT(npf_config_locked_p(npf)); 283 284 for (u_int tid = 0; tid < ts->ts_nitems; tid++) { 285 if ((t = ts->ts_map[tid]) == NULL) { 286 continue; 287 } 288 prop_dictionary_t tdict = prop_dictionary_create(); 289 prop_dictionary_set_cstring(tdict, "name", t->t_name); 290 prop_dictionary_set_uint32(tdict, "type", t->t_type); 291 prop_dictionary_set_uint32(tdict, "id", tid); 292 293 prop_array_add(tables, tdict); 294 prop_object_release(tdict); 295 } 296 return 0; 297 } 298 299 /* 300 * Few helper routines. 301 */ 302 303 static npf_tblent_t * 304 table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr, 305 const int alen, struct npf_hashl **rhtbl) 306 { 307 const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT); 308 struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask]; 309 npf_tblent_t *ent; 310 311 /* 312 * Lookup the hash table and check for duplicates. 313 * Note: mask is ignored for the hash storage. 314 */ 315 LIST_FOREACH(ent, htbl, te_listent) { 316 if (ent->te_alen != alen) { 317 continue; 318 } 319 if (memcmp(&ent->te_addr, addr, alen) == 0) { 320 break; 321 } 322 } 323 *rhtbl = htbl; 324 return ent; 325 } 326 327 static void 328 table_hash_flush(npf_table_t *t) 329 { 330 for (unsigned n = 0; n <= t->t_hashmask; n++) { 331 npf_tblent_t *ent; 332 333 while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) { 334 LIST_REMOVE(ent, te_listent); 335 pool_cache_put(tblent_cache, ent); 336 } 337 } 338 } 339 340 static void 341 table_tree_flush(npf_table_t *t) 342 { 343 npf_tblent_t *ent; 344 345 while ((ent = LIST_FIRST(&t->t_list)) != NULL) { 346 LIST_REMOVE(ent, te_listent); 347 pool_cache_put(tblent_cache, ent); 348 } 349 lpm_clear(t->t_lpm, NULL, NULL); 350 } 351 352 /* 353 * npf_table_create: create table with a specified ID. 354 */ 355 npf_table_t * 356 npf_table_create(const char *name, u_int tid, int type, 357 void *blob, size_t size) 358 { 359 npf_table_t *t; 360 361 t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP); 362 strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN); 363 364 switch (type) { 365 case NPF_TABLE_TREE: 366 if ((t->t_lpm = lpm_create()) == NULL) { 367 goto out; 368 } 369 LIST_INIT(&t->t_list); 370 break; 371 case NPF_TABLE_HASH: 372 size = MIN(size, 128); 373 t->t_hashl = hashinit(size, HASH_LIST, true, &t->t_hashmask); 374 if (t->t_hashl == NULL) { 375 goto out; 376 } 377 break; 378 case NPF_TABLE_CDB: 379 t->t_blob = blob; 380 t->t_bsize = size; 381 t->t_cdb = cdbr_open_mem(blob, size, CDBR_DEFAULT, NULL, NULL); 382 if (t->t_cdb == NULL) { 383 free(blob, M_TEMP); 384 goto out; 385 } 386 t->t_nitems = cdbr_entries(t->t_cdb); 387 break; 388 default: 389 KASSERT(false); 390 } 391 rw_init(&t->t_lock); 392 t->t_type = type; 393 t->t_id = tid; 394 return t; 395 out: 396 kmem_free(t, sizeof(npf_table_t)); 397 return NULL; 398 } 399 400 /* 401 * npf_table_destroy: free all table entries and table itself. 402 */ 403 void 404 npf_table_destroy(npf_table_t *t) 405 { 406 KASSERT(t->t_refcnt == 0); 407 408 switch (t->t_type) { 409 case NPF_TABLE_HASH: 410 table_hash_flush(t); 411 hashdone(t->t_hashl, HASH_LIST, t->t_hashmask); 412 break; 413 case NPF_TABLE_TREE: 414 table_tree_flush(t); 415 lpm_destroy(t->t_lpm); 416 break; 417 case NPF_TABLE_CDB: 418 cdbr_close(t->t_cdb); 419 free(t->t_blob, M_TEMP); 420 break; 421 default: 422 KASSERT(false); 423 } 424 rw_destroy(&t->t_lock); 425 kmem_free(t, sizeof(npf_table_t)); 426 } 427 428 u_int 429 npf_table_getid(npf_table_t *t) 430 { 431 return t->t_id; 432 } 433 434 /* 435 * npf_table_check: validate the name, ID and type. 436 */ 437 int 438 npf_table_check(npf_tableset_t *ts, const char *name, u_int tid, int type) 439 { 440 if ((u_int)tid >= ts->ts_nitems) { 441 return EINVAL; 442 } 443 if (ts->ts_map[tid] != NULL) { 444 return EEXIST; 445 } 446 switch (type) { 447 case NPF_TABLE_TREE: 448 case NPF_TABLE_HASH: 449 case NPF_TABLE_CDB: 450 break; 451 default: 452 return EINVAL; 453 } 454 if (strlen(name) >= NPF_TABLE_MAXNAMELEN) { 455 return ENAMETOOLONG; 456 } 457 if (npf_tableset_getbyname(ts, name)) { 458 return EEXIST; 459 } 460 return 0; 461 } 462 463 static int 464 table_cidr_check(const u_int aidx, const npf_addr_t *addr, 465 const npf_netmask_t mask) 466 { 467 if (aidx > 1) { 468 return EINVAL; 469 } 470 if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) { 471 return EINVAL; 472 } 473 474 /* 475 * For IPv4 (aidx = 0) - 32 and for IPv6 (aidx = 1) - 128. 476 * If it is a host - shall use NPF_NO_NETMASK. 477 */ 478 if (mask > (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) { 479 return EINVAL; 480 } 481 return 0; 482 } 483 484 /* 485 * npf_table_insert: add an IP CIDR entry into the table. 486 */ 487 int 488 npf_table_insert(npf_table_t *t, const int alen, 489 const npf_addr_t *addr, const npf_netmask_t mask) 490 { 491 const u_int aidx = NPF_ADDRLEN2TREE(alen); 492 npf_tblent_t *ent; 493 int error; 494 495 error = table_cidr_check(aidx, addr, mask); 496 if (error) { 497 return error; 498 } 499 ent = pool_cache_get(tblent_cache, PR_WAITOK); 500 memcpy(&ent->te_addr, addr, alen); 501 ent->te_alen = alen; 502 503 /* 504 * Insert the entry. Return an error on duplicate. 505 */ 506 rw_enter(&t->t_lock, RW_WRITER); 507 switch (t->t_type) { 508 case NPF_TABLE_HASH: { 509 struct npf_hashl *htbl; 510 511 /* 512 * Hash tables by the concept support only IPs. 513 */ 514 if (mask != NPF_NO_NETMASK) { 515 error = EINVAL; 516 break; 517 } 518 if (!table_hash_lookup(t, addr, alen, &htbl)) { 519 LIST_INSERT_HEAD(htbl, ent, te_listent); 520 t->t_nitems++; 521 } else { 522 error = EEXIST; 523 } 524 break; 525 } 526 case NPF_TABLE_TREE: { 527 const unsigned preflen = 528 (mask == NPF_NO_NETMASK) ? (alen * 8) : mask; 529 if (lpm_lookup(t->t_lpm, addr, alen) == NULL && 530 lpm_insert(t->t_lpm, addr, alen, preflen, ent) == 0) { 531 LIST_INSERT_HEAD(&t->t_list, ent, te_listent); 532 ent->te_preflen = preflen; 533 t->t_nitems++; 534 error = 0; 535 } else { 536 error = EEXIST; 537 } 538 break; 539 } 540 case NPF_TABLE_CDB: 541 error = EINVAL; 542 break; 543 default: 544 KASSERT(false); 545 } 546 rw_exit(&t->t_lock); 547 548 if (error) { 549 pool_cache_put(tblent_cache, ent); 550 } 551 return error; 552 } 553 554 /* 555 * npf_table_remove: remove the IP CIDR entry from the table. 556 */ 557 int 558 npf_table_remove(npf_table_t *t, const int alen, 559 const npf_addr_t *addr, const npf_netmask_t mask) 560 { 561 const u_int aidx = NPF_ADDRLEN2TREE(alen); 562 npf_tblent_t *ent = NULL; 563 int error = ENOENT; 564 565 error = table_cidr_check(aidx, addr, mask); 566 if (error) { 567 return error; 568 } 569 570 rw_enter(&t->t_lock, RW_WRITER); 571 switch (t->t_type) { 572 case NPF_TABLE_HASH: { 573 struct npf_hashl *htbl; 574 575 ent = table_hash_lookup(t, addr, alen, &htbl); 576 if (__predict_true(ent != NULL)) { 577 LIST_REMOVE(ent, te_listent); 578 t->t_nitems--; 579 } 580 break; 581 } 582 case NPF_TABLE_TREE: { 583 ent = lpm_lookup(t->t_lpm, addr, alen); 584 if (__predict_true(ent != NULL)) { 585 LIST_REMOVE(ent, te_listent); 586 lpm_remove(t->t_lpm, &ent->te_addr, 587 ent->te_alen, ent->te_preflen); 588 t->t_nitems--; 589 } 590 break; 591 } 592 case NPF_TABLE_CDB: 593 error = EINVAL; 594 break; 595 default: 596 KASSERT(false); 597 ent = NULL; 598 } 599 rw_exit(&t->t_lock); 600 601 if (ent) { 602 pool_cache_put(tblent_cache, ent); 603 } 604 return error; 605 } 606 607 /* 608 * npf_table_lookup: find the table according to ID, lookup and match 609 * the contents with the specified IP address. 610 */ 611 int 612 npf_table_lookup(npf_table_t *t, const int alen, const npf_addr_t *addr) 613 { 614 const u_int aidx = NPF_ADDRLEN2TREE(alen); 615 struct npf_hashl *htbl; 616 const void *data; 617 size_t dlen; 618 bool found; 619 620 if (__predict_false(aidx > 1)) { 621 return EINVAL; 622 } 623 624 switch (t->t_type) { 625 case NPF_TABLE_HASH: 626 rw_enter(&t->t_lock, RW_READER); 627 found = table_hash_lookup(t, addr, alen, &htbl) != NULL; 628 rw_exit(&t->t_lock); 629 break; 630 case NPF_TABLE_TREE: 631 rw_enter(&t->t_lock, RW_READER); 632 found = lpm_lookup(t->t_lpm, addr, alen) != NULL; 633 rw_exit(&t->t_lock); 634 break; 635 case NPF_TABLE_CDB: 636 if (cdbr_find(t->t_cdb, addr, alen, &data, &dlen) == 0) { 637 found = dlen == (u_int)alen && 638 memcmp(addr, data, dlen) == 0; 639 } else { 640 found = false; 641 } 642 break; 643 default: 644 KASSERT(false); 645 found = false; 646 } 647 648 return found ? 0 : ENOENT; 649 } 650 651 static int 652 table_ent_copyout(const npf_addr_t *addr, const int alen, npf_netmask_t mask, 653 void *ubuf, size_t len, size_t *off) 654 { 655 void *ubufp = (uint8_t *)ubuf + *off; 656 npf_ioctl_ent_t uent; 657 658 if ((*off += sizeof(npf_ioctl_ent_t)) > len) { 659 return ENOMEM; 660 } 661 uent.alen = alen; 662 memcpy(&uent.addr, addr, sizeof(npf_addr_t)); 663 uent.mask = mask; 664 665 return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t)); 666 } 667 668 static int 669 table_hash_list(const npf_table_t *t, void *ubuf, size_t len) 670 { 671 size_t off = 0; 672 int error = 0; 673 674 for (unsigned n = 0; n <= t->t_hashmask; n++) { 675 npf_tblent_t *ent; 676 677 LIST_FOREACH(ent, &t->t_hashl[n], te_listent) { 678 error = table_ent_copyout(&ent->te_addr, 679 ent->te_alen, 0, ubuf, len, &off); 680 if (error) 681 break; 682 } 683 } 684 return error; 685 } 686 687 static int 688 table_tree_list(const npf_table_t *t, void *ubuf, size_t len) 689 { 690 npf_tblent_t *ent; 691 size_t off = 0; 692 int error = 0; 693 694 LIST_FOREACH(ent, &t->t_list, te_listent) { 695 error = table_ent_copyout(&ent->te_addr, 696 ent->te_alen, 0, ubuf, len, &off); 697 if (error) 698 break; 699 } 700 return error; 701 } 702 703 static int 704 table_cdb_list(npf_table_t *t, void *ubuf, size_t len) 705 { 706 size_t off = 0, dlen; 707 const void *data; 708 int error = 0; 709 710 for (size_t i = 0; i < t->t_nitems; i++) { 711 if (cdbr_get(t->t_cdb, i, &data, &dlen) != 0) { 712 return EINVAL; 713 } 714 error = table_ent_copyout(data, dlen, 0, ubuf, len, &off); 715 if (error) 716 break; 717 } 718 return error; 719 } 720 721 /* 722 * npf_table_list: copy a list of all table entries into a userspace buffer. 723 */ 724 int 725 npf_table_list(npf_table_t *t, void *ubuf, size_t len) 726 { 727 int error = 0; 728 729 rw_enter(&t->t_lock, RW_READER); 730 switch (t->t_type) { 731 case NPF_TABLE_HASH: 732 error = table_hash_list(t, ubuf, len); 733 break; 734 case NPF_TABLE_TREE: 735 error = table_tree_list(t, ubuf, len); 736 break; 737 case NPF_TABLE_CDB: 738 error = table_cdb_list(t, ubuf, len); 739 break; 740 default: 741 KASSERT(false); 742 } 743 rw_exit(&t->t_lock); 744 745 return error; 746 } 747 748 /* 749 * npf_table_flush: remove all table entries. 750 */ 751 int 752 npf_table_flush(npf_table_t *t) 753 { 754 int error = 0; 755 756 rw_enter(&t->t_lock, RW_WRITER); 757 switch (t->t_type) { 758 case NPF_TABLE_HASH: 759 table_hash_flush(t); 760 t->t_nitems = 0; 761 break; 762 case NPF_TABLE_TREE: 763 table_tree_flush(t); 764 t->t_nitems = 0; 765 break; 766 case NPF_TABLE_CDB: 767 error = EINVAL; 768 break; 769 default: 770 KASSERT(false); 771 } 772 rw_exit(&t->t_lock); 773 return error; 774 } 775