/*	$NetBSD: npf_tableset.c,v 1.20 2013/11/22 00:25:51 rmind Exp $	*/

/*-
 * Copyright (c) 2009-2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NPF tableset module.
 *
 * Notes
 *
 *	The tableset is an array of tables.  After creation, the array
 *	is immutable.  The caller is responsible for synchronising access
 *	to the tableset.  A table can be either a hash or a tree.  Its
 *	entries are protected by a read-write lock.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.20 2013/11/22 00:25:51 rmind Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/atomic.h>
#include <sys/hash.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/systm.h>

#include "npf_impl.h"

typedef struct npf_tblent {
	union {
		LIST_ENTRY(npf_tblent) hashq;
		pt_node_t	node;
	} te_entry;
	int			te_alen;
	npf_addr_t		te_addr;
} npf_tblent_t;

LIST_HEAD(npf_hashl, npf_tblent);

struct npf_table {
	/*
	 * The storage type can be: a) hash  b) tree.
	 * There are separate trees for IPv4 and IPv6.
	 */
	struct npf_hashl *	t_hashl;
	u_long			t_hashmask;
	pt_tree_t		t_tree[2];

	/*
	 * Table ID, type and lock.  The ID may change during the
	 * config reload; it is protected by the npf_config_lock.
	 */
	int			t_type;
	u_int			t_id;
	krwlock_t		t_lock;

	/* The number of items, reference count and table name. */
	u_int			t_nitems;
	u_int			t_refcnt;
	char			t_name[NPF_TABLE_MAXNAMELEN];
};

struct npf_tableset {
	u_int			ts_nitems;
	npf_table_t *		ts_map[];
};

#define	NPF_TABLESET_SIZE(n)	\
    (offsetof(npf_tableset_t, ts_map[n]) * sizeof(npf_table_t *))

#define	NPF_ADDRLEN2TREE(alen)	((alen) >> 4)

static pool_cache_t		tblent_cache	__read_mostly;
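/*
 * Illustrative example (editor's addition, not part of the original code):
 * NPF_ADDRLEN2TREE() converts an address length into a tree index,
 *
 *	NPF_ADDRLEN2TREE(sizeof(struct in_addr))  == 4 >> 4  == 0	(IPv4)
 *	NPF_ADDRLEN2TREE(sizeof(struct in6_addr)) == 16 >> 4 == 1	(IPv6)
 *
 * which selects t_tree[0] or t_tree[1] in struct npf_table.  The
 * NPF_TABLESET_SIZE(n) value is used consistently for both the allocation
 * and the freeing of a tableset holding n table pointers.
 */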

/*
 * npf_tableset_sysinit: initialise the tableset structures.
 */
void
npf_tableset_sysinit(void)
{
	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
	    0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
}

/*
 * npf_tableset_sysfini: destroy the tableset structures.
 */
void
npf_tableset_sysfini(void)
{
	pool_cache_destroy(tblent_cache);
}

/*
 * npf_tableset_create: allocate a tableset with space for 'nitems' tables.
 */
npf_tableset_t *
npf_tableset_create(u_int nitems)
{
	npf_tableset_t *ts = kmem_zalloc(NPF_TABLESET_SIZE(nitems), KM_SLEEP);
	ts->ts_nitems = nitems;
	return ts;
}

/*
 * npf_tableset_destroy: drop the references to the tables and free the set.
 */
void
npf_tableset_destroy(npf_tableset_t *ts)
{
	/*
	 * Destroy all tables (no references should be held, since the
	 * ruleset should have been destroyed first).
	 */
	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
		npf_table_t *t = ts->ts_map[tid];

		if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) {
			npf_table_destroy(t);
		}
	}
	kmem_free(ts, NPF_TABLESET_SIZE(ts->ts_nitems));
}

/*
 * npf_tableset_insert: insert the table into the specified tableset.
 *
 * => Returns 0 on success; EEXIST if the ID is already in use.
 */
int
npf_tableset_insert(npf_tableset_t *ts, npf_table_t *t)
{
	const u_int tid = t->t_id;
	int error;

	KASSERT((u_int)tid < ts->ts_nitems);

	if (ts->ts_map[tid] == NULL) {
		atomic_inc_uint(&t->t_refcnt);
		ts->ts_map[tid] = t;
		error = 0;
	} else {
		error = EEXIST;
	}
	return error;
}

/*
 * npf_tableset_getbyname: look up a table in the set by its name.
 */
npf_table_t *
npf_tableset_getbyname(npf_tableset_t *ts, const char *name)
{
	npf_table_t *t;

	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
		if ((t = ts->ts_map[tid]) == NULL)
			continue;
		if (strcmp(name, t->t_name) == 0)
			return t;
	}
	return NULL;
}

/*
 * npf_tableset_getbyid: look up a table in the set by its ID.
 */
npf_table_t *
npf_tableset_getbyid(npf_tableset_t *ts, u_int tid)
{
	if (__predict_true(tid < ts->ts_nitems)) {
		return ts->ts_map[tid];
	}
	return NULL;
}

/*
 * npf_tableset_reload: iterate over the tables of the new set and, if a
 * new table has no items and an existing table of the same name and type
 * is found, preserve the old table and its entries.
 *
 * => The caller is responsible for providing synchronisation.
 */
void
npf_tableset_reload(npf_tableset_t *nts, npf_tableset_t *ots)
{
	for (u_int tid = 0; tid < nts->ts_nitems; tid++) {
		npf_table_t *t, *ot;

		if ((t = nts->ts_map[tid]) == NULL) {
			continue;
		}

		/* If our table has entries, just load it. */
		if (t->t_nitems) {
			continue;
		}

		/* Look for an existing table with the same name. */
		ot = npf_tableset_getbyname(ots, t->t_name);
		if (ot == NULL) {
			/* Not found: we have a new table. */
			continue;
		}

		/* Found.  Did the type change? */
		if (t->t_type != ot->t_type) {
			/* Yes, load the new one. */
			continue;
		}

		/*
		 * Preserve the current table.  Acquire a reference since
		 * we are keeping it in the new tableset.  Update its ID.
		 */
		atomic_inc_uint(&ot->t_refcnt);
		nts->ts_map[tid] = ot;

		KASSERT(npf_config_locked_p());
		ot->t_id = tid;

		/* Destroy the new table (we hold the only reference). */
		t->t_refcnt--;
		npf_table_destroy(t);
	}
}
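
/*
 * Illustrative example (editor's addition, not part of the original code;
 * the table name and IDs are made up): suppose the old set contains a
 * populated tree table "blocked" with ID 1, and the reloaded configuration
 * defines an empty tree table "blocked" with ID 3.  npf_tableset_reload()
 * places the old table object into the new set at ID 3, bumps its reference
 * count (it is now a member of both sets) and destroys the freshly created
 * empty table.  The extra reference is dropped when the old tableset is
 * destroyed.
 */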

/*
 * npf_tableset_syncdict: export the tables of the set into the dictionary.
 */
void
npf_tableset_syncdict(const npf_tableset_t *ts, prop_dictionary_t ndict)
{
	prop_array_t tables = prop_array_create();
	const npf_table_t *t;

	KASSERT(npf_config_locked_p());

	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
		if ((t = ts->ts_map[tid]) == NULL) {
			continue;
		}
		prop_dictionary_t tdict = prop_dictionary_create();
		prop_dictionary_set_cstring(tdict, "name", t->t_name);
		prop_dictionary_set_uint32(tdict, "type", t->t_type);
		prop_dictionary_set_uint32(tdict, "id", tid);

		prop_array_add(tables, tdict);
		prop_object_release(tdict);
	}
	prop_dictionary_remove(ndict, "tables");
	prop_dictionary_set(ndict, "tables", tables);
	prop_object_release(tables);
}

/*
 * A few helper routines.
 */

static npf_tblent_t *
table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr,
    const int alen, struct npf_hashl **rhtbl)
{
	const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT);
	struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask];
	npf_tblent_t *ent;

	/*
	 * Look up the hash bucket and check for duplicates.
	 * Note: the mask is ignored for the hash storage.
	 */
	LIST_FOREACH(ent, htbl, te_entry.hashq) {
		if (ent->te_alen != alen) {
			continue;
		}
		if (memcmp(&ent->te_addr, addr, alen) == 0) {
			break;
		}
	}
	*rhtbl = htbl;
	return ent;
}

static void
table_hash_destroy(npf_table_t *t)
{
	for (unsigned n = 0; n <= t->t_hashmask; n++) {
		npf_tblent_t *ent;

		while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
			LIST_REMOVE(ent, te_entry.hashq);
			pool_cache_put(tblent_cache, ent);
		}
	}
}

static void
table_tree_destroy(pt_tree_t *tree)
{
	npf_tblent_t *ent;

	while ((ent = ptree_iterate(tree, NULL, PT_ASCENDING)) != NULL) {
		ptree_remove_node(tree, ent);
		pool_cache_put(tblent_cache, ent);
	}
}

/*
 * npf_table_create: create a table with the specified ID and type.
 */
npf_table_t *
npf_table_create(const char *name, u_int tid, int type, size_t hsize)
{
	npf_table_t *t;

	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
	strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN);

	switch (type) {
	case NPF_TABLE_TREE:
		ptree_init(&t->t_tree[0], &npf_table_ptree_ops,
		    (void *)(sizeof(struct in_addr) / sizeof(uint32_t)),
		    offsetof(npf_tblent_t, te_entry.node),
		    offsetof(npf_tblent_t, te_addr));
		ptree_init(&t->t_tree[1], &npf_table_ptree_ops,
		    (void *)(sizeof(struct in6_addr) / sizeof(uint32_t)),
		    offsetof(npf_tblent_t, te_entry.node),
		    offsetof(npf_tblent_t, te_addr));
		break;
	case NPF_TABLE_HASH:
		t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
		if (t->t_hashl == NULL) {
			kmem_free(t, sizeof(npf_table_t));
			return NULL;
		}
		break;
	default:
		KASSERT(false);
	}
	rw_init(&t->t_lock);
	t->t_type = type;
	t->t_id = tid;

	return t;
}
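
/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * code; the table name, ID and address are made up for illustration):
 *
 *	npf_table_t *t = npf_table_create("blocked", 0, NPF_TABLE_TREE, 0);
 *	npf_addr_t addr;	// e.g. 10.0.0.0, stored in network byte order
 *	...
 *	error = npf_table_insert(t, sizeof(struct in_addr), &addr, 24);
 *
 * A hash table would be created with NPF_TABLE_HASH and a bucket count
 * hint, and can only hold single addresses (mask NPF_NO_NETMASK).
 */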

/*
 * npf_table_destroy: free all table entries and the table itself.
 */
void
npf_table_destroy(npf_table_t *t)
{
	KASSERT(t->t_refcnt == 0);

	switch (t->t_type) {
	case NPF_TABLE_HASH:
		table_hash_destroy(t);
		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
		break;
	case NPF_TABLE_TREE:
		table_tree_destroy(&t->t_tree[0]);
		table_tree_destroy(&t->t_tree[1]);
		break;
	default:
		KASSERT(false);
	}
	rw_destroy(&t->t_lock);
	kmem_free(t, sizeof(npf_table_t));
}

/*
 * npf_table_check: validate the name, ID and type of a new table.
 */
int
npf_table_check(npf_tableset_t *ts, const char *name, u_int tid, int type)
{
	if ((u_int)tid >= ts->ts_nitems) {
		return EINVAL;
	}
	if (ts->ts_map[tid] != NULL) {
		return EEXIST;
	}
	if (type != NPF_TABLE_TREE && type != NPF_TABLE_HASH) {
		return EINVAL;
	}
	if (strlen(name) >= NPF_TABLE_MAXNAMELEN) {
		return ENAMETOOLONG;
	}
	if (npf_tableset_getbyname(ts, name)) {
		return EEXIST;
	}
	return 0;
}

static int
table_cidr_check(const u_int aidx, const npf_addr_t *addr,
    const npf_netmask_t mask)
{
	if (aidx > 1) {
		return EINVAL;
	}
	if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) {
		return EINVAL;
	}

	/*
	 * The maximum mask is 32 for IPv4 (aidx = 0) and 128 for
	 * IPv6 (aidx = 1).  A single host must use NPF_NO_NETMASK.
	 */
	if (mask >= (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) {
		return EINVAL;
	}
	return 0;
}

/*
 * npf_table_insert: add an IP CIDR entry into the table.
 */
int
npf_table_insert(npf_table_t *t, const int alen,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent;
	int error;

	error = table_cidr_check(aidx, addr, mask);
	if (error) {
		return error;
	}
	ent = pool_cache_get(tblent_cache, PR_WAITOK);
	memcpy(&ent->te_addr, addr, alen);
	ent->te_alen = alen;

	/*
	 * Insert the entry.  Return an error on duplicate.
	 */
	rw_enter(&t->t_lock, RW_WRITER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;

		/*
		 * By design, hash tables only support single addresses
		 * (i.e. no masks).
		 */
		if (mask != NPF_NO_NETMASK) {
			error = EINVAL;
			break;
		}
		if (!table_hash_lookup(t, addr, alen, &htbl)) {
			LIST_INSERT_HEAD(htbl, ent, te_entry.hashq);
			t->t_nitems++;
		} else {
			error = EEXIST;
		}
		break;
	}
	case NPF_TABLE_TREE: {
		pt_tree_t *tree = &t->t_tree[aidx];
		bool ok;

		/*
		 * If no mask is specified, use the maximum mask.
		 */
		ok = (mask != NPF_NO_NETMASK) ?
		    ptree_insert_mask_node(tree, ent, mask) :
		    ptree_insert_node(tree, ent);
		if (ok) {
			t->t_nitems++;
			error = 0;
		} else {
			error = EEXIST;
		}
		break;
	}
	default:
		KASSERT(false);
	}
	rw_exit(&t->t_lock);

	if (error) {
		pool_cache_put(tblent_cache, ent);
	}
	return error;
}
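
/*
 * Illustrative example (editor's addition, not part of the original code):
 * for an IPv4 entry (aidx = 0) a CIDR prefix of 0-31 is accepted by
 * table_cidr_check(), while a single host address must be passed with
 * NPF_NO_NETMASK; a mask of, say, 33 is rejected with EINVAL.  For IPv6
 * (aidx = 1) the accepted prefix range is 0-127.
 */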

/*
 * npf_table_remove: remove an IP CIDR entry from the table.
 */
int
npf_table_remove(npf_table_t *t, const int alen,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent;
	int error;

	error = table_cidr_check(aidx, addr, mask);
	if (error) {
		return error;
	}

	rw_enter(&t->t_lock, RW_WRITER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;

		ent = table_hash_lookup(t, addr, alen, &htbl);
		if (__predict_true(ent != NULL)) {
			LIST_REMOVE(ent, te_entry.hashq);
			t->t_nitems--;
		}
		break;
	}
	case NPF_TABLE_TREE: {
		pt_tree_t *tree = &t->t_tree[aidx];

		ent = ptree_find_node(tree, addr);
		if (__predict_true(ent != NULL)) {
			ptree_remove_node(tree, ent);
			t->t_nitems--;
		}
		break;
	}
	default:
		KASSERT(false);
		ent = NULL;
	}
	rw_exit(&t->t_lock);

	if (ent == NULL) {
		return ENOENT;
	}
	pool_cache_put(tblent_cache, ent);
	return 0;
}

/*
 * npf_table_lookup: match the specified IP address against the contents
 * of the table.
 */
int
npf_table_lookup(npf_table_t *t, const int alen, const npf_addr_t *addr)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent;

	if (__predict_false(aidx > 1)) {
		return EINVAL;
	}

	rw_enter(&t->t_lock, RW_READER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;
		ent = table_hash_lookup(t, addr, alen, &htbl);
		break;
	}
	case NPF_TABLE_TREE: {
		ent = ptree_find_node(&t->t_tree[aidx], addr);
		break;
	}
	default:
		KASSERT(false);
		ent = NULL;
	}
	rw_exit(&t->t_lock);

	return ent ? 0 : ENOENT;
}

static int
table_ent_copyout(npf_tblent_t *ent, npf_netmask_t mask,
    void *ubuf, size_t len, size_t *off)
{
	void *ubufp = (uint8_t *)ubuf + *off;
	npf_ioctl_ent_t uent;

	if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
		return ENOMEM;
	}
	uent.alen = ent->te_alen;
	memcpy(&uent.addr, &ent->te_addr, sizeof(npf_addr_t));
	uent.mask = mask;

	return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
}

static int
table_tree_list(pt_tree_t *tree, npf_netmask_t maxmask, void *ubuf,
    size_t len, size_t *off)
{
	npf_tblent_t *ent = NULL;
	int error = 0;

	while ((ent = ptree_iterate(tree, ent, PT_ASCENDING)) != NULL) {
		pt_bitlen_t blen;

		if (!ptree_mask_node_p(tree, ent, &blen)) {
			blen = maxmask;
		}
		error = table_ent_copyout(ent, blen, ubuf, len, off);
		if (error)
			break;
	}
	return error;
}
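
/*
 * Illustrative note (editor's addition, not part of the original code):
 * the user buffer filled by table_ent_copyout() is a contiguous array of
 * npf_ioctl_ent_t records; *off advances by sizeof(npf_ioctl_ent_t) per
 * entry, so a buffer with len == 2 * sizeof(npf_ioctl_ent_t) receives at
 * most two entries and the third attempt returns ENOMEM.
 */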

/*
 * npf_table_list: copy a list of all table entries into a userspace buffer.
 */
int
npf_table_list(npf_table_t *t, void *ubuf, size_t len)
{
	size_t off = 0;
	int error = 0;

	rw_enter(&t->t_lock, RW_READER);
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		for (unsigned n = 0; n <= t->t_hashmask; n++) {
			npf_tblent_t *ent;

			LIST_FOREACH(ent, &t->t_hashl[n], te_entry.hashq)
				if ((error = table_ent_copyout(ent, 0, ubuf,
				    len, &off)) != 0)
					break;
		}
		break;
	case NPF_TABLE_TREE:
		error = table_tree_list(&t->t_tree[0], 32, ubuf, len, &off);
		if (error)
			break;
		error = table_tree_list(&t->t_tree[1], 128, ubuf, len, &off);
		break;
	default:
		KASSERT(false);
	}
	rw_exit(&t->t_lock);

	return error;
}

/*
 * npf_table_flush: remove all table entries.
 */
int
npf_table_flush(npf_table_t *t)
{
	rw_enter(&t->t_lock, RW_WRITER);
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		table_hash_destroy(t);
		t->t_nitems = 0;
		break;
	case NPF_TABLE_TREE:
		table_tree_destroy(&t->t_tree[0]);
		table_tree_destroy(&t->t_tree[1]);
		t->t_nitems = 0;
		break;
	default:
		KASSERT(false);
	}
	rw_exit(&t->t_lock);
	return 0;
}