1 /* $NetBSD: npf_tableset.c,v 1.17 2013/02/09 03:35:32 rmind Exp $ */ 2 3 /*- 4 * Copyright (c) 2009-2012 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This material is based upon work partially supported by The 8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * NPF tableset module. 34 * 35 * Notes 36 * 37 * The tableset is an array of tables. After the creation, the array 38 * is immutable. The caller is responsible to synchronise the access 39 * to the tableset. The table can either be a hash or a tree. Its 40 * entries are protected by a read-write lock. 41 */ 42 43 #include <sys/cdefs.h> 44 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.17 2013/02/09 03:35:32 rmind Exp $"); 45 46 #include <sys/param.h> 47 #include <sys/types.h> 48 49 #include <sys/atomic.h> 50 #include <sys/hash.h> 51 #include <sys/kmem.h> 52 #include <sys/pool.h> 53 #include <sys/queue.h> 54 #include <sys/rwlock.h> 55 #include <sys/systm.h> 56 #include <sys/types.h> 57 58 #include "npf_impl.h" 59 60 /* 61 * Table structures. 62 */ 63 64 typedef struct npf_tblent { 65 union { 66 LIST_ENTRY(npf_tblent) hashq; 67 pt_node_t node; 68 } te_entry; 69 int te_alen; 70 npf_addr_t te_addr; 71 } npf_tblent_t; 72 73 LIST_HEAD(npf_hashl, npf_tblent); 74 75 struct npf_table { 76 char t_name[16]; 77 /* Lock and reference count. */ 78 krwlock_t t_lock; 79 u_int t_refcnt; 80 /* Total number of items. */ 81 u_int t_nitems; 82 /* Table ID. */ 83 u_int t_id; 84 /* The storage type can be: a) hash b) tree. */ 85 int t_type; 86 struct npf_hashl * t_hashl; 87 u_long t_hashmask; 88 /* Separate trees for IPv4 and IPv6. */ 89 pt_tree_t t_tree[2]; 90 }; 91 92 #define NPF_ADDRLEN2TREE(alen) ((alen) >> 4) 93 94 static pool_cache_t tblent_cache __read_mostly; 95 96 /* 97 * npf_table_sysinit: initialise tableset structures. 98 */ 99 void 100 npf_tableset_sysinit(void) 101 { 102 103 tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit, 104 0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL); 105 } 106 107 void 108 npf_tableset_sysfini(void) 109 { 110 111 pool_cache_destroy(tblent_cache); 112 } 113 114 npf_tableset_t * 115 npf_tableset_create(void) 116 { 117 const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *); 118 119 return kmem_zalloc(sz, KM_SLEEP); 120 } 121 122 void 123 npf_tableset_destroy(npf_tableset_t *tblset) 124 { 125 const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *); 126 npf_table_t *t; 127 u_int tid; 128 129 /* 130 * Destroy all tables (no references should be held, as ruleset 131 * should be destroyed before). 132 */ 133 for (tid = 0; tid < NPF_TABLE_SLOTS; tid++) { 134 t = tblset[tid]; 135 if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) { 136 npf_table_destroy(t); 137 } 138 } 139 kmem_free(tblset, sz); 140 } 141 142 /* 143 * npf_tableset_insert: insert the table into the specified tableset. 144 * 145 * => Returns 0 on success. Fails and returns error if ID is already used. 146 */ 147 int 148 npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t) 149 { 150 const u_int tid = t->t_id; 151 int error; 152 153 KASSERT((u_int)tid < NPF_TABLE_SLOTS); 154 155 if (tblset[tid] == NULL) { 156 atomic_inc_uint(&t->t_refcnt); 157 tblset[tid] = t; 158 error = 0; 159 } else { 160 error = EEXIST; 161 } 162 return error; 163 } 164 165 /* 166 * npf_tableset_reload: iterate all tables and if the new table is of the 167 * same type and has no items, then we preserve the old one and its entries. 168 * 169 * => The caller is responsible for providing synchronisation. 170 */ 171 void 172 npf_tableset_reload(npf_tableset_t *ntset, npf_tableset_t *otset) 173 { 174 for (int i = 0; i < NPF_TABLE_SLOTS; i++) { 175 npf_table_t *t = ntset[i], *ot = otset[i]; 176 177 if (t == NULL || ot == NULL) { 178 continue; 179 } 180 if (t->t_nitems || t->t_type != ot->t_type) { 181 continue; 182 } 183 184 /* 185 * Acquire a reference since the table has to be kept 186 * in the old tableset. 187 */ 188 atomic_inc_uint(&ot->t_refcnt); 189 ntset[i] = ot; 190 191 /* Only reference, never been visible. */ 192 t->t_refcnt--; 193 npf_table_destroy(t); 194 } 195 } 196 197 /* 198 * Few helper routines. 199 */ 200 201 static npf_tblent_t * 202 table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr, 203 const int alen, struct npf_hashl **rhtbl) 204 { 205 const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT); 206 struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask]; 207 npf_tblent_t *ent; 208 209 /* 210 * Lookup the hash table and check for duplicates. 211 * Note: mask is ignored for the hash storage. 212 */ 213 LIST_FOREACH(ent, htbl, te_entry.hashq) { 214 if (ent->te_alen != alen) { 215 continue; 216 } 217 if (memcmp(&ent->te_addr, addr, alen) == 0) { 218 break; 219 } 220 } 221 *rhtbl = htbl; 222 return ent; 223 } 224 225 static void 226 table_tree_destroy(pt_tree_t *tree) 227 { 228 npf_tblent_t *ent; 229 230 while ((ent = ptree_iterate(tree, NULL, PT_ASCENDING)) != NULL) { 231 ptree_remove_node(tree, ent); 232 pool_cache_put(tblent_cache, ent); 233 } 234 } 235 236 /* 237 * npf_table_create: create table with a specified ID. 238 */ 239 npf_table_t * 240 npf_table_create(u_int tid, int type, size_t hsize) 241 { 242 npf_table_t *t; 243 244 KASSERT((u_int)tid < NPF_TABLE_SLOTS); 245 246 t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP); 247 switch (type) { 248 case NPF_TABLE_TREE: 249 ptree_init(&t->t_tree[0], &npf_table_ptree_ops, 250 (void *)(sizeof(struct in_addr) / sizeof(uint32_t)), 251 offsetof(npf_tblent_t, te_entry.node), 252 offsetof(npf_tblent_t, te_addr)); 253 ptree_init(&t->t_tree[1], &npf_table_ptree_ops, 254 (void *)(sizeof(struct in6_addr) / sizeof(uint32_t)), 255 offsetof(npf_tblent_t, te_entry.node), 256 offsetof(npf_tblent_t, te_addr)); 257 break; 258 case NPF_TABLE_HASH: 259 t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask); 260 if (t->t_hashl == NULL) { 261 kmem_free(t, sizeof(npf_table_t)); 262 return NULL; 263 } 264 break; 265 default: 266 KASSERT(false); 267 } 268 rw_init(&t->t_lock); 269 t->t_type = type; 270 t->t_id = tid; 271 272 return t; 273 } 274 275 /* 276 * npf_table_destroy: free all table entries and table itself. 277 */ 278 void 279 npf_table_destroy(npf_table_t *t) 280 { 281 KASSERT(t->t_refcnt == 0); 282 283 switch (t->t_type) { 284 case NPF_TABLE_HASH: 285 for (unsigned n = 0; n <= t->t_hashmask; n++) { 286 npf_tblent_t *ent; 287 288 while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) { 289 LIST_REMOVE(ent, te_entry.hashq); 290 pool_cache_put(tblent_cache, ent); 291 } 292 } 293 hashdone(t->t_hashl, HASH_LIST, t->t_hashmask); 294 break; 295 case NPF_TABLE_TREE: 296 table_tree_destroy(&t->t_tree[0]); 297 table_tree_destroy(&t->t_tree[1]); 298 break; 299 default: 300 KASSERT(false); 301 } 302 rw_destroy(&t->t_lock); 303 kmem_free(t, sizeof(npf_table_t)); 304 } 305 306 /* 307 * npf_table_check: validate ID and type. 308 */ 309 int 310 npf_table_check(const npf_tableset_t *tset, u_int tid, int type) 311 { 312 313 if ((u_int)tid >= NPF_TABLE_SLOTS) { 314 return EINVAL; 315 } 316 if (tset[tid] != NULL) { 317 return EEXIST; 318 } 319 if (type != NPF_TABLE_TREE && type != NPF_TABLE_HASH) { 320 return EINVAL; 321 } 322 return 0; 323 } 324 325 static int 326 table_cidr_check(const u_int aidx, const npf_addr_t *addr, 327 const npf_netmask_t mask) 328 { 329 330 if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) { 331 return EINVAL; 332 } 333 if (aidx > 1) { 334 return EINVAL; 335 } 336 337 /* 338 * For IPv4 (aidx = 0) - 32 and for IPv6 (aidx = 1) - 128. 339 * If it is a host - shall use NPF_NO_NETMASK. 340 */ 341 if (mask >= (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) { 342 return EINVAL; 343 } 344 return 0; 345 } 346 347 /* 348 * npf_table_insert: add an IP CIDR entry into the table. 349 */ 350 int 351 npf_table_insert(npf_tableset_t *tset, u_int tid, const int alen, 352 const npf_addr_t *addr, const npf_netmask_t mask) 353 { 354 const u_int aidx = NPF_ADDRLEN2TREE(alen); 355 npf_tblent_t *ent; 356 npf_table_t *t; 357 int error; 358 359 if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) { 360 return EINVAL; 361 } 362 363 error = table_cidr_check(aidx, addr, mask); 364 if (error) { 365 return error; 366 } 367 ent = pool_cache_get(tblent_cache, PR_WAITOK); 368 memcpy(&ent->te_addr, addr, alen); 369 ent->te_alen = alen; 370 371 /* 372 * Insert the entry. Return an error on duplicate. 373 */ 374 rw_enter(&t->t_lock, RW_WRITER); 375 switch (t->t_type) { 376 case NPF_TABLE_HASH: { 377 struct npf_hashl *htbl; 378 379 /* 380 * Hash tables by the concept support only IPs. 381 */ 382 if (mask != NPF_NO_NETMASK) { 383 error = EINVAL; 384 break; 385 } 386 if (!table_hash_lookup(t, addr, alen, &htbl)) { 387 LIST_INSERT_HEAD(htbl, ent, te_entry.hashq); 388 t->t_nitems++; 389 } else { 390 error = EEXIST; 391 } 392 break; 393 } 394 case NPF_TABLE_TREE: { 395 pt_tree_t *tree = &t->t_tree[aidx]; 396 bool ok; 397 398 /* 399 * If no mask specified, use maximum mask. 400 */ 401 ok = (mask != NPF_NO_NETMASK) ? 402 ptree_insert_mask_node(tree, ent, mask) : 403 ptree_insert_node(tree, ent); 404 if (ok) { 405 t->t_nitems++; 406 error = 0; 407 } else { 408 error = EEXIST; 409 } 410 break; 411 } 412 default: 413 KASSERT(false); 414 } 415 rw_exit(&t->t_lock); 416 417 if (error) { 418 pool_cache_put(tblent_cache, ent); 419 } 420 return error; 421 } 422 423 /* 424 * npf_table_remove: remove the IP CIDR entry from the table. 425 */ 426 int 427 npf_table_remove(npf_tableset_t *tset, u_int tid, const int alen, 428 const npf_addr_t *addr, const npf_netmask_t mask) 429 { 430 const u_int aidx = NPF_ADDRLEN2TREE(alen); 431 npf_tblent_t *ent; 432 npf_table_t *t; 433 int error; 434 435 error = table_cidr_check(aidx, addr, mask); 436 if (error) { 437 return error; 438 } 439 440 if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) { 441 return EINVAL; 442 } 443 444 rw_enter(&t->t_lock, RW_WRITER); 445 switch (t->t_type) { 446 case NPF_TABLE_HASH: { 447 struct npf_hashl *htbl; 448 449 ent = table_hash_lookup(t, addr, alen, &htbl); 450 if (__predict_true(ent != NULL)) { 451 LIST_REMOVE(ent, te_entry.hashq); 452 t->t_nitems--; 453 } 454 break; 455 } 456 case NPF_TABLE_TREE: { 457 pt_tree_t *tree = &t->t_tree[aidx]; 458 459 ent = ptree_find_node(tree, addr); 460 if (__predict_true(ent != NULL)) { 461 ptree_remove_node(tree, ent); 462 t->t_nitems--; 463 } 464 break; 465 } 466 default: 467 KASSERT(false); 468 ent = NULL; 469 } 470 rw_exit(&t->t_lock); 471 472 if (ent == NULL) { 473 return ENOENT; 474 } 475 pool_cache_put(tblent_cache, ent); 476 return 0; 477 } 478 479 /* 480 * npf_table_lookup: find the table according to ID, lookup and match 481 * the contents with the specified IP address. 482 */ 483 int 484 npf_table_lookup(npf_tableset_t *tset, u_int tid, 485 const int alen, const npf_addr_t *addr) 486 { 487 const u_int aidx = NPF_ADDRLEN2TREE(alen); 488 npf_tblent_t *ent; 489 npf_table_t *t; 490 491 if (__predict_false(aidx > 1)) { 492 return EINVAL; 493 } 494 495 if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) { 496 return EINVAL; 497 } 498 499 rw_enter(&t->t_lock, RW_READER); 500 switch (t->t_type) { 501 case NPF_TABLE_HASH: { 502 struct npf_hashl *htbl; 503 ent = table_hash_lookup(t, addr, alen, &htbl); 504 break; 505 } 506 case NPF_TABLE_TREE: { 507 ent = ptree_find_node(&t->t_tree[aidx], addr); 508 break; 509 } 510 default: 511 KASSERT(false); 512 ent = NULL; 513 } 514 rw_exit(&t->t_lock); 515 516 return ent ? 0 : ENOENT; 517 } 518 519 static int 520 table_ent_copyout(npf_tblent_t *ent, npf_netmask_t mask, 521 void *ubuf, size_t len, size_t *off) 522 { 523 void *ubufp = (uint8_t *)ubuf + *off; 524 npf_ioctl_ent_t uent; 525 526 if ((*off += sizeof(npf_ioctl_ent_t)) > len) { 527 return ENOMEM; 528 } 529 uent.alen = ent->te_alen; 530 memcpy(&uent.addr, &ent->te_addr, sizeof(npf_addr_t)); 531 uent.mask = mask; 532 533 return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t)); 534 } 535 536 static int 537 table_tree_list(pt_tree_t *tree, npf_netmask_t maxmask, void *ubuf, 538 size_t len, size_t *off) 539 { 540 npf_tblent_t *ent = NULL; 541 int error = 0; 542 543 while ((ent = ptree_iterate(tree, ent, PT_ASCENDING)) != NULL) { 544 pt_bitlen_t blen; 545 546 if (!ptree_mask_node_p(tree, ent, &blen)) { 547 blen = maxmask; 548 } 549 error = table_ent_copyout(ent, blen, ubuf, len, off); 550 if (error) 551 break; 552 } 553 return error; 554 } 555 556 /* 557 * npf_table_list: copy a list of all table entries into a userspace buffer. 558 */ 559 int 560 npf_table_list(npf_tableset_t *tset, u_int tid, void *ubuf, size_t len) 561 { 562 npf_table_t *t; 563 size_t off = 0; 564 int error = 0; 565 566 if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) { 567 return EINVAL; 568 } 569 570 rw_enter(&t->t_lock, RW_READER); 571 switch (t->t_type) { 572 case NPF_TABLE_HASH: 573 for (unsigned n = 0; n <= t->t_hashmask; n++) { 574 npf_tblent_t *ent; 575 576 LIST_FOREACH(ent, &t->t_hashl[n], te_entry.hashq) 577 if ((error = table_ent_copyout(ent, 0, ubuf, 578 len, &off)) != 0) 579 break; 580 } 581 break; 582 case NPF_TABLE_TREE: 583 error = table_tree_list(&t->t_tree[0], 32, ubuf, len, &off); 584 if (error) 585 break; 586 error = table_tree_list(&t->t_tree[1], 128, ubuf, len, &off); 587 break; 588 default: 589 KASSERT(false); 590 } 591 rw_exit(&t->t_lock); 592 593 return error; 594 } 595