1 /*- 2 * Copyright (c) 2014-2020 Mindaugas Rasiukevicius <rmind at noxt eu> 3 * Copyright (c) 2010-2014 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This material is based upon work partially supported by The 7 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * NPF connection tracking for stateful filtering and translation. 33 * 34 * Overview 35 * 36 * Packets can be incoming or outgoing with respect to an interface. 37 * Connection direction is identified by the direction of its first 38 * packet. The meaning of incoming/outgoing packet in the context of 39 * connection direction can be confusing. 
Therefore, we will use the 40 * terms "forwards stream" and "backwards stream", where packets in 41 * the forwards stream mean the packets travelling in the direction 42 * as the connection direction. 43 * 44 * All connections have two keys and thus two entries: 45 * 46 * - npf_conn_getforwkey(con) -- for the forwards stream; 47 * - npf_conn_getbackkey(con, alen) -- for the backwards stream. 48 * 49 * Note: the keys are stored in npf_conn_t::c_keys[], which is used 50 * to allocate variable-length npf_conn_t structures based on whether 51 * the IPv4 or IPv6 addresses are used. 52 * 53 * The key is an n-tuple used to identify the connection flow: see the 54 * npf_connkey.c source file for the description of the key layouts. 55 * The key may be formed using translated values in a case of NAT. 56 * 57 * Connections can serve two purposes: for the implicit passing and/or 58 * to accommodate the dynamic NAT. Connections for the former purpose 59 * are created by the rules with "stateful" attribute and are used for 60 * stateful filtering. Such connections indicate that the packet of 61 * the backwards stream should be passed without inspection of the 62 * ruleset. The other purpose is to associate a dynamic NAT mechanism 63 * with a connection. Such connections are created by the NAT policies 64 * and they have a relationship with NAT translation structure via 65 * npf_conn_t::c_nat. A single connection can serve both purposes, 66 * which is a common case. 67 * 68 * Connection life-cycle 69 * 70 * Connections are established when a packet matches said rule or 71 * NAT policy. Both keys of the established connection are inserted 72 * into the connection database. A garbage collection thread 73 * periodically scans all connections and depending on connection 74 * properties (e.g. last activity time, protocol) removes connection 75 * entries and expires the actual connections. 76 * 77 * Each connection has a reference count. 
The reference is acquired
 *	on lookup and should be released by the caller.  It guarantees that
 *	the connection will not be destroyed, although it may be expired.
 *
 * Synchronization
 *
 *	Connection database is accessed in a lock-free manner by the main
 *	routines: npf_conn_inspect() and npf_conn_establish().  Since they
 *	are always called from a software interrupt, the database is
 *	protected using EBR.  The main place which can destroy a connection
 *	is npf_conn_worker().  The database itself can be replaced and
 *	destroyed in npf_conn_reload().
 *
 * ALG support
 *
 *	Application-level gateways (ALGs) can override generic connection
 *	inspection (npf_alg_conn() call in npf_conn_inspect() function) by
 *	performing their own lookup using a different key.  Recursive call
 *	to npf_conn_inspect() is not allowed.  The ALGs ought to use the
 *	npf_conn_lookup() function for this purpose.
 *
 * Lock order
 *
 *	npf->config_lock ->
 *		conn_lock ->
 *			npf_conn_t::c_lock
 */

#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_conn.c,v 1.33 2021/01/25 17:18:55 christos Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <sys/atomic.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <net/pfil.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/systm.h>
#endif

#define __NPF_CONN_PRIVATE
#include "npf_conn.h"
#include "npf_impl.h"

/*
 * A helper to select the IPv4 or IPv6 connection cache: maps the
 * address length (4 or 16 bytes) to cache index 0 or 1 respectively.
 */
#define NPF_CONNCACHE(alen) (((alen) >> 4) & 0x1)

/*
 * Connection flags: PFIL_IN and PFIL_OUT values are reserved for direction.
133 */ 134 CTASSERT(PFIL_ALL == (0x001 | 0x002)); 135 #define CONN_ACTIVE 0x004 /* visible on inspection */ 136 #define CONN_PASS 0x008 /* perform implicit passing */ 137 #define CONN_EXPIRE 0x010 /* explicitly expire */ 138 #define CONN_REMOVED 0x020 /* "forw/back" entries removed */ 139 140 enum { CONN_TRACKING_OFF, CONN_TRACKING_ON }; 141 142 static int npf_conn_export(npf_t *, npf_conn_t *, nvlist_t *); 143 144 /* 145 * npf_conn_sys{init,fini}: initialize/destroy connection tracking. 146 */ 147 148 void 149 npf_conn_init(npf_t *npf) 150 { 151 npf_conn_params_t *params = npf_param_allocgroup(npf, 152 NPF_PARAMS_CONN, sizeof(npf_conn_params_t)); 153 npf_param_t param_map[] = { 154 { 155 "state.key.interface", 156 ¶ms->connkey_interface, 157 .default_val = 1, // true 158 .min = 0, .max = 1 159 }, 160 { 161 "state.key.direction", 162 ¶ms->connkey_direction, 163 .default_val = 1, // true 164 .min = 0, .max = 1 165 }, 166 }; 167 npf_param_register(npf, param_map, __arraycount(param_map)); 168 169 npf->conn_cache[0] = pool_cache_init( 170 offsetof(npf_conn_t, c_keys[NPF_CONNKEY_V4WORDS * 2]), 171 0, 0, 0, "npfcn4pl", NULL, IPL_NET, NULL, NULL, NULL); 172 npf->conn_cache[1] = pool_cache_init( 173 offsetof(npf_conn_t, c_keys[NPF_CONNKEY_V6WORDS * 2]), 174 0, 0, 0, "npfcn6pl", NULL, IPL_NET, NULL, NULL, NULL); 175 176 mutex_init(&npf->conn_lock, MUTEX_DEFAULT, IPL_NONE); 177 atomic_store_relaxed(&npf->conn_tracking, CONN_TRACKING_OFF); 178 npf->conn_db = npf_conndb_create(); 179 npf_conndb_sysinit(npf); 180 181 npf_worker_addfunc(npf, npf_conn_worker); 182 } 183 184 void 185 npf_conn_fini(npf_t *npf) 186 { 187 const size_t len = sizeof(npf_conn_params_t); 188 189 /* Note: the caller should have flushed the connections. 
*/ 190 KASSERT(atomic_load_relaxed(&npf->conn_tracking) == CONN_TRACKING_OFF); 191 192 npf_conndb_destroy(npf->conn_db); 193 pool_cache_destroy(npf->conn_cache[0]); 194 pool_cache_destroy(npf->conn_cache[1]); 195 mutex_destroy(&npf->conn_lock); 196 197 npf_param_freegroup(npf, NPF_PARAMS_CONN, len); 198 npf_conndb_sysfini(npf); 199 } 200 201 /* 202 * npf_conn_load: perform the load by flushing the current connection 203 * database and replacing it with the new one or just destroying. 204 * 205 * => The caller must disable the connection tracking and ensure that 206 * there are no connection database lookups or references in-flight. 207 */ 208 void 209 npf_conn_load(npf_t *npf, npf_conndb_t *ndb, bool track) 210 { 211 npf_conndb_t *odb = NULL; 212 213 KASSERT(npf_config_locked_p(npf)); 214 215 /* 216 * The connection database is in the quiescent state. 217 * Prevent G/C thread from running and install a new database. 218 */ 219 mutex_enter(&npf->conn_lock); 220 if (ndb) { 221 KASSERT(atomic_load_relaxed(&npf->conn_tracking) 222 == CONN_TRACKING_OFF); 223 odb = atomic_load_relaxed(&npf->conn_db); 224 membar_sync(); 225 atomic_store_relaxed(&npf->conn_db, ndb); 226 } 227 if (track) { 228 /* After this point lookups start flying in. */ 229 membar_producer(); 230 atomic_store_relaxed(&npf->conn_tracking, CONN_TRACKING_ON); 231 } 232 mutex_exit(&npf->conn_lock); 233 234 if (odb) { 235 /* 236 * Flush all, no sync since the caller did it for us. 237 * Also, release the pool cache memory. 238 */ 239 npf_conndb_gc(npf, odb, true, false); 240 npf_conndb_destroy(odb); 241 pool_cache_invalidate(npf->conn_cache[0]); 242 pool_cache_invalidate(npf->conn_cache[1]); 243 } 244 } 245 246 /* 247 * npf_conn_tracking: enable/disable connection tracking. 248 */ 249 void 250 npf_conn_tracking(npf_t *npf, bool track) 251 { 252 KASSERT(npf_config_locked_p(npf)); 253 atomic_store_relaxed(&npf->conn_tracking, 254 track ? 
CONN_TRACKING_ON : CONN_TRACKING_OFF); 255 } 256 257 static inline bool 258 npf_conn_trackable_p(const npf_cache_t *npc) 259 { 260 const npf_t *npf = npc->npc_ctx; 261 262 /* 263 * Check if connection tracking is on. Also, if layer 3 and 4 are 264 * not cached - protocol is not supported or packet is invalid. 265 */ 266 if (atomic_load_relaxed(&npf->conn_tracking) != CONN_TRACKING_ON) { 267 return false; 268 } 269 if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) { 270 return false; 271 } 272 return true; 273 } 274 275 static inline void 276 conn_update_atime(npf_conn_t *con) 277 { 278 struct timespec tsnow; 279 280 getnanouptime(&tsnow); 281 atomic_store_relaxed(&con->c_atime, tsnow.tv_sec); 282 } 283 284 /* 285 * npf_conn_check: check that: 286 * 287 * - the connection is active; 288 * 289 * - the packet is travelling in the right direction with the respect 290 * to the connection direction (if interface-id is not zero); 291 * 292 * - the packet is travelling on the same interface as the 293 * connection interface (if interface-id is not zero). 294 */ 295 static bool 296 npf_conn_check(const npf_conn_t *con, const nbuf_t *nbuf, 297 const unsigned di, const npf_flow_t flow) 298 { 299 const uint32_t flags = atomic_load_relaxed(&con->c_flags); 300 const unsigned ifid = atomic_load_relaxed(&con->c_ifid); 301 bool active; 302 303 active = (flags & (CONN_ACTIVE | CONN_EXPIRE)) == CONN_ACTIVE; 304 if (__predict_false(!active)) { 305 return false; 306 } 307 if (ifid && nbuf) { 308 const bool match = (flags & PFIL_ALL) == di; 309 npf_flow_t pflow = match ? NPF_FLOW_FORW : NPF_FLOW_BACK; 310 311 if (__predict_false(flow != pflow)) { 312 return false; 313 } 314 if (__predict_false(ifid != nbuf->nb_ifid)) { 315 return false; 316 } 317 } 318 return true; 319 } 320 321 /* 322 * npf_conn_lookup: lookup if there is an established connection. 323 * 324 * => If found, we will hold a reference for the caller. 
325 */ 326 npf_conn_t * 327 npf_conn_lookup(const npf_cache_t *npc, const unsigned di, npf_flow_t *flow) 328 { 329 npf_t *npf = npc->npc_ctx; 330 const nbuf_t *nbuf = npc->npc_nbuf; 331 npf_conn_t *con; 332 npf_connkey_t key; 333 334 /* Construct a key and lookup for a connection in the store. */ 335 if (!npf_conn_conkey(npc, &key, di, NPF_FLOW_FORW)) { 336 return NULL; 337 } 338 con = npf_conndb_lookup(npf, &key, flow); 339 if (con == NULL) { 340 return NULL; 341 } 342 KASSERT(npc->npc_proto == atomic_load_relaxed(&con->c_proto)); 343 344 /* Extra checks for the connection and packet. */ 345 if (!npf_conn_check(con, nbuf, di, *flow)) { 346 atomic_dec_uint(&con->c_refcnt); 347 return NULL; 348 } 349 350 /* Update the last activity time. */ 351 conn_update_atime(con); 352 return con; 353 } 354 355 /* 356 * npf_conn_inspect: lookup a connection and inspecting the protocol data. 357 * 358 * => If found, we will hold a reference for the caller. 359 */ 360 npf_conn_t * 361 npf_conn_inspect(npf_cache_t *npc, const unsigned di, int *error) 362 { 363 nbuf_t *nbuf = npc->npc_nbuf; 364 npf_flow_t flow; 365 npf_conn_t *con; 366 bool ok; 367 368 KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)); 369 if (!npf_conn_trackable_p(npc)) { 370 return NULL; 371 } 372 373 /* Query ALG which may lookup connection for us. */ 374 if ((con = npf_alg_conn(npc, di)) != NULL) { 375 /* Note: reference is held. */ 376 return con; 377 } 378 if (nbuf_head_mbuf(nbuf) == NULL) { 379 *error = ENOMEM; 380 return NULL; 381 } 382 KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)); 383 384 /* The main lookup of the connection (acquires a reference). */ 385 if ((con = npf_conn_lookup(npc, di, &flow)) == NULL) { 386 return NULL; 387 } 388 389 /* Inspect the protocol data and handle state changes. */ 390 mutex_enter(&con->c_lock); 391 ok = npf_state_inspect(npc, &con->c_state, flow); 392 mutex_exit(&con->c_lock); 393 394 /* If invalid state: let the rules deal with it. 
*/ 395 if (__predict_false(!ok)) { 396 npf_conn_release(con); 397 npf_stats_inc(npc->npc_ctx, NPF_STAT_INVALID_STATE); 398 return NULL; 399 } 400 #if 0 401 /* 402 * TODO -- determine when this might be wanted/used. 403 * 404 * Note: skipping the connection lookup and ruleset inspection 405 * on other interfaces will also bypass dynamic NAT. 406 */ 407 if (atomic_load_relaxed(&con->c_flags) & CONN_GPASS) { 408 /* 409 * Note: if tagging fails, then give this packet a chance 410 * to go through a regular ruleset. 411 */ 412 (void)nbuf_add_tag(nbuf, NPF_NTAG_PASS); 413 } 414 #endif 415 return con; 416 } 417 418 /* 419 * npf_conn_establish: create a new connection, insert into the global list. 420 * 421 * => Connection is created with the reference held for the caller. 422 * => Connection will be activated on the first reference release. 423 */ 424 npf_conn_t * 425 npf_conn_establish(npf_cache_t *npc, const unsigned di, bool global) 426 { 427 npf_t *npf = npc->npc_ctx; 428 const unsigned alen = npc->npc_alen; 429 const unsigned idx = NPF_CONNCACHE(alen); 430 const nbuf_t *nbuf = npc->npc_nbuf; 431 npf_connkey_t *fw, *bk; 432 npf_conndb_t *conn_db; 433 npf_conn_t *con; 434 int error = 0; 435 436 KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)); 437 438 if (!npf_conn_trackable_p(npc)) { 439 return NULL; 440 } 441 442 /* Allocate and initialize the new connection. */ 443 con = pool_cache_get(npf->conn_cache[idx], PR_NOWAIT); 444 if (__predict_false(!con)) { 445 npf_worker_signal(npf); 446 return NULL; 447 } 448 NPF_PRINTF(("NPF: create conn %p\n", con)); 449 npf_stats_inc(npf, NPF_STAT_CONN_CREATE); 450 451 mutex_init(&con->c_lock, MUTEX_DEFAULT, IPL_SOFTNET); 452 atomic_store_relaxed(&con->c_flags, di & PFIL_ALL); 453 atomic_store_relaxed(&con->c_refcnt, 0); 454 con->c_rproc = NULL; 455 con->c_nat = NULL; 456 457 con->c_proto = npc->npc_proto; 458 CTASSERT(sizeof(con->c_proto) >= sizeof(npc->npc_proto)); 459 con->c_alen = alen; 460 461 /* Initialize the protocol state. 
*/ 462 if (!npf_state_init(npc, &con->c_state)) { 463 npf_conn_destroy(npf, con); 464 return NULL; 465 } 466 KASSERT(npf_iscached(npc, NPC_IP46)); 467 468 fw = npf_conn_getforwkey(con); 469 bk = npf_conn_getbackkey(con, alen); 470 471 /* 472 * Construct "forwards" and "backwards" keys. Also, set the 473 * interface ID for this connection (unless it is global). 474 */ 475 if (!npf_conn_conkey(npc, fw, di, NPF_FLOW_FORW) || 476 !npf_conn_conkey(npc, bk, di ^ PFIL_ALL, NPF_FLOW_BACK)) { 477 npf_conn_destroy(npf, con); 478 return NULL; 479 } 480 con->c_ifid = global ? nbuf->nb_ifid : 0; 481 482 /* 483 * Set last activity time for a new connection and acquire 484 * a reference for the caller before we make it visible. 485 */ 486 conn_update_atime(con); 487 atomic_store_relaxed(&con->c_refcnt, 1); 488 489 /* 490 * Insert both keys (entries representing directions) of the 491 * connection. At this point it becomes visible, but we activate 492 * the connection later. 493 */ 494 mutex_enter(&con->c_lock); 495 conn_db = atomic_load_relaxed(&npf->conn_db); 496 if (!npf_conndb_insert(conn_db, fw, con, NPF_FLOW_FORW)) { 497 error = EISCONN; 498 goto err; 499 } 500 if (!npf_conndb_insert(conn_db, bk, con, NPF_FLOW_BACK)) { 501 npf_conn_t *ret __diagused; 502 ret = npf_conndb_remove(conn_db, fw); 503 KASSERT(ret == con); 504 error = EISCONN; 505 goto err; 506 } 507 err: 508 /* 509 * If we have hit the duplicate: mark the connection as expired 510 * and let the G/C thread to take care of it. We cannot do it 511 * here since there might be references acquired already. 512 */ 513 if (error) { 514 atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE); 515 atomic_dec_uint(&con->c_refcnt); 516 npf_stats_inc(npf, NPF_STAT_RACE_CONN); 517 } else { 518 NPF_PRINTF(("NPF: establish conn %p\n", con)); 519 } 520 521 /* Finally, insert into the connection list. */ 522 npf_conndb_enqueue(conn_db, con); 523 mutex_exit(&con->c_lock); 524 525 return error ? 
NULL : con; 526 } 527 528 void 529 npf_conn_destroy(npf_t *npf, npf_conn_t *con) 530 { 531 const unsigned idx __unused = NPF_CONNCACHE(con->c_alen); 532 533 KASSERT(atomic_load_relaxed(&con->c_refcnt) == 0); 534 535 if (con->c_nat) { 536 /* Release any NAT structures. */ 537 npf_nat_destroy(con, con->c_nat); 538 } 539 if (con->c_rproc) { 540 /* Release the rule procedure. */ 541 npf_rproc_release(con->c_rproc); 542 } 543 544 /* Destroy the state. */ 545 npf_state_destroy(&con->c_state); 546 mutex_destroy(&con->c_lock); 547 548 /* Free the structure, increase the counter. */ 549 pool_cache_put(npf->conn_cache[idx], con); 550 npf_stats_inc(npf, NPF_STAT_CONN_DESTROY); 551 NPF_PRINTF(("NPF: conn %p destroyed\n", con)); 552 } 553 554 /* 555 * npf_conn_setnat: associate NAT entry with the connection, update and 556 * re-insert connection entry using the translation values. 557 * 558 * => The caller must be holding a reference. 559 */ 560 int 561 npf_conn_setnat(const npf_cache_t *npc, npf_conn_t *con, 562 npf_nat_t *nt, unsigned ntype) 563 { 564 static const unsigned nat_type_which[] = { 565 /* See the description in npf_nat_which(). */ 566 [NPF_NATOUT] = NPF_DST, 567 [NPF_NATIN] = NPF_SRC, 568 }; 569 npf_t *npf = npc->npc_ctx; 570 npf_conn_t *ret __diagused; 571 npf_conndb_t *conn_db; 572 npf_connkey_t *bk; 573 npf_addr_t *taddr; 574 in_port_t tport; 575 uint32_t flags; 576 577 KASSERT(atomic_load_relaxed(&con->c_refcnt) > 0); 578 579 npf_nat_gettrans(nt, &taddr, &tport); 580 KASSERT(ntype == NPF_NATOUT || ntype == NPF_NATIN); 581 582 /* Acquire the lock and check for the races. */ 583 mutex_enter(&con->c_lock); 584 flags = atomic_load_relaxed(&con->c_flags); 585 if (__predict_false(flags & CONN_EXPIRE)) { 586 /* The connection got expired. */ 587 mutex_exit(&con->c_lock); 588 return EINVAL; 589 } 590 KASSERT((flags & CONN_REMOVED) == 0); 591 592 if (__predict_false(con->c_nat != NULL)) { 593 /* Race with a duplicate packet. 
*/ 594 mutex_exit(&con->c_lock); 595 npf_stats_inc(npc->npc_ctx, NPF_STAT_RACE_NAT); 596 return EISCONN; 597 } 598 599 /* Remove the "backwards" key. */ 600 conn_db = atomic_load_relaxed(&npf->conn_db); 601 bk = npf_conn_getbackkey(con, con->c_alen); 602 ret = npf_conndb_remove(conn_db, bk); 603 KASSERT(ret == con); 604 605 /* Set the source/destination IDs to the translation values. */ 606 npf_conn_adjkey(bk, taddr, tport, nat_type_which[ntype]); 607 608 /* Finally, re-insert the "backwards" key. */ 609 if (!npf_conndb_insert(conn_db, bk, con, NPF_FLOW_BACK)) { 610 /* 611 * Race: we have hit the duplicate, remove the "forwards" 612 * key and expire our connection; it is no longer valid. 613 */ 614 npf_connkey_t *fw = npf_conn_getforwkey(con); 615 ret = npf_conndb_remove(conn_db, fw); 616 KASSERT(ret == con); 617 618 atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE); 619 mutex_exit(&con->c_lock); 620 621 npf_stats_inc(npc->npc_ctx, NPF_STAT_RACE_NAT); 622 return EISCONN; 623 } 624 625 /* Associate the NAT entry and release the lock. */ 626 con->c_nat = nt; 627 mutex_exit(&con->c_lock); 628 return 0; 629 } 630 631 /* 632 * npf_conn_expire: explicitly mark connection as expired. 633 * 634 * => Must be called with: a) reference held b) the relevant lock held. 635 * The relevant lock should prevent from connection destruction, e.g. 636 * npf_t::conn_lock or npf_natpolicy_t::n_lock. 637 */ 638 void 639 npf_conn_expire(npf_conn_t *con) 640 { 641 atomic_or_uint(&con->c_flags, CONN_EXPIRE); 642 } 643 644 /* 645 * npf_conn_pass: return true if connection is "pass" one, otherwise false. 
 */
bool
npf_conn_pass(const npf_conn_t *con, npf_match_info_t *mi, npf_rproc_t **rp)
{
	KASSERT(atomic_load_relaxed(&con->c_refcnt) > 0);
	if (__predict_true(atomic_load_relaxed(&con->c_flags) & CONN_PASS)) {
		/* Hand back the match info and rproc set by setpass. */
		mi->mi_retfl = atomic_load_relaxed(&con->c_retfl);
		mi->mi_rid = con->c_rid;
		*rp = con->c_rproc;
		return true;
	}
	return false;
}

/*
 * npf_conn_setpass: mark connection as a "pass" one and associate the
 * rule procedure with it.
 */
void
npf_conn_setpass(npf_conn_t *con, const npf_match_info_t *mi, npf_rproc_t *rp)
{
	KASSERT((atomic_load_relaxed(&con->c_flags) & CONN_ACTIVE) == 0);
	KASSERT(atomic_load_relaxed(&con->c_refcnt) > 0);
	KASSERT(con->c_rproc == NULL);

	/*
	 * No need for atomic since the connection is not yet active.
	 * If rproc is set, the caller transfers its reference to us,
	 * which will be released on npf_conn_destroy().
	 */
	atomic_or_uint(&con->c_flags, CONN_PASS);
	con->c_rproc = rp;
	if (rp) {
		con->c_rid = mi->mi_rid;
		con->c_retfl = mi->mi_retfl;
	}
}

/*
 * npf_conn_release: release a reference, which might allow G/C thread
 * to destroy this connection.
 */
void
npf_conn_release(npf_conn_t *con)
{
	const unsigned flags = atomic_load_relaxed(&con->c_flags);

	if ((flags & (CONN_ACTIVE | CONN_EXPIRE)) == 0) {
		/* Activate: after this, connection is globally visible. */
		atomic_or_uint(&con->c_flags, CONN_ACTIVE);
	}
	KASSERT(atomic_load_relaxed(&con->c_refcnt) > 0);
	atomic_dec_uint(&con->c_refcnt);
}

/*
 * npf_conn_getnat: return the associated NAT entry, if any.
 */
npf_nat_t *
npf_conn_getnat(const npf_conn_t *con)
{
	return con->c_nat;
}

/*
 * npf_conn_expired: criterion to check if connection is expired.
 */
bool
npf_conn_expired(npf_t *npf, const npf_conn_t *con, uint64_t tsnow)
{
	const unsigned flags = atomic_load_relaxed(&con->c_flags);
	/* Protocol/state dependent expiry timeout (in seconds). */
	const int etime = npf_state_etime(npf, &con->c_state, con->c_proto);
	int elapsed;

	if (__predict_false(flags & CONN_EXPIRE)) {
		/* Explicitly marked to be expired. */
		return true;
	}

	/*
	 * Note: another thread may update 'atime' and it might
	 * become greater than 'now'.
	 */
	elapsed = (int64_t)tsnow - atomic_load_relaxed(&con->c_atime);
	return elapsed > etime;
}

/*
 * npf_conn_remove: unlink the connection and mark as expired.
 */
void
npf_conn_remove(npf_conndb_t *cd, npf_conn_t *con)
{
	/* Remove both entries of the connection. */
	mutex_enter(&con->c_lock);
	if ((atomic_load_relaxed(&con->c_flags) & CONN_REMOVED) == 0) {
		npf_connkey_t *fw, *bk;
		npf_conn_t *ret __diagused;

		fw = npf_conn_getforwkey(con);
		ret = npf_conndb_remove(cd, fw);
		KASSERT(ret == con);

		bk = npf_conn_getbackkey(con, NPF_CONNKEY_ALEN(fw));
		ret = npf_conndb_remove(cd, bk);
		KASSERT(ret == con);
	}

	/* Flag the removal and expiration. */
	atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE);
	mutex_exit(&con->c_lock);
}

/*
 * npf_conn_worker: G/C to run from a worker thread or via npfk_gc().
 */
void
npf_conn_worker(npf_t *npf)
{
	npf_conndb_t *conn_db = atomic_load_relaxed(&npf->conn_db);
	npf_conndb_gc(npf, conn_db, false, true);
}

/*
 * npf_conndb_export: construct a list of connections prepared for saving.
 * Note: this is expected to be an expensive operation.
 */
int
npf_conndb_export(npf_t *npf, nvlist_t *nvl)
{
	npf_conn_t *head, *con;
	npf_conndb_t *conn_db;

	/*
	 * Note: acquire conn_lock to prevent from the database
	 * destruction and G/C thread.
	 */
	mutex_enter(&npf->conn_lock);
	if (atomic_load_relaxed(&npf->conn_tracking) != CONN_TRACKING_ON) {
		mutex_exit(&npf->conn_lock);
		return 0;
	}
	conn_db = atomic_load_relaxed(&npf->conn_db);
	head = npf_conndb_getlist(conn_db);
	con = head;
	/* Walk the circular connection list, serializing each entry. */
	while (con) {
		nvlist_t *con_nvl;

		con_nvl = nvlist_create(0);
		/* Skip entries that are not active (export returns ESRCH). */
		if (npf_conn_export(npf, con, con_nvl) == 0) {
			nvlist_append_nvlist_array(nvl, "conn-list", con_nvl);
		}
		nvlist_destroy(con_nvl);

		if ((con = npf_conndb_getnext(conn_db, con)) == head) {
			break;
		}
	}
	mutex_exit(&npf->conn_lock);
	return 0;
}

/*
 * npf_conn_export: serialize a single connection.
 *
 * => Returns ESRCH if the connection is not active or already expiring.
 */
static int
npf_conn_export(npf_t *npf, npf_conn_t *con, nvlist_t *nvl)
{
	nvlist_t *knvl;
	npf_connkey_t *fw, *bk;
	unsigned flags, alen;

	flags = atomic_load_relaxed(&con->c_flags);
	if ((flags & (CONN_ACTIVE|CONN_EXPIRE)) != CONN_ACTIVE) {
		return ESRCH;
	}
	nvlist_add_number(nvl, "flags", flags);
	nvlist_add_number(nvl, "proto", con->c_proto);
	if (con->c_ifid) {
		char ifname[IFNAMSIZ];
		npf_ifmap_copyname(npf, con->c_ifid, ifname, sizeof(ifname));
		nvlist_add_string(nvl, "ifname", ifname);
	}
	nvlist_add_binary(nvl, "state", &con->c_state, sizeof(npf_state_t));

	fw = npf_conn_getforwkey(con);
	alen = NPF_CONNKEY_ALEN(fw);
	KASSERT(alen == con->c_alen);
	bk = npf_conn_getbackkey(con, alen);

	knvl = npf_connkey_export(npf, fw);
	nvlist_move_nvlist(nvl, "forw-key", knvl);

	knvl = npf_connkey_export(npf, bk);
	nvlist_move_nvlist(nvl, "back-key", knvl);

	/* Let the address length be based on the first key. */
	nvlist_add_number(nvl, "alen", alen);

	if (con->c_nat) {
		npf_nat_export(npf, con->c_nat, nvl);
	}
	return 0;
}

/*
 * npf_conn_import: fully reconstruct a single connection from a
 * nvlist and insert into the given database.
 */
int
npf_conn_import(npf_t *npf, npf_conndb_t *cd, const nvlist_t *cdict,
    npf_ruleset_t *natlist)
{
	npf_conn_t *con;
	npf_connkey_t *fw, *bk;
	const nvlist_t *nat, *conkey;
	unsigned flags, alen, idx;
	const char *ifname;
	const void *state;
	size_t len;

	/*
	 * To determine the length of the connection, which depends
	 * on the address length in the connection keys.
	 */
	alen = dnvlist_get_number(cdict, "alen", 0);
	idx = NPF_CONNCACHE(alen);

	/* Allocate a connection and initialize it (clear first). */
	con = pool_cache_get(npf->conn_cache[idx], PR_WAITOK);
	memset(con, 0, sizeof(npf_conn_t));
	mutex_init(&con->c_lock, MUTEX_DEFAULT, IPL_SOFTNET);
	npf_stats_inc(npf, NPF_STAT_CONN_CREATE);

	con->c_proto = dnvlist_get_number(cdict, "proto", 0);
	flags = dnvlist_get_number(cdict, "flags", 0);
	/* Only direction and pass/active bits survive the import. */
	flags &= PFIL_ALL | CONN_ACTIVE | CONN_PASS;
	atomic_store_relaxed(&con->c_flags, flags);
	conn_update_atime(con);

	ifname = dnvlist_get_string(cdict, "ifname", NULL);
	if (ifname && (con->c_ifid = npf_ifmap_register(npf, ifname)) == 0) {
		goto err;
	}

	state = dnvlist_get_binary(cdict, "state", &len, NULL, 0);
	if (!state || len != sizeof(npf_state_t)) {
		goto err;
	}
	memcpy(&con->c_state, state, sizeof(npf_state_t));

	/* Reconstruct NAT association, if any. */
	if ((nat = dnvlist_get_nvlist(cdict, "nat", NULL)) != NULL &&
	    (con->c_nat = npf_nat_import(npf, nat, natlist, con)) == NULL) {
		goto err;
	}

	/*
	 * Fetch and copy the keys for each direction.
	 */
	fw = npf_conn_getforwkey(con);
	conkey = dnvlist_get_nvlist(cdict, "forw-key", NULL);
	if (conkey == NULL || !npf_connkey_import(npf, conkey, fw)) {
		goto err;
	}
	bk = npf_conn_getbackkey(con, NPF_CONNKEY_ALEN(fw));
	conkey = dnvlist_get_nvlist(cdict, "back-key", NULL);
	if (conkey == NULL || !npf_connkey_import(npf, conkey, bk)) {
		goto err;
	}

	/* Guard against the contradicting address lengths. */
	if (NPF_CONNKEY_ALEN(fw) != alen || NPF_CONNKEY_ALEN(bk) != alen) {
		goto err;
	}

	/* Insert the entries and the connection itself. */
	if (!npf_conndb_insert(cd, fw, con, NPF_FLOW_FORW)) {
		goto err;
	}
	if (!npf_conndb_insert(cd, bk, con, NPF_FLOW_BACK)) {
		/* Undo the first insertion on partial failure. */
		npf_conndb_remove(cd, fw);
		goto err;
	}

	NPF_PRINTF(("NPF: imported conn %p\n", con));
	npf_conndb_enqueue(cd, con);
	return 0;
err:
	/* Note: c_refcnt is zero here (cleared by memset), as required. */
	npf_conn_destroy(npf, con);
	return EINVAL;
}

/*
 * npf_conn_find: lookup a connection in the list of connections
 */
int
npf_conn_find(npf_t *npf, const nvlist_t *req, nvlist_t *resp)
{
	const nvlist_t *key_nv;
	npf_conn_t *con;
	npf_connkey_t key;
	npf_flow_t flow;
	int error;

	key_nv = dnvlist_get_nvlist(req, "key", NULL);
	if (!key_nv || !npf_connkey_import(npf, key_nv, &key)) {
		return EINVAL;
	}
	/* Note: the lookup acquires a reference on success. */
	con = npf_conndb_lookup(npf, &key, &flow);
	if (con == NULL) {
		return ESRCH;
	}
	if (!npf_conn_check(con, NULL, 0, NPF_FLOW_FORW)) {
		atomic_dec_uint(&con->c_refcnt);
		return ESRCH;
	}
	error = npf_conn_export(npf, con, resp);
	nvlist_add_number(resp, "flow", flow);
	atomic_dec_uint(&con->c_refcnt);
	return error;
}

#if defined(DDB) || defined(_NPF_TESTING)

/*
 * npf_conn_print: dump a connection (debugger/testing aid).
 */
void
npf_conn_print(npf_conn_t *con)
{
	const npf_connkey_t *fw = npf_conn_getforwkey(con);
	const npf_connkey_t *bk = npf_conn_getbackkey(con, NPF_CONNKEY_ALEN(fw));
	const unsigned flags = atomic_load_relaxed(&con->c_flags);
	const unsigned proto = con->c_proto;
	struct timespec tspnow;

	getnanouptime(&tspnow);
	printf("%p:\n\tproto %d flags 0x%x tsdiff %ld etime %d\n", con,
	    proto, flags, (long)(tspnow.tv_sec - con->c_atime),
	    npf_state_etime(npf_getkernctx(), &con->c_state, proto));
	npf_connkey_print(fw);
	npf_connkey_print(bk);
	npf_state_dump(&con->c_state);
	if (con->c_nat) {
		npf_nat_dump(con->c_nat);
	}
}

#endif