1 /*- 2 * Copyright (c) 2014-2018 Mindaugas Rasiukevicius <rmind at netbsd org> 3 * Copyright (c) 2010-2014 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This material is based upon work partially supported by The 7 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * NPF connection tracking for stateful filtering and translation. 33 * 34 * Overview 35 * 36 * Packets can be incoming or outgoing with respect to an interface. 37 * Connection direction is identified by the direction of its first 38 * packet. The meaning of incoming/outgoing packet in the context of 39 * connection direction can be confusing. 
 * Therefore, we will use the
 * terms "forwards stream" and "backwards stream", where packets in
 * the forwards stream mean the packets travelling in the same direction
 * as the connection direction.
 *
 * All connections have two keys and thus two entries:
 *
 *	- npf_conn_getforwkey(con)        -- for the forwards stream;
 *	- npf_conn_getbackkey(con, alen)  -- for the backwards stream.
 *
 * Note: the keys are stored in npf_conn_t::c_keys[], which is used
 * to allocate variable-length npf_conn_t structures based on whether
 * the IPv4 or IPv6 addresses are used.  See the npf_connkey.c source
 * file for the description of the key layouts.
 *
 * The keys are formed from the 5-tuple (source/destination address,
 * source/destination port and the protocol).  Additional matching
 * is performed for the interface (a common behaviour is equivalent
 * to the 6-tuple lookup including the interface ID).  Note that the
 * key may be formed using translated values in the case of NAT.
 *
 * Connections can serve two purposes: for the implicit passing or
 * to accommodate the dynamic NAT.  Connections for the former purpose
 * are created by the rules with "stateful" attribute and are used for
 * stateful filtering.  Such connections indicate that the packet of
 * the backwards stream should be passed without inspection of the
 * ruleset.  The other purpose is to associate a dynamic NAT mechanism
 * with a connection.  Such connections are created by the NAT policies
 * and they have a relationship with NAT translation structure via
 * npf_conn_t::c_nat.  A single connection can serve both purposes,
 * which is a common case.
 *
 * Connection life-cycle
 *
 * Connections are established when a packet matches said rule or
 * NAT policy.  Both keys of the established connection are inserted
 * into the connection database.
A garbage collection thread 76 * periodically scans all connections and depending on connection 77 * properties (e.g. last activity time, protocol) removes connection 78 * entries and expires the actual connections. 79 * 80 * Each connection has a reference count. The reference is acquired 81 * on lookup and should be released by the caller. It guarantees that 82 * the connection will not be destroyed, although it may be expired. 83 * 84 * Synchronisation 85 * 86 * Connection database is accessed in a lock-less manner by the main 87 * routines: npf_conn_inspect() and npf_conn_establish(). Since they 88 * are always called from a software interrupt, the database is 89 * protected using passive serialisation. The main place which can 90 * destroy a connection is npf_conn_worker(). The database itself 91 * can be replaced and destroyed in npf_conn_reload(). 92 * 93 * ALG support 94 * 95 * Application-level gateways (ALGs) can override generic connection 96 * inspection (npf_alg_conn() call in npf_conn_inspect() function) by 97 * performing their own lookup using different key. Recursive call 98 * to npf_conn_inspect() is not allowed. The ALGs ought to use the 99 * npf_conn_lookup() function for this purpose. 100 * 101 * Lock order 102 * 103 * npf_config_lock -> 104 * conn_lock -> 105 * npf_conn_t::c_lock 106 */ 107 108 #ifdef _KERNEL 109 #include <sys/cdefs.h> 110 __KERNEL_RCSID(0, "$NetBSD: npf_conn.c,v 1.30 2019/09/29 17:00:29 rmind Exp $"); 111 112 #include <sys/param.h> 113 #include <sys/types.h> 114 115 #include <netinet/in.h> 116 #include <netinet/tcp.h> 117 118 #include <sys/atomic.h> 119 #include <sys/kmem.h> 120 #include <sys/mutex.h> 121 #include <net/pfil.h> 122 #include <sys/pool.h> 123 #include <sys/queue.h> 124 #include <sys/systm.h> 125 #endif 126 127 #define __NPF_CONN_PRIVATE 128 #include "npf_conn.h" 129 #include "npf_impl.h" 130 131 /* A helper to select the IPv4 or IPv6 connection cache. 
*/ 132 #define NPF_CONNCACHE(alen) (((alen) >> 4) & 0x1) 133 134 /* 135 * Connection flags: PFIL_IN and PFIL_OUT values are reserved for direction. 136 */ 137 CTASSERT(PFIL_ALL == (0x001 | 0x002)); 138 #define CONN_ACTIVE 0x004 /* visible on inspection */ 139 #define CONN_PASS 0x008 /* perform implicit passing */ 140 #define CONN_EXPIRE 0x010 /* explicitly expire */ 141 #define CONN_REMOVED 0x020 /* "forw/back" entries removed */ 142 143 enum { CONN_TRACKING_OFF, CONN_TRACKING_ON }; 144 145 static nvlist_t *npf_conn_export(npf_t *, npf_conn_t *); 146 147 /* 148 * npf_conn_sys{init,fini}: initialise/destroy connection tracking. 149 */ 150 151 void 152 npf_conn_init(npf_t *npf) 153 { 154 npf->conn_cache[0] = pool_cache_init( 155 offsetof(npf_conn_t, c_keys[NPF_CONNKEY_V4WORDS * 2]), 156 0, 0, 0, "npfcn4pl", NULL, IPL_NET, NULL, NULL, NULL); 157 npf->conn_cache[1] = pool_cache_init( 158 offsetof(npf_conn_t, c_keys[NPF_CONNKEY_V6WORDS * 2]), 159 0, 0, 0, "npfcn6pl", NULL, IPL_NET, NULL, NULL, NULL); 160 161 mutex_init(&npf->conn_lock, MUTEX_DEFAULT, IPL_NONE); 162 npf->conn_tracking = CONN_TRACKING_OFF; 163 npf->conn_db = npf_conndb_create(); 164 npf_conndb_sysinit(npf); 165 } 166 167 void 168 npf_conn_fini(npf_t *npf) 169 { 170 npf_conndb_sysfini(npf); 171 172 /* Note: the caller should have flushed the connections. */ 173 KASSERT(npf->conn_tracking == CONN_TRACKING_OFF); 174 npf_worker_unregister(npf, npf_conn_worker); 175 176 npf_conndb_destroy(npf->conn_db); 177 pool_cache_destroy(npf->conn_cache[0]); 178 pool_cache_destroy(npf->conn_cache[1]); 179 mutex_destroy(&npf->conn_lock); 180 } 181 182 /* 183 * npf_conn_load: perform the load by flushing the current connection 184 * database and replacing it with the new one or just destroying. 185 * 186 * => The caller must disable the connection tracking and ensure that 187 * there are no connection database lookups or references in-flight. 
188 */ 189 void 190 npf_conn_load(npf_t *npf, npf_conndb_t *ndb, bool track) 191 { 192 npf_conndb_t *odb = NULL; 193 194 KASSERT(npf_config_locked_p(npf)); 195 196 /* 197 * The connection database is in the quiescent state. 198 * Prevent G/C thread from running and install a new database. 199 */ 200 mutex_enter(&npf->conn_lock); 201 if (ndb) { 202 KASSERT(npf->conn_tracking == CONN_TRACKING_OFF); 203 odb = npf->conn_db; 204 npf->conn_db = ndb; 205 membar_sync(); 206 } 207 if (track) { 208 /* After this point lookups start flying in. */ 209 npf->conn_tracking = CONN_TRACKING_ON; 210 } 211 mutex_exit(&npf->conn_lock); 212 213 if (odb) { 214 /* 215 * Flush all, no sync since the caller did it for us. 216 * Also, release the pool cache memory. 217 */ 218 npf_conndb_gc(npf, odb, true, false); 219 npf_conndb_destroy(odb); 220 pool_cache_invalidate(npf->conn_cache[0]); 221 pool_cache_invalidate(npf->conn_cache[1]); 222 } 223 } 224 225 /* 226 * npf_conn_tracking: enable/disable connection tracking. 227 */ 228 void 229 npf_conn_tracking(npf_t *npf, bool track) 230 { 231 KASSERT(npf_config_locked_p(npf)); 232 npf->conn_tracking = track ? CONN_TRACKING_ON : CONN_TRACKING_OFF; 233 } 234 235 static inline bool 236 npf_conn_trackable_p(const npf_cache_t *npc) 237 { 238 const npf_t *npf = npc->npc_ctx; 239 240 /* 241 * Check if connection tracking is on. Also, if layer 3 and 4 are 242 * not cached - protocol is not supported or packet is invalid. 
243 */ 244 if (npf->conn_tracking != CONN_TRACKING_ON) { 245 return false; 246 } 247 if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) { 248 return false; 249 } 250 return true; 251 } 252 253 static inline void 254 conn_update_atime(npf_conn_t *con) 255 { 256 struct timespec tsnow; 257 258 getnanouptime(&tsnow); 259 con->c_atime = tsnow.tv_sec; 260 } 261 262 /* 263 * npf_conn_check: check that: 264 * 265 * - the connection is active; 266 * 267 * - the packet is travelling in the right direction with the respect 268 * to the connection direction (if interface-id is not zero); 269 * 270 * - the packet is travelling on the same interface as the 271 * connection interface (if interface-id is not zero). 272 */ 273 static bool 274 npf_conn_check(const npf_conn_t *con, const nbuf_t *nbuf, 275 const unsigned di, const bool forw) 276 { 277 const uint32_t flags = con->c_flags; 278 const unsigned ifid = con->c_ifid; 279 bool active, pforw; 280 281 active = (flags & (CONN_ACTIVE | CONN_EXPIRE)) == CONN_ACTIVE; 282 if (__predict_false(!active)) { 283 return false; 284 } 285 if (ifid && nbuf) { 286 pforw = (flags & PFIL_ALL) == (unsigned)di; 287 if (__predict_false(forw != pforw)) { 288 return false; 289 } 290 if (__predict_false(ifid != nbuf->nb_ifid)) { 291 return false; 292 } 293 } 294 return true; 295 } 296 297 /* 298 * npf_conn_lookup: lookup if there is an established connection. 299 * 300 * => If found, we will hold a reference for the caller. 301 */ 302 npf_conn_t * 303 npf_conn_lookup(const npf_cache_t *npc, const int di, bool *forw) 304 { 305 npf_t *npf = npc->npc_ctx; 306 const nbuf_t *nbuf = npc->npc_nbuf; 307 npf_conn_t *con; 308 npf_connkey_t key; 309 310 /* Construct a key and lookup for a connection in the store. 
*/ 311 if (!npf_conn_conkey(npc, &key, true)) { 312 return NULL; 313 } 314 con = npf_conndb_lookup(npf->conn_db, &key, forw); 315 if (con == NULL) { 316 return NULL; 317 } 318 KASSERT(npc->npc_proto == con->c_proto); 319 320 /* Extra checks for the connection and packet. */ 321 if (!npf_conn_check(con, nbuf, di, *forw)) { 322 atomic_dec_uint(&con->c_refcnt); 323 return NULL; 324 } 325 326 /* Update the last activity time. */ 327 conn_update_atime(con); 328 return con; 329 } 330 331 /* 332 * npf_conn_inspect: lookup a connection and inspecting the protocol data. 333 * 334 * => If found, we will hold a reference for the caller. 335 */ 336 npf_conn_t * 337 npf_conn_inspect(npf_cache_t *npc, const int di, int *error) 338 { 339 nbuf_t *nbuf = npc->npc_nbuf; 340 npf_conn_t *con; 341 bool forw, ok; 342 343 KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)); 344 if (!npf_conn_trackable_p(npc)) { 345 return NULL; 346 } 347 348 /* Query ALG which may lookup connection for us. */ 349 if ((con = npf_alg_conn(npc, di)) != NULL) { 350 /* Note: reference is held. */ 351 return con; 352 } 353 if (nbuf_head_mbuf(nbuf) == NULL) { 354 *error = ENOMEM; 355 return NULL; 356 } 357 KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)); 358 359 /* Main lookup of the connection. */ 360 if ((con = npf_conn_lookup(npc, di, &forw)) == NULL) { 361 return NULL; 362 } 363 364 /* Inspect the protocol data and handle state changes. */ 365 mutex_enter(&con->c_lock); 366 ok = npf_state_inspect(npc, &con->c_state, forw); 367 mutex_exit(&con->c_lock); 368 369 /* If invalid state: let the rules deal with it. */ 370 if (__predict_false(!ok)) { 371 npf_conn_release(con); 372 npf_stats_inc(npc->npc_ctx, NPF_STAT_INVALID_STATE); 373 return NULL; 374 } 375 376 /* 377 * If this is multi-end state, then specially tag the packet 378 * so it will be just passed-through on other interfaces. 
379 */ 380 if (con->c_ifid == 0 && nbuf_add_tag(nbuf, NPF_NTAG_PASS) != 0) { 381 npf_conn_release(con); 382 *error = ENOMEM; 383 return NULL; 384 } 385 return con; 386 } 387 388 /* 389 * npf_conn_establish: create a new connection, insert into the global list. 390 * 391 * => Connection is created with the reference held for the caller. 392 * => Connection will be activated on the first reference release. 393 */ 394 npf_conn_t * 395 npf_conn_establish(npf_cache_t *npc, int di, bool global) 396 { 397 npf_t *npf = npc->npc_ctx; 398 const unsigned alen = npc->npc_alen; 399 const unsigned idx = NPF_CONNCACHE(alen); 400 const nbuf_t *nbuf = npc->npc_nbuf; 401 npf_connkey_t *fw, *bk; 402 npf_conn_t *con; 403 int error = 0; 404 405 KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)); 406 407 if (!npf_conn_trackable_p(npc)) { 408 return NULL; 409 } 410 411 /* Allocate and initialise the new connection. */ 412 con = pool_cache_get(npf->conn_cache[idx], PR_NOWAIT); 413 if (__predict_false(!con)) { 414 npf_worker_signal(npf); 415 return NULL; 416 } 417 NPF_PRINTF(("NPF: create conn %p\n", con)); 418 npf_stats_inc(npf, NPF_STAT_CONN_CREATE); 419 420 mutex_init(&con->c_lock, MUTEX_DEFAULT, IPL_SOFTNET); 421 con->c_flags = (di & PFIL_ALL); 422 con->c_refcnt = 0; 423 con->c_rproc = NULL; 424 con->c_nat = NULL; 425 426 con->c_proto = npc->npc_proto; 427 CTASSERT(sizeof(con->c_proto) >= sizeof(npc->npc_proto)); 428 con->c_alen = alen; 429 430 /* Initialize the protocol state. */ 431 if (!npf_state_init(npc, &con->c_state)) { 432 npf_conn_destroy(npf, con); 433 return NULL; 434 } 435 KASSERT(npf_iscached(npc, NPC_IP46)); 436 437 fw = npf_conn_getforwkey(con); 438 bk = npf_conn_getbackkey(con, alen); 439 440 /* 441 * Construct "forwards" and "backwards" keys. Also, set the 442 * interface ID for this connection (unless it is global). 
443 */ 444 if (!npf_conn_conkey(npc, fw, true) || 445 !npf_conn_conkey(npc, bk, false)) { 446 npf_conn_destroy(npf, con); 447 return NULL; 448 } 449 con->c_ifid = global ? nbuf->nb_ifid : 0; 450 451 /* 452 * Set last activity time for a new connection and acquire 453 * a reference for the caller before we make it visible. 454 */ 455 conn_update_atime(con); 456 con->c_refcnt = 1; 457 458 /* 459 * Insert both keys (entries representing directions) of the 460 * connection. At this point it becomes visible, but we activate 461 * the connection later. 462 */ 463 mutex_enter(&con->c_lock); 464 if (!npf_conndb_insert(npf->conn_db, fw, con, true)) { 465 error = EISCONN; 466 goto err; 467 } 468 if (!npf_conndb_insert(npf->conn_db, bk, con, false)) { 469 npf_conn_t *ret __diagused; 470 ret = npf_conndb_remove(npf->conn_db, fw); 471 KASSERT(ret == con); 472 error = EISCONN; 473 goto err; 474 } 475 err: 476 /* 477 * If we have hit the duplicate: mark the connection as expired 478 * and let the G/C thread to take care of it. We cannot do it 479 * here since there might be references acquired already. 480 */ 481 if (error) { 482 atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE); 483 atomic_dec_uint(&con->c_refcnt); 484 npf_stats_inc(npf, NPF_STAT_RACE_CONN); 485 } else { 486 NPF_PRINTF(("NPF: establish conn %p\n", con)); 487 } 488 489 /* Finally, insert into the connection list. */ 490 npf_conndb_enqueue(npf->conn_db, con); 491 mutex_exit(&con->c_lock); 492 493 return error ? NULL : con; 494 } 495 496 void 497 npf_conn_destroy(npf_t *npf, npf_conn_t *con) 498 { 499 const unsigned idx __unused = NPF_CONNCACHE(con->c_alen); 500 501 KASSERT(con->c_refcnt == 0); 502 503 if (con->c_nat) { 504 /* Release any NAT structures. */ 505 npf_nat_destroy(con->c_nat); 506 } 507 if (con->c_rproc) { 508 /* Release the rule procedure. */ 509 npf_rproc_release(con->c_rproc); 510 } 511 512 /* Destroy the state. 
*/ 513 npf_state_destroy(&con->c_state); 514 mutex_destroy(&con->c_lock); 515 516 /* Free the structure, increase the counter. */ 517 pool_cache_put(npf->conn_cache[idx], con); 518 npf_stats_inc(npf, NPF_STAT_CONN_DESTROY); 519 NPF_PRINTF(("NPF: conn %p destroyed\n", con)); 520 } 521 522 /* 523 * npf_conn_setnat: associate NAT entry with the connection, update and 524 * re-insert connection entry using the translation values. 525 * 526 * => The caller must be holding a reference. 527 */ 528 int 529 npf_conn_setnat(const npf_cache_t *npc, npf_conn_t *con, 530 npf_nat_t *nt, unsigned ntype) 531 { 532 static const u_int nat_type_dimap[] = { 533 [NPF_NATOUT] = NPF_DST, 534 [NPF_NATIN] = NPF_SRC, 535 }; 536 npf_t *npf = npc->npc_ctx; 537 npf_connkey_t key, *fw, *bk; 538 npf_conn_t *ret __diagused; 539 npf_addr_t *taddr; 540 in_port_t tport; 541 542 KASSERT(con->c_refcnt > 0); 543 544 npf_nat_gettrans(nt, &taddr, &tport); 545 KASSERT(ntype == NPF_NATOUT || ntype == NPF_NATIN); 546 547 /* Construct a "backwards" key. */ 548 if (!npf_conn_conkey(npc, &key, false)) { 549 return EINVAL; 550 } 551 552 /* Acquire the lock and check for the races. */ 553 mutex_enter(&con->c_lock); 554 if (__predict_false(con->c_flags & CONN_EXPIRE)) { 555 /* The connection got expired. */ 556 mutex_exit(&con->c_lock); 557 return EINVAL; 558 } 559 KASSERT((con->c_flags & CONN_REMOVED) == 0); 560 561 if (__predict_false(con->c_nat != NULL)) { 562 /* Race with a duplicate packet. */ 563 mutex_exit(&con->c_lock); 564 npf_stats_inc(npc->npc_ctx, NPF_STAT_RACE_NAT); 565 return EISCONN; 566 } 567 568 /* Remove the "backwards" key. */ 569 fw = npf_conn_getforwkey(con); 570 bk = npf_conn_getbackkey(con, NPF_CONNKEY_ALEN(fw)); 571 ret = npf_conndb_remove(npf->conn_db, bk); 572 KASSERT(ret == con); 573 574 /* Set the source/destination IDs to the translation values. */ 575 npf_conn_adjkey(bk, taddr, tport, nat_type_dimap[ntype]); 576 577 /* Finally, re-insert the "backwards" key. 
*/ 578 if (!npf_conndb_insert(npf->conn_db, bk, con, false)) { 579 /* 580 * Race: we have hit the duplicate, remove the "forwards" 581 * key and expire our connection; it is no longer valid. 582 */ 583 ret = npf_conndb_remove(npf->conn_db, fw); 584 KASSERT(ret == con); 585 586 atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE); 587 mutex_exit(&con->c_lock); 588 589 npf_stats_inc(npc->npc_ctx, NPF_STAT_RACE_NAT); 590 return EISCONN; 591 } 592 593 /* Associate the NAT entry and release the lock. */ 594 con->c_nat = nt; 595 mutex_exit(&con->c_lock); 596 return 0; 597 } 598 599 /* 600 * npf_conn_expire: explicitly mark connection as expired. 601 */ 602 void 603 npf_conn_expire(npf_conn_t *con) 604 { 605 /* KASSERT(con->c_refcnt > 0); XXX: npf_nat_freepolicy() */ 606 atomic_or_uint(&con->c_flags, CONN_EXPIRE); 607 } 608 609 /* 610 * npf_conn_pass: return true if connection is "pass" one, otherwise false. 611 */ 612 bool 613 npf_conn_pass(const npf_conn_t *con, npf_match_info_t *mi, npf_rproc_t **rp) 614 { 615 KASSERT(con->c_refcnt > 0); 616 if (__predict_true(con->c_flags & CONN_PASS)) { 617 mi->mi_rid = con->c_rid; 618 mi->mi_retfl = con->c_retfl; 619 *rp = con->c_rproc; 620 return true; 621 } 622 return false; 623 } 624 625 /* 626 * npf_conn_setpass: mark connection as a "pass" one and associate the 627 * rule procedure with it. 628 */ 629 void 630 npf_conn_setpass(npf_conn_t *con, const npf_match_info_t *mi, npf_rproc_t *rp) 631 { 632 KASSERT((con->c_flags & CONN_ACTIVE) == 0); 633 KASSERT(con->c_refcnt > 0); 634 KASSERT(con->c_rproc == NULL); 635 636 /* 637 * No need for atomic since the connection is not yet active. 638 * If rproc is set, the caller transfers its reference to us, 639 * which will be released on npf_conn_destroy(). 
640 */ 641 atomic_or_uint(&con->c_flags, CONN_PASS); 642 con->c_rproc = rp; 643 if (rp) { 644 con->c_rid = mi->mi_rid; 645 con->c_retfl = mi->mi_retfl; 646 } 647 } 648 649 /* 650 * npf_conn_release: release a reference, which might allow G/C thread 651 * to destroy this connection. 652 */ 653 void 654 npf_conn_release(npf_conn_t *con) 655 { 656 if ((con->c_flags & (CONN_ACTIVE | CONN_EXPIRE)) == 0) { 657 /* Activate: after this, connection is globally visible. */ 658 atomic_or_uint(&con->c_flags, CONN_ACTIVE); 659 } 660 KASSERT(con->c_refcnt > 0); 661 atomic_dec_uint(&con->c_refcnt); 662 } 663 664 /* 665 * npf_conn_getnat: return associated NAT data entry and indicate 666 * whether it is a "forwards" or "backwards" stream. 667 */ 668 npf_nat_t * 669 npf_conn_getnat(npf_conn_t *con, const int di, bool *forw) 670 { 671 KASSERT(con->c_refcnt > 0); 672 *forw = (con->c_flags & PFIL_ALL) == (u_int)di; 673 return con->c_nat; 674 } 675 676 /* 677 * npf_conn_expired: criterion to check if connection is expired. 678 */ 679 bool 680 npf_conn_expired(npf_t *npf, const npf_conn_t *con, uint64_t tsnow) 681 { 682 const int etime = npf_state_etime(npf, &con->c_state, con->c_proto); 683 int elapsed; 684 685 if (__predict_false(con->c_flags & CONN_EXPIRE)) { 686 /* Explicitly marked to be expired. */ 687 return true; 688 } 689 690 /* 691 * Note: another thread may update 'atime' and it might 692 * become greater than 'now'. 693 */ 694 elapsed = (int64_t)tsnow - con->c_atime; 695 return elapsed > etime; 696 } 697 698 /* 699 * npf_conn_remove: unlink the connection and mark as expired. 700 */ 701 void 702 npf_conn_remove(npf_conndb_t *cd, npf_conn_t *con) 703 { 704 /* Remove both entries of the connection. 
*/ 705 mutex_enter(&con->c_lock); 706 if ((con->c_flags & CONN_REMOVED) == 0) { 707 npf_connkey_t *fw, *bk; 708 npf_conn_t *ret __diagused; 709 710 fw = npf_conn_getforwkey(con); 711 ret = npf_conndb_remove(cd, fw); 712 KASSERT(ret == con); 713 714 bk = npf_conn_getbackkey(con, NPF_CONNKEY_ALEN(fw)); 715 ret = npf_conndb_remove(cd, bk); 716 KASSERT(ret == con); 717 } 718 719 /* Flag the removal and expiration. */ 720 atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE); 721 mutex_exit(&con->c_lock); 722 } 723 724 /* 725 * npf_conn_worker: G/C to run from a worker thread. 726 */ 727 void 728 npf_conn_worker(npf_t *npf) 729 { 730 npf_conndb_gc(npf, npf->conn_db, false, true); 731 } 732 733 /* 734 * npf_conndb_export: construct a list of connections prepared for saving. 735 * Note: this is expected to be an expensive operation. 736 */ 737 int 738 npf_conndb_export(npf_t *npf, nvlist_t *npf_dict) 739 { 740 npf_conn_t *head, *con; 741 742 /* 743 * Note: acquire conn_lock to prevent from the database 744 * destruction and G/C thread. 745 */ 746 mutex_enter(&npf->conn_lock); 747 if (npf->conn_tracking != CONN_TRACKING_ON) { 748 mutex_exit(&npf->conn_lock); 749 return 0; 750 } 751 head = npf_conndb_getlist(npf->conn_db); 752 con = head; 753 while (con) { 754 nvlist_t *cdict; 755 756 if ((cdict = npf_conn_export(npf, con)) != NULL) { 757 nvlist_append_nvlist_array(npf_dict, "conn-list", cdict); 758 nvlist_destroy(cdict); 759 } 760 if ((con = npf_conndb_getnext(npf->conn_db, con)) == head) { 761 break; 762 } 763 } 764 mutex_exit(&npf->conn_lock); 765 return 0; 766 } 767 768 /* 769 * npf_conn_export: serialise a single connection. 
770 */ 771 static nvlist_t * 772 npf_conn_export(npf_t *npf, npf_conn_t *con) 773 { 774 nvlist_t *cdict, *kdict; 775 npf_connkey_t *fw, *bk; 776 unsigned alen; 777 778 if ((con->c_flags & (CONN_ACTIVE|CONN_EXPIRE)) != CONN_ACTIVE) { 779 return NULL; 780 } 781 cdict = nvlist_create(0); 782 nvlist_add_number(cdict, "flags", con->c_flags); 783 nvlist_add_number(cdict, "proto", con->c_proto); 784 if (con->c_ifid) { 785 char ifname[IFNAMSIZ]; 786 npf_ifmap_copyname(npf, con->c_ifid, ifname, sizeof(ifname)); 787 nvlist_add_string(cdict, "ifname", ifname); 788 } 789 nvlist_add_binary(cdict, "state", &con->c_state, sizeof(npf_state_t)); 790 791 fw = npf_conn_getforwkey(con); 792 alen = NPF_CONNKEY_ALEN(fw); 793 KASSERT(alen == con->c_alen); 794 bk = npf_conn_getbackkey(con, alen); 795 796 kdict = npf_connkey_export(fw); 797 nvlist_move_nvlist(cdict, "forw-key", kdict); 798 799 kdict = npf_connkey_export(bk); 800 nvlist_move_nvlist(cdict, "back-key", kdict); 801 802 /* Let the address length be based on on first key. */ 803 nvlist_add_number(cdict, "alen", alen); 804 805 if (con->c_nat) { 806 npf_nat_export(cdict, con->c_nat); 807 } 808 return cdict; 809 } 810 811 /* 812 * npf_conn_import: fully reconstruct a single connection from a 813 * nvlist and insert into the given database. 814 */ 815 int 816 npf_conn_import(npf_t *npf, npf_conndb_t *cd, const nvlist_t *cdict, 817 npf_ruleset_t *natlist) 818 { 819 npf_conn_t *con; 820 npf_connkey_t *fw, *bk; 821 const nvlist_t *nat, *conkey; 822 const char *ifname; 823 const void *state; 824 unsigned alen, idx; 825 size_t len; 826 827 /* 828 * To determine the length of the connection, which depends 829 * on the address length in the connection keys. 830 */ 831 alen = dnvlist_get_number(cdict, "alen", 0); 832 idx = NPF_CONNCACHE(alen); 833 834 /* Allocate a connection and initialise it (clear first). 
*/ 835 con = pool_cache_get(npf->conn_cache[idx], PR_WAITOK); 836 memset(con, 0, sizeof(npf_conn_t)); 837 mutex_init(&con->c_lock, MUTEX_DEFAULT, IPL_SOFTNET); 838 npf_stats_inc(npf, NPF_STAT_CONN_CREATE); 839 840 con->c_proto = dnvlist_get_number(cdict, "proto", 0); 841 con->c_flags = dnvlist_get_number(cdict, "flags", 0); 842 con->c_flags &= PFIL_ALL | CONN_ACTIVE | CONN_PASS; 843 conn_update_atime(con); 844 845 ifname = dnvlist_get_string(cdict, "ifname", NULL); 846 if (ifname && (con->c_ifid = npf_ifmap_register(npf, ifname)) == 0) { 847 goto err; 848 } 849 850 state = dnvlist_get_binary(cdict, "state", &len, NULL, 0); 851 if (!state || len != sizeof(npf_state_t)) { 852 goto err; 853 } 854 memcpy(&con->c_state, state, sizeof(npf_state_t)); 855 856 /* Reconstruct NAT association, if any. */ 857 if ((nat = dnvlist_get_nvlist(cdict, "nat", NULL)) != NULL && 858 (con->c_nat = npf_nat_import(npf, nat, natlist, con)) == NULL) { 859 goto err; 860 } 861 862 /* 863 * Fetch and copy the keys for each direction. 864 */ 865 fw = npf_conn_getforwkey(con); 866 conkey = dnvlist_get_nvlist(cdict, "forw-key", NULL); 867 if (conkey == NULL || !npf_connkey_import(conkey, fw)) { 868 goto err; 869 } 870 bk = npf_conn_getbackkey(con, NPF_CONNKEY_ALEN(fw)); 871 conkey = dnvlist_get_nvlist(cdict, "back-key", NULL); 872 if (conkey == NULL || !npf_connkey_import(conkey, bk)) { 873 goto err; 874 } 875 876 /* Guard against the contradicting address lengths. */ 877 if (NPF_CONNKEY_ALEN(fw) != alen || NPF_CONNKEY_ALEN(bk) != alen) { 878 goto err; 879 } 880 881 /* Insert the entries and the connection itself. 
*/ 882 if (!npf_conndb_insert(cd, fw, con, true)) { 883 goto err; 884 } 885 if (!npf_conndb_insert(cd, bk, con, false)) { 886 npf_conndb_remove(cd, fw); 887 goto err; 888 } 889 890 NPF_PRINTF(("NPF: imported conn %p\n", con)); 891 npf_conndb_enqueue(cd, con); 892 return 0; 893 err: 894 npf_conn_destroy(npf, con); 895 return EINVAL; 896 } 897 898 int 899 npf_conn_find(npf_t *npf, const nvlist_t *idict, nvlist_t **odict) 900 { 901 const nvlist_t *kdict; 902 npf_connkey_t key; 903 npf_conn_t *con; 904 uint16_t dir; 905 bool forw; 906 907 kdict = dnvlist_get_nvlist(idict, "key", NULL); 908 if (!kdict || !npf_connkey_import(kdict, &key)) { 909 return EINVAL; 910 } 911 con = npf_conndb_lookup(npf->conn_db, &key, &forw); 912 if (con == NULL) { 913 return ESRCH; 914 } 915 dir = dnvlist_get_number(idict, "direction", 0); 916 if (!npf_conn_check(con, NULL, dir, true)) { 917 atomic_dec_uint(&con->c_refcnt); 918 return ESRCH; 919 } 920 *odict = npf_conn_export(npf, con); 921 atomic_dec_uint(&con->c_refcnt); 922 return *odict ? 0 : ENOSPC; 923 } 924 925 #if defined(DDB) || defined(_NPF_TESTING) 926 927 void 928 npf_conn_print(npf_conn_t *con) 929 { 930 const npf_connkey_t *fw = npf_conn_getforwkey(con); 931 const npf_connkey_t *bk = npf_conn_getbackkey(con, NPF_CONNKEY_ALEN(fw)); 932 const unsigned proto = con->c_proto; 933 struct timespec tspnow; 934 935 getnanouptime(&tspnow); 936 printf("%p:\n\tproto %d flags 0x%x tsdiff %ld etime %d\n", con, 937 proto, con->c_flags, (long)(tspnow.tv_sec - con->c_atime), 938 npf_state_etime(npf_getkernctx(), &con->c_state, proto)); 939 npf_connkey_print(fw); 940 npf_connkey_print(bk); 941 npf_state_dump(&con->c_state); 942 if (con->c_nat) { 943 npf_nat_dump(con->c_nat); 944 } 945 } 946 947 #endif 948