1 /*- 2 * Copyright (c) 2014-2018 Mindaugas Rasiukevicius <rmind at netbsd org> 3 * Copyright (c) 2010-2014 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This material is based upon work partially supported by The 7 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * NPF connection tracking for stateful filtering and translation. 33 * 34 * Overview 35 * 36 * Packets can be incoming or outgoing with respect to an interface. 37 * Connection direction is identified by the direction of its first 38 * packet. The meaning of incoming/outgoing packet in the context of 39 * connection direction can be confusing. 
Therefore, we will use the 40 * terms "forwards stream" and "backwards stream", where packets in 41 * the forwards stream mean the packets travelling in the direction 42 * as the connection direction. 43 * 44 * All connections have two keys and thus two entries: 45 * 46 * npf_conn_t::c_forw_entry for the forwards stream and 47 * npf_conn_t::c_back_entry for the backwards stream. 48 * 49 * The keys are formed from the 5-tuple (source/destination address, 50 * source/destination port and the protocol). Additional matching 51 * is performed for the interface (a common behaviour is equivalent 52 * to the 6-tuple lookup including the interface ID). Note that the 53 * key may be formed using translated values in a case of NAT. 54 * 55 * Connections can serve two purposes: for the implicit passing or 56 * to accommodate the dynamic NAT. Connections for the former purpose 57 * are created by the rules with "stateful" attribute and are used for 58 * stateful filtering. Such connections indicate that the packet of 59 * the backwards stream should be passed without inspection of the 60 * ruleset. The other purpose is to associate a dynamic NAT mechanism 61 * with a connection. Such connections are created by the NAT policies 62 * and they have a relationship with NAT translation structure via 63 * npf_conn_t::c_nat. A single connection can serve both purposes, 64 * which is a common case. 65 * 66 * Connection life-cycle 67 * 68 * Connections are established when a packet matches said rule or 69 * NAT policy. Both keys of the established connection are inserted 70 * into the connection database. A garbage collection thread 71 * periodically scans all connections and depending on connection 72 * properties (e.g. last activity time, protocol) removes connection 73 * entries and expires the actual connections. 74 * 75 * Each connection has a reference count. The reference is acquired 76 * on lookup and should be released by the caller. 
It guarantees that 77 * the connection will not be destroyed, although it may be expired. 78 * 79 * Synchronisation 80 * 81 * Connection database is accessed in a lock-less manner by the main 82 * routines: npf_conn_inspect() and npf_conn_establish(). Since they 83 * are always called from a software interrupt, the database is 84 * protected using passive serialisation. The main place which can 85 * destroy a connection is npf_conn_worker(). The database itself 86 * can be replaced and destroyed in npf_conn_reload(). 87 * 88 * ALG support 89 * 90 * Application-level gateways (ALGs) can override generic connection 91 * inspection (npf_alg_conn() call in npf_conn_inspect() function) by 92 * performing their own lookup using different key. Recursive call 93 * to npf_conn_inspect() is not allowed. The ALGs ought to use the 94 * npf_conn_lookup() function for this purpose. 95 * 96 * Lock order 97 * 98 * npf_config_lock -> 99 * conn_lock -> 100 * npf_conn_t::c_lock 101 */ 102 103 #ifdef _KERNEL 104 #include <sys/cdefs.h> 105 __KERNEL_RCSID(0, "$NetBSD: npf_conn.c,v 1.26 2019/01/19 21:19:31 rmind Exp $"); 106 107 #include <sys/param.h> 108 #include <sys/types.h> 109 110 #include <netinet/in.h> 111 #include <netinet/tcp.h> 112 113 #include <sys/atomic.h> 114 #include <sys/condvar.h> 115 #include <sys/kmem.h> 116 #include <sys/kthread.h> 117 #include <sys/mutex.h> 118 #include <net/pfil.h> 119 #include <sys/pool.h> 120 #include <sys/queue.h> 121 #include <sys/systm.h> 122 #endif 123 124 #define __NPF_CONN_PRIVATE 125 #include "npf_conn.h" 126 #include "npf_impl.h" 127 128 /* 129 * Connection flags: PFIL_IN and PFIL_OUT values are reserved for direction. 
 */
CTASSERT(PFIL_ALL == (0x001 | 0x002));
#define	CONN_ACTIVE	0x004	/* visible on inspection */
#define	CONN_PASS	0x008	/* perform implicit passing */
#define	CONN_EXPIRE	0x010	/* explicitly expire */
#define	CONN_REMOVED	0x020	/* "forw/back" entries removed */

/* Connection tracking state: disabled (off) or enabled (on). */
enum { CONN_TRACKING_OFF, CONN_TRACKING_ON };

static nvlist_t *	npf_conn_export(npf_t *, const npf_conn_t *);

/*
 * npf_conn_{init,fini}: initialise/destroy connection tracking.
 */

void
npf_conn_init(npf_t *npf, int flags)
{
	/* Pool cache of npf_conn_t objects, aligned to the coherency unit. */
	npf->conn_cache = pool_cache_init(sizeof(npf_conn_t), coherency_unit,
	    0, 0, "npfconpl", NULL, IPL_NET, NULL, NULL, NULL);
	mutex_init(&npf->conn_lock, MUTEX_DEFAULT, IPL_NONE);
	npf->conn_tracking = CONN_TRACKING_OFF;
	npf->conn_db = npf_conndb_create();

	/* Register the G/C worker, unless explicitly disabled. */
	if ((flags & NPF_NO_GC) == 0) {
		npf_worker_register(npf, npf_conn_worker);
	}
}

void
npf_conn_fini(npf_t *npf)
{
	/* Note: the caller should have flushed the connections. */
	KASSERT(npf->conn_tracking == CONN_TRACKING_OFF);
	npf_worker_unregister(npf, npf_conn_worker);

	npf_conndb_destroy(npf->conn_db);
	pool_cache_destroy(npf->conn_cache);
	mutex_destroy(&npf->conn_lock);
}

/*
 * npf_conn_load: perform the load by flushing the current connection
 * database and replacing it with the new one or just destroying.
 *
 * => The caller must disable the connection tracking and ensure that
 *    there are no connection database lookups or references in-flight.
 */
void
npf_conn_load(npf_t *npf, npf_conndb_t *ndb, bool track)
{
	npf_conndb_t *odb = NULL;

	KASSERT(npf_config_locked_p(npf));

	/*
	 * The connection database is in the quiescent state.
	 * Prevent G/C thread from running and install a new database.
	 */
	mutex_enter(&npf->conn_lock);
	if (ndb) {
		KASSERT(npf->conn_tracking == CONN_TRACKING_OFF);
		odb = npf->conn_db;
		npf->conn_db = ndb;
		/* Publish the new database before tracking may be enabled. */
		membar_sync();
	}
	if (track) {
		/* After this point lookups start flying in. */
		npf->conn_tracking = CONN_TRACKING_ON;
	}
	mutex_exit(&npf->conn_lock);

	if (odb) {
		/*
		 * Flush all, no sync since the caller did it for us.
		 * Also, release the pool cache memory.
		 */
		npf_conndb_gc(npf, odb, true, false);
		npf_conndb_destroy(odb);
		pool_cache_invalidate(npf->conn_cache);
	}
}

/*
 * npf_conn_tracking: enable/disable connection tracking.
 */
void
npf_conn_tracking(npf_t *npf, bool track)
{
	KASSERT(npf_config_locked_p(npf));
	npf->conn_tracking = track ? CONN_TRACKING_ON : CONN_TRACKING_OFF;
}

/*
 * npf_conn_trackable_p: return true if the packet is eligible for
 * connection tracking (tracking enabled and L3/L4 headers cached).
 */
static inline bool
npf_conn_trackable_p(const npf_cache_t *npc)
{
	const npf_t *npf = npc->npc_ctx;

	/*
	 * Check if connection tracking is on.  Also, if layer 3 and 4 are
	 * not cached - protocol is not supported or packet is invalid.
	 */
	if (npf->conn_tracking != CONN_TRACKING_ON) {
		return false;
	}
	if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) {
		return false;
	}
	return true;
}

/*
 * connkey_setkey: construct a connection key from the protocol, the
 * source/destination addresses and IDs (ports).
 *
 * => Returns the key length in bytes.
 */
static uint32_t
connkey_setkey(npf_connkey_t *key, uint16_t proto, const void *ipv,
    const uint16_t *id, unsigned alen, bool forw)
{
	uint32_t isrc, idst, *k = key->ck_key;
	const npf_addr_t * const *ips = ipv;

	if (__predict_true(forw)) {
		isrc = NPF_SRC, idst = NPF_DST;
	} else {
		isrc = NPF_DST, idst = NPF_SRC;
	}

	/*
	 * Construct a key formed out of 32-bit integers.  The key layout:
	 *
	 * Field: | proto  |  alen  | src-id | dst-id | src-addr | dst-addr |
	 *        +--------+--------+--------+--------+----------+----------+
	 * Bits:  |   16   |   16   |   16   |   16   |  32-128  |  32-128  |
	 *
	 * The source and destination are inverted if the key is for the
	 * backwards stream (forw == false).  The address length depends
	 * on the 'alen' field; it is a length in bytes, either 4 or 16.
	 */

	k[0] = ((uint32_t)proto << 16) | (alen & 0xffff);
	k[1] = ((uint32_t)id[isrc] << 16) | id[idst];

	if (__predict_true(alen == sizeof(in_addr_t))) {
		/* IPv4: one 32-bit word per address. */
		k[2] = ips[isrc]->word32[0];
		k[3] = ips[idst]->word32[0];
		return 4 * sizeof(uint32_t);
	} else {
		/* IPv6: copy the full addresses word-by-word. */
		const u_int nwords = alen >> 2;
		memcpy(&k[2], ips[isrc], alen);
		memcpy(&k[2 + nwords], ips[idst], alen);
		return (2 + (nwords * 2)) * sizeof(uint32_t);
	}
}

/*
 * connkey_getkey: decompose a connection key back into the protocol,
 * addresses and IDs (the inverse of connkey_setkey).
 */
static void
connkey_getkey(const npf_connkey_t *key, uint16_t *proto, npf_addr_t *ips,
    uint16_t *id, uint16_t *alen)
{
	const uint32_t *k = key->ck_key;

	*proto = k[0] >> 16;
	*alen = k[0] & 0xffff;
	id[NPF_SRC] = k[1] >> 16;
	id[NPF_DST] = k[1] & 0xffff;

	switch (*alen) {
	case sizeof(struct in6_addr):
	case sizeof(struct in_addr):
		memcpy(&ips[NPF_SRC], &k[2], *alen);
		memcpy(&ips[NPF_DST], &k[2 + ((unsigned)*alen >> 2)], *alen);
		return;
	default:
		/* Corrupted key: the address length must be 4 or 16. */
		KASSERT(0);
	}
}

/*
 * npf_conn_conkey: construct a key for the connection lookup.
 *
 * => Returns the key length in bytes or zero on failure.
 */
unsigned
npf_conn_conkey(const npf_cache_t *npc, npf_connkey_t *key, const bool forw)
{
	const u_int proto = npc->npc_proto;
	const u_int alen = npc->npc_alen;
	const struct tcphdr *th;
	const struct udphdr *uh;
	uint16_t id[2];

	switch (proto) {
	case IPPROTO_TCP:
		KASSERT(npf_iscached(npc, NPC_TCP));
		th = npc->npc_l4.tcp;
		id[NPF_SRC] = th->th_sport;
		id[NPF_DST] = th->th_dport;
		break;
	case IPPROTO_UDP:
		KASSERT(npf_iscached(npc, NPC_UDP));
		uh = npc->npc_l4.udp;
		id[NPF_SRC] = uh->uh_sport;
		id[NPF_DST] = uh->uh_dport;
		break;
	case IPPROTO_ICMP:
		if (npf_iscached(npc, NPC_ICMP_ID)) {
			const struct icmp *ic = npc->npc_l4.icmp;
			/* ICMP has no ports: use the ICMP ID for both ends. */
			id[NPF_SRC] = ic->icmp_id;
			id[NPF_DST] = ic->icmp_id;
			break;
		}
		return 0;
	case IPPROTO_ICMPV6:
		if (npf_iscached(npc, NPC_ICMP_ID)) {
			const struct icmp6_hdr *ic6 = npc->npc_l4.icmp6;
			id[NPF_SRC] = ic6->icmp6_id;
			id[NPF_DST] = ic6->icmp6_id;
			break;
		}
		return 0;
	default:
		/* Unsupported protocol. */
		return 0;
	}
	return connkey_setkey(key, proto, npc->npc_ips, id, alen, forw);
}

/*
 * connkey_set_addr: overwrite the source (di == 0) or destination
 * (di == 1) address in-place within the key.
 */
static __inline void
connkey_set_addr(npf_connkey_t *key, const npf_addr_t *naddr, const int di)
{
	const u_int alen = key->ck_key[0] & 0xffff;
	uint32_t *addr = &key->ck_key[2 + ((alen >> 2) * di)];

	KASSERT(alen > 0);
	memcpy(addr, naddr, alen);
}

/*
 * connkey_set_id: overwrite the source (di == 0) or destination
 * (di == 1) ID (port) in-place within the key.
 */
static __inline void
connkey_set_id(npf_connkey_t *key, const uint16_t id, const int di)
{
	const uint32_t oid = key->ck_key[1];
	const u_int shift = 16 * !di;
	const uint32_t mask = 0xffff0000 >> shift;

	key->ck_key[1] = ((uint32_t)id << shift) | (oid & mask);
}

/* conn_update_atime: stamp the connection with the current uptime. */
static inline void
conn_update_atime(npf_conn_t *con)
{
	struct timespec tsnow;

	getnanouptime(&tsnow);
	con->c_atime = tsnow.tv_sec;
}

/*
 * npf_conn_ok: check if the connection is active and has the right direction.
 */
static bool
npf_conn_ok(const npf_conn_t *con, const int di, bool forw)
{
	const uint32_t flags = con->c_flags;

	/* Check if connection is active and not expired. */
	bool ok = (flags & (CONN_ACTIVE | CONN_EXPIRE)) == CONN_ACTIVE;
	if (__predict_false(!ok)) {
		return false;
	}

	/* Check if the direction is consistent. */
	bool pforw = (flags & PFIL_ALL) == (unsigned)di;
	if (__predict_false(forw != pforw)) {
		return false;
	}
	return true;
}

/*
 * npf_conn_lookup: lookup if there is an established connection.
 *
 * => If found, we will hold a reference for the caller.
 */
npf_conn_t *
npf_conn_lookup(const npf_cache_t *npc, const int di, bool *forw)
{
	npf_t *npf = npc->npc_ctx;
	const nbuf_t *nbuf = npc->npc_nbuf;
	npf_conn_t *con;
	npf_connkey_t key;
	u_int cifid;

	/* Construct a key and lookup for a connection in the store. */
	if (!npf_conn_conkey(npc, &key, true)) {
		return NULL;
	}
	con = npf_conndb_lookup(npf->conn_db, &key, forw);
	if (con == NULL) {
		return NULL;
	}
	KASSERT(npc->npc_proto == con->c_proto);

	/* Check if connection is active and not expired. */
	if (!npf_conn_ok(con, di, *forw)) {
		/* Drop the reference acquired by the lookup. */
		atomic_dec_uint(&con->c_refcnt);
		return NULL;
	}

	/*
	 * Match the interface and the direction of the connection entry
	 * and the packet.  Zero c_ifid means a global (any-interface)
	 * connection.
	 */
	cifid = con->c_ifid;
	if (__predict_false(cifid && cifid != nbuf->nb_ifid)) {
		atomic_dec_uint(&con->c_refcnt);
		return NULL;
	}

	/* Update the last activity time. */
	conn_update_atime(con);
	return con;
}

/*
 * npf_conn_inspect: lookup a connection and inspect the protocol data.
 *
 * => If found, we will hold a reference for the caller.
 */
npf_conn_t *
npf_conn_inspect(npf_cache_t *npc, const int di, int *error)
{
	nbuf_t *nbuf = npc->npc_nbuf;
	npf_conn_t *con;
	bool forw, ok;

	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
	if (!npf_conn_trackable_p(npc)) {
		return NULL;
	}

	/* Query ALG which may lookup connection for us. */
	if ((con = npf_alg_conn(npc, di)) != NULL) {
		/* Note: reference is held. */
		return con;
	}
	if (nbuf_head_mbuf(nbuf) == NULL) {
		*error = ENOMEM;
		return NULL;
	}
	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));

	/* Main lookup of the connection. */
	if ((con = npf_conn_lookup(npc, di, &forw)) == NULL) {
		return NULL;
	}

	/* Inspect the protocol data and handle state changes. */
	mutex_enter(&con->c_lock);
	ok = npf_state_inspect(npc, &con->c_state, forw);
	mutex_exit(&con->c_lock);

	/* If invalid state: let the rules deal with it. */
	if (__predict_false(!ok)) {
		npf_conn_release(con);
		npf_stats_inc(npc->npc_ctx, NPF_STAT_INVALID_STATE);
		return NULL;
	}

	/*
	 * If this is multi-end state, then specially tag the packet
	 * so it will be just passed-through on other interfaces.
	 */
	if (con->c_ifid == 0 && nbuf_add_tag(nbuf, NPF_NTAG_PASS) != 0) {
		npf_conn_release(con);
		*error = ENOMEM;
		return NULL;
	}
	return con;
}

/*
 * npf_conn_establish: create a new connection, insert into the global list.
 *
 * => Connection is created with the reference held for the caller.
 * => Connection will be activated on the first reference release.
 */
npf_conn_t *
npf_conn_establish(npf_cache_t *npc, int di, bool per_if)
{
	npf_t *npf = npc->npc_ctx;
	const nbuf_t *nbuf = npc->npc_nbuf;
	npf_conn_t *con;
	int error = 0;

	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));

	if (!npf_conn_trackable_p(npc)) {
		return NULL;
	}

	/* Allocate and initialise the new connection. */
	con = pool_cache_get(npf->conn_cache, PR_NOWAIT);
	if (__predict_false(!con)) {
		/* Kick the G/C worker to reclaim memory. */
		npf_worker_signal(npf);
		return NULL;
	}
	NPF_PRINTF(("NPF: create conn %p\n", con));
	npf_stats_inc(npf, NPF_STAT_CONN_CREATE);

	mutex_init(&con->c_lock, MUTEX_DEFAULT, IPL_SOFTNET);
	con->c_flags = (di & PFIL_ALL);
	con->c_refcnt = 0;
	con->c_rproc = NULL;
	con->c_nat = NULL;

	/* Initialize the protocol state. */
	if (!npf_state_init(npc, &con->c_state)) {
		npf_conn_destroy(npf, con);
		return NULL;
	}

	KASSERT(npf_iscached(npc, NPC_IP46));
	npf_connkey_t *fw = &con->c_forw_entry;
	npf_connkey_t *bk = &con->c_back_entry;

	/*
	 * Construct "forwards" and "backwards" keys.  Also, set the
	 * interface ID for this connection (unless it is global).
	 */
	if (!npf_conn_conkey(npc, fw, true) ||
	    !npf_conn_conkey(npc, bk, false)) {
		npf_conn_destroy(npf, con);
		return NULL;
	}
	fw->ck_backptr = bk->ck_backptr = con;
	con->c_ifid = per_if ? nbuf->nb_ifid : 0;
	con->c_proto = npc->npc_proto;

	/*
	 * Set last activity time for a new connection and acquire
	 * a reference for the caller before we make it visible.
	 */
	conn_update_atime(con);
	con->c_refcnt = 1;

	/*
	 * Insert both keys (entries representing directions) of the
	 * connection.  At this point it becomes visible, but we activate
	 * the connection later.
	 */
	mutex_enter(&con->c_lock);
	if (!npf_conndb_insert(npf->conn_db, fw)) {
		error = EISCONN;
		goto err;
	}
	if (!npf_conndb_insert(npf->conn_db, bk)) {
		/* Undo the first insertion on the partial failure. */
		npf_conn_t *ret __diagused;
		ret = npf_conndb_remove(npf->conn_db, fw);
		KASSERT(ret == con);
		error = EISCONN;
		goto err;
	}
err:
	/*
	 * If we have hit the duplicate: mark the connection as expired
	 * and let the G/C thread to take care of it.  We cannot do it
	 * here since there might be references acquired already.
	 */
	if (error) {
		atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE);
		atomic_dec_uint(&con->c_refcnt);
		npf_stats_inc(npf, NPF_STAT_RACE_CONN);
	} else {
		NPF_PRINTF(("NPF: establish conn %p\n", con));
	}

	/* Finally, insert into the connection list. */
	npf_conndb_enqueue(npf->conn_db, con);
	mutex_exit(&con->c_lock);

	return error ? NULL : con;
}

/*
 * npf_conn_destroy: free the connection and any structures associated
 * with it.  The caller must hold the last (zero) reference.
 */
void
npf_conn_destroy(npf_t *npf, npf_conn_t *con)
{
	KASSERT(con->c_refcnt == 0);

	if (con->c_nat) {
		/* Release any NAT structures. */
		npf_nat_destroy(con->c_nat);
	}
	if (con->c_rproc) {
		/* Release the rule procedure. */
		npf_rproc_release(con->c_rproc);
	}

	/* Destroy the state. */
	npf_state_destroy(&con->c_state);
	mutex_destroy(&con->c_lock);

	/* Free the structure, increase the counter. */
	pool_cache_put(npf->conn_cache, con);
	npf_stats_inc(npf, NPF_STAT_CONN_DESTROY);
	NPF_PRINTF(("NPF: conn %p destroyed\n", con));
}

/*
 * npf_conn_setnat: associate NAT entry with the connection, update and
 * re-insert connection entry using the translation values.
 *
 * => The caller must be holding a reference.
 */
int
npf_conn_setnat(const npf_cache_t *npc, npf_conn_t *con,
    npf_nat_t *nt, u_int ntype)
{
	/* Map the NAT type to the key index that gets translated. */
	static const u_int nat_type_dimap[] = {
		[NPF_NATOUT] = NPF_DST,
		[NPF_NATIN] = NPF_SRC,
	};
	npf_t *npf = npc->npc_ctx;
	npf_connkey_t key, *bk;
	npf_conn_t *ret __diagused;
	npf_addr_t *taddr;
	in_port_t tport;
	u_int tidx;

	KASSERT(con->c_refcnt > 0);

	npf_nat_gettrans(nt, &taddr, &tport);
	KASSERT(ntype == NPF_NATOUT || ntype == NPF_NATIN);
	tidx = nat_type_dimap[ntype];

	/* Construct a "backwards" key. */
	if (!npf_conn_conkey(npc, &key, false)) {
		return EINVAL;
	}

	/* Acquire the lock and check for the races. */
	mutex_enter(&con->c_lock);
	if (__predict_false(con->c_flags & CONN_EXPIRE)) {
		/* The connection got expired. */
		mutex_exit(&con->c_lock);
		return EINVAL;
	}
	KASSERT((con->c_flags & CONN_REMOVED) == 0);

	if (__predict_false(con->c_nat != NULL)) {
		/* Race with a duplicate packet. */
		mutex_exit(&con->c_lock);
		npf_stats_inc(npc->npc_ctx, NPF_STAT_RACE_NAT);
		return EISCONN;
	}

	/* Remove the "backwards" entry. */
	ret = npf_conndb_remove(npf->conn_db, &con->c_back_entry);
	KASSERT(ret == con);

	/* Set the source/destination IDs to the translation values. */
	bk = &con->c_back_entry;
	connkey_set_addr(bk, taddr, tidx);
	if (tport) {
		connkey_set_id(bk, tport, tidx);
	}

	/* Finally, re-insert the "backwards" entry. */
	if (!npf_conndb_insert(npf->conn_db, bk)) {
		/*
		 * Race: we have hit the duplicate, remove the "forwards"
		 * entry and expire our connection; it is no longer valid.
		 */
		ret = npf_conndb_remove(npf->conn_db, &con->c_forw_entry);
		KASSERT(ret == con);

		atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE);
		mutex_exit(&con->c_lock);

		npf_stats_inc(npc->npc_ctx, NPF_STAT_RACE_NAT);
		return EISCONN;
	}

	/* Associate the NAT entry and release the lock. */
	con->c_nat = nt;
	mutex_exit(&con->c_lock);
	return 0;
}

/*
 * npf_conn_expire: explicitly mark connection as expired.
 */
void
npf_conn_expire(npf_conn_t *con)
{
	/* KASSERT(con->c_refcnt > 0); XXX: npf_nat_freepolicy() */
	atomic_or_uint(&con->c_flags, CONN_EXPIRE);
}

/*
 * npf_conn_pass: return true if connection is "pass" one, otherwise false.
 */
bool
npf_conn_pass(const npf_conn_t *con, npf_match_info_t *mi, npf_rproc_t **rp)
{
	KASSERT(con->c_refcnt > 0);
	if (__predict_true(con->c_flags & CONN_PASS)) {
		mi->mi_rid = con->c_rid;
		mi->mi_retfl = con->c_retfl;
		*rp = con->c_rproc;
		return true;
	}
	return false;
}

/*
 * npf_conn_setpass: mark connection as a "pass" one and associate the
 * rule procedure with it.
 */
void
npf_conn_setpass(npf_conn_t *con, const npf_match_info_t *mi, npf_rproc_t *rp)
{
	KASSERT((con->c_flags & CONN_ACTIVE) == 0);
	KASSERT(con->c_refcnt > 0);
	KASSERT(con->c_rproc == NULL);

	/*
	 * No need for atomic since the connection is not yet active.
	 * If rproc is set, the caller transfers its reference to us,
	 * which will be released on npf_conn_destroy().
	 */
	atomic_or_uint(&con->c_flags, CONN_PASS);
	con->c_rproc = rp;
	if (rp) {
		con->c_rid = mi->mi_rid;
		con->c_retfl = mi->mi_retfl;
	}
}

/*
 * npf_conn_release: release a reference, which might allow G/C thread
 * to destroy this connection.
 */
void
npf_conn_release(npf_conn_t *con)
{
	if ((con->c_flags & (CONN_ACTIVE | CONN_EXPIRE)) == 0) {
		/* Activate: after this, connection is globally visible. */
		atomic_or_uint(&con->c_flags, CONN_ACTIVE);
	}
	KASSERT(con->c_refcnt > 0);
	atomic_dec_uint(&con->c_refcnt);
}

/*
 * npf_conn_getnat: return associated NAT data entry and indicate
 * whether it is a "forwards" or "backwards" stream.
 */
npf_nat_t *
npf_conn_getnat(npf_conn_t *con, const int di, bool *forw)
{
	KASSERT(con->c_refcnt > 0);
	*forw = (con->c_flags & PFIL_ALL) == (u_int)di;
	return con->c_nat;
}

/*
 * npf_conn_expired: criterion to check if connection is expired.
 */
bool
npf_conn_expired(const npf_conn_t *con, uint64_t tsnow)
{
	const int etime = npf_state_etime(&con->c_state, con->c_proto);
	int elapsed;

	if (__predict_false(con->c_flags & CONN_EXPIRE)) {
		/* Explicitly marked to be expired. */
		return true;
	}

	/*
	 * Note: another thread may update 'atime' and it might
	 * become greater than 'now'.
	 */
	elapsed = (int64_t)tsnow - con->c_atime;
	return elapsed > etime;
}

/*
 * npf_conn_remove: unlink the connection and mark as expired.
 */
void
npf_conn_remove(npf_conndb_t *cd, npf_conn_t *con)
{
	/* Remove both entries of the connection. */
	mutex_enter(&con->c_lock);
	if ((con->c_flags & CONN_REMOVED) == 0) {
		npf_conn_t *ret __diagused;

		ret = npf_conndb_remove(cd, &con->c_forw_entry);
		KASSERT(ret == con);
		ret = npf_conndb_remove(cd, &con->c_back_entry);
		KASSERT(ret == con);
	}

	/* Flag the removal and expiration. */
	atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE);
	mutex_exit(&con->c_lock);
}

/*
 * npf_conn_worker: G/C to run from a worker thread.
 */
void
npf_conn_worker(npf_t *npf)
{
	npf_conndb_gc(npf, npf->conn_db, false, true);
}

/*
 * npf_conndb_export: construct a list of connections prepared for saving.
 * Note: this is expected to be an expensive operation.
 */
int
npf_conndb_export(npf_t *npf, nvlist_t *npf_dict)
{
	npf_conn_t *head, *con;

	/*
	 * Note: acquire conn_lock to prevent from the database
	 * destruction and G/C thread.
	 */
	mutex_enter(&npf->conn_lock);
	if (npf->conn_tracking != CONN_TRACKING_ON) {
		mutex_exit(&npf->conn_lock);
		return 0;
	}
	head = npf_conndb_getlist(npf->conn_db);
	con = head;
	while (con) {
		nvlist_t *cdict;

		if ((cdict = npf_conn_export(npf, con)) != NULL) {
			nvlist_append_nvlist_array(npf_dict, "conn-list", cdict);
			nvlist_destroy(cdict);
		}
		/* The list is circular: stop when we are back at the head. */
		if ((con = npf_conndb_getnext(npf->conn_db, con)) == head) {
			break;
		}
	}
	mutex_exit(&npf->conn_lock);
	return 0;
}

/*
 * npf_connkey_export: serialise a single connection key into a nvlist.
 */
static nvlist_t *
npf_connkey_export(const npf_connkey_t *key)
{
	uint16_t id[2], alen, proto;
	npf_addr_t ips[2];
	nvlist_t *kdict;

	kdict = nvlist_create(0);
	connkey_getkey(key, &proto, ips, id, &alen);
	nvlist_add_number(kdict, "proto", proto);
	nvlist_add_number(kdict, "sport", id[NPF_SRC]);
	nvlist_add_number(kdict, "dport", id[NPF_DST]);
	nvlist_add_binary(kdict, "saddr", &ips[NPF_SRC], alen);
	nvlist_add_binary(kdict, "daddr", &ips[NPF_DST], alen);
	return kdict;
}

/*
 * npf_conn_export: serialise a single connection.
 */
static nvlist_t *
npf_conn_export(npf_t *npf, const npf_conn_t *con)
{
	nvlist_t *cdict, *kdict;

	/* Skip connections which are not active or already expired. */
	if ((con->c_flags & (CONN_ACTIVE|CONN_EXPIRE)) != CONN_ACTIVE) {
		return NULL;
	}
	cdict = nvlist_create(0);
	nvlist_add_number(cdict, "flags", con->c_flags);
	nvlist_add_number(cdict, "proto", con->c_proto);
	if (con->c_ifid) {
		const char *ifname = npf_ifmap_getname(npf, con->c_ifid);
		nvlist_add_string(cdict, "ifname", ifname);
	}
	nvlist_add_binary(cdict, "state", &con->c_state, sizeof(npf_state_t));

	kdict = npf_connkey_export(&con->c_forw_entry);
	nvlist_move_nvlist(cdict, "forw-key", kdict);

	kdict = npf_connkey_export(&con->c_back_entry);
	nvlist_move_nvlist(cdict, "back-key", kdict);

	if (con->c_nat) {
		npf_nat_export(cdict, con->c_nat);
	}
	return cdict;
}

/*
 * npf_connkey_import: reconstruct a connection key from a nvlist.
 *
 * => Returns the key length in bytes or zero on failure.
 */
static uint32_t
npf_connkey_import(const nvlist_t *kdict, npf_connkey_t *key)
{
	npf_addr_t const * ips[2];
	uint16_t proto, id[2];
	size_t alen1, alen2;

	proto = dnvlist_get_number(kdict, "proto", 0);
	id[NPF_SRC] = dnvlist_get_number(kdict, "sport", 0);
	id[NPF_DST] = dnvlist_get_number(kdict, "dport", 0);
	ips[NPF_SRC] = dnvlist_get_binary(kdict, "saddr", &alen1, NULL, 0);
	ips[NPF_DST] = dnvlist_get_binary(kdict, "daddr", &alen2, NULL, 0);
	/*
	 * NOTE(review): only non-zero and equal lengths are checked here;
	 * alen is not validated to be 4 or 16, although connkey_getkey()
	 * asserts on other sizes -- confirm upstream intent.
	 */
	if (__predict_false(alen1 == 0 || alen1 != alen2)) {
		return 0;
	}
	return connkey_setkey(key, proto, ips, id, alen1, true);
}

/*
 * npf_conn_import: fully reconstruct a single connection from a
 * nvlist and insert into the given database.
 */
int
npf_conn_import(npf_t *npf, npf_conndb_t *cd, const nvlist_t *cdict,
    npf_ruleset_t *natlist)
{
	npf_conn_t *con;
	npf_connkey_t *fw, *bk;
	const nvlist_t *nat, *conkey;
	const char *ifname;
	const void *state;
	size_t len;

	/* Allocate a connection and initialise it (clear first). */
	con = pool_cache_get(npf->conn_cache, PR_WAITOK);
	memset(con, 0, sizeof(npf_conn_t));
	mutex_init(&con->c_lock, MUTEX_DEFAULT, IPL_SOFTNET);
	npf_stats_inc(npf, NPF_STAT_CONN_CREATE);

	con->c_proto = dnvlist_get_number(cdict, "proto", 0);
	con->c_flags = dnvlist_get_number(cdict, "flags", 0);
	/* Only the direction, active and pass bits may be imported. */
	con->c_flags &= PFIL_ALL | CONN_ACTIVE | CONN_PASS;
	conn_update_atime(con);

	ifname = dnvlist_get_string(cdict, "ifname", NULL);
	if (ifname && (con->c_ifid = npf_ifmap_register(npf, ifname)) == 0) {
		goto err;
	}

	state = dnvlist_get_binary(cdict, "state", &len, NULL, 0);
	if (!state || len != sizeof(npf_state_t)) {
		goto err;
	}
	memcpy(&con->c_state, state, sizeof(npf_state_t));

	/* Reconstruct NAT association, if any. */
	if ((nat = dnvlist_get_nvlist(cdict, "nat", NULL)) != NULL &&
	    (con->c_nat = npf_nat_import(npf, nat, natlist, con)) == NULL) {
		goto err;
	}

	/*
	 * Fetch and copy the keys for each direction.
	 */
	conkey = dnvlist_get_nvlist(cdict, "forw-key", NULL);
	fw = &con->c_forw_entry;
	if (conkey == NULL || !npf_connkey_import(conkey, fw)) {
		goto err;
	}
	conkey = dnvlist_get_nvlist(cdict, "back-key", NULL);
	bk = &con->c_back_entry;
	if (conkey == NULL || !npf_connkey_import(conkey, bk)) {
		goto err;
	}
	fw->ck_backptr = bk->ck_backptr = con;

	/* Insert the entries and the connection itself. */
	if (!npf_conndb_insert(cd, fw)) {
		goto err;
	}
	if (!npf_conndb_insert(cd, bk)) {
		/* Undo the first insertion on the partial failure. */
		npf_conndb_remove(cd, fw);
		goto err;
	}

	NPF_PRINTF(("NPF: imported conn %p\n", con));
	npf_conndb_enqueue(cd, con);
	return 0;
err:
	npf_conn_destroy(npf, con);
	return EINVAL;
}

/*
 * npf_conn_find: lookup a connection by the key given in the nvlist
 * and, if found and valid, export it into the output nvlist.
 */
int
npf_conn_find(npf_t *npf, const nvlist_t *idict, nvlist_t **odict)
{
	const nvlist_t *kdict;
	npf_connkey_t key;
	npf_conn_t *con;
	uint16_t dir;
	bool forw;

	kdict = dnvlist_get_nvlist(idict, "key", NULL);
	if (!kdict || !npf_connkey_import(kdict, &key)) {
		return EINVAL;
	}
	dir = dnvlist_get_number(idict, "direction", 0);
	con = npf_conndb_lookup(npf->conn_db, &key, &forw);
	if (con == NULL) {
		return ESRCH;
	}
	if (!npf_conn_ok(con, dir, true)) {
		/* Drop the reference acquired by the lookup. */
		atomic_dec_uint(&con->c_refcnt);
		return ESRCH;
	}
	*odict = npf_conn_export(npf, con);
	atomic_dec_uint(&con->c_refcnt);
	return *odict ? 0 : ENOSPC;
}

#if defined(DDB) || defined(_NPF_TESTING)

/*
 * npf_conn_print: dump the connection (both keys, state and any NAT
 * association) to the console -- debugging aid only.
 */
void
npf_conn_print(const npf_conn_t *con)
{
	const u_int alen = NPF_CONN_GETALEN(&con->c_forw_entry);
	const uint32_t *fkey = con->c_forw_entry.ck_key;
	const uint32_t *bkey = con->c_back_entry.ck_key;
	const u_int proto = con->c_proto;
	struct timespec tspnow;
	const void *src, *dst;
	int etime;

	getnanouptime(&tspnow);
	etime = npf_state_etime(&con->c_state, proto);

	printf("%p:\n\tproto %d flags 0x%x tsdiff %ld etime %d\n", con,
	    proto, con->c_flags, (long)(tspnow.tv_sec - con->c_atime), etime);

	src = &fkey[2], dst = &fkey[2 + (alen >> 2)];
	printf("\tforw %s:%d", npf_addr_dump(src, alen), ntohs(fkey[1] >> 16));
	printf("-> %s:%d\n", npf_addr_dump(dst, alen), ntohs(fkey[1] & 0xffff));

	src = &bkey[2], dst = &bkey[2 + (alen >> 2)];
	printf("\tback %s:%d", npf_addr_dump(src, alen), ntohs(bkey[1] >> 16));
	printf("-> %s:%d\n", npf_addr_dump(dst, alen), ntohs(bkey[1] & 0xffff));

	npf_state_dump(&con->c_state);
	if (con->c_nat) {
		npf_nat_dump(con->c_nat);
	}
}

#endif