1 /*- 2 * Copyright (c) 2010-2019 The NetBSD Foundation, Inc. 3 * All rights reserved. 4 * 5 * This material is based upon work partially supported by The 6 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 * POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /* 31 * NPF connection storage. 32 * 33 * Warning (not applicable for the userspace npfkern): 34 * 35 * thmap is partially lock-free data structure that uses its own 36 * spin-locks on the writer side (insert/delete operations). 37 * 38 * The relevant interrupt priority level (IPL) must be set and the 39 * kernel preemption disabled across the critical paths to prevent 40 * deadlocks and priority inversion problems. These are essentially 41 * the same guarantees as a spinning mutex(9) would provide. 42 * 43 * This is achieved with SPL routines splsoftnet() and splx() around 44 * the thmap_del() and thmap_put() calls. 45 */ 46 47 #ifdef _KERNEL 48 #include <sys/cdefs.h> 49 __KERNEL_RCSID(0, "$NetBSD: npf_conndb.c,v 1.7 2019/12/14 15:21:51 riastradh Exp $"); 50 51 #include <sys/param.h> 52 #include <sys/types.h> 53 54 #include <sys/atomic.h> 55 #include <sys/kmem.h> 56 #include <sys/thmap.h> 57 #endif 58 59 #define __NPF_CONN_PRIVATE 60 #include "npf_conn.h" 61 #include "npf_impl.h" 62 63 struct npf_conndb { 64 thmap_t * cd_map; 65 66 /* 67 * There are three lists for connections: new, all and G/C. 68 * 69 * New connections are atomically inserted into the "new-list". 70 * The G/C worker will move them to the doubly-linked list of all 71 * active connections. 72 */ 73 npf_conn_t * cd_new; 74 LIST_HEAD(, npf_conn) cd_list; 75 LIST_HEAD(, npf_conn) cd_gclist; 76 77 /* The last inspected connection (for circular iteration). */ 78 npf_conn_t * cd_marker; 79 }; 80 81 typedef struct { 82 int gc_step; 83 int _reserved0; 84 } npf_conndb_params_t; 85 86 /* 87 * Pointer tag for connection keys which represent the "forwards" entry. 88 */ 89 #define CONNDB_FORW_BIT ((uintptr_t)0x1) 90 #define CONNDB_ISFORW_P(p) (((uintptr_t)(p) & CONNDB_FORW_BIT) != 0) 91 #define CONNDB_GET_PTR(p) ((void *)((uintptr_t)(p) & ~CONNDB_FORW_BIT)) 92 93 void 94 npf_conndb_sysinit(npf_t *npf) 95 { 96 npf_conndb_params_t *params = npf_param_allocgroup(npf, 97 NPF_PARAMS_CONNDB, sizeof(npf_conndb_params_t)); 98 npf_param_t param_map[] = { 99 { 100 "gc.step", 101 ¶ms->gc_step, 102 .default_val = 256, 103 .min = 1, .max = INT_MAX 104 } 105 }; 106 npf_param_register(npf, param_map, __arraycount(param_map)); 107 } 108 109 void 110 npf_conndb_sysfini(npf_t *npf) 111 { 112 const size_t len = sizeof(npf_conndb_params_t); 113 npf_param_freegroup(npf, NPF_PARAMS_CONNDB, len); 114 } 115 116 npf_conndb_t * 117 npf_conndb_create(void) 118 { 119 npf_conndb_t *cd; 120 121 cd = kmem_zalloc(sizeof(npf_conndb_t), KM_SLEEP); 122 cd->cd_map = thmap_create(0, NULL, THMAP_NOCOPY); 123 KASSERT(cd->cd_map != NULL); 124 125 LIST_INIT(&cd->cd_list); 126 LIST_INIT(&cd->cd_gclist); 127 return cd; 128 } 129 130 void 131 npf_conndb_destroy(npf_conndb_t *cd) 132 { 133 KASSERT(cd->cd_new == NULL); 134 KASSERT(cd->cd_marker == NULL); 135 KASSERT(LIST_EMPTY(&cd->cd_list)); 136 KASSERT(LIST_EMPTY(&cd->cd_gclist)); 137 138 thmap_destroy(cd->cd_map); 139 kmem_free(cd, sizeof(npf_conndb_t)); 140 } 141 142 /* 143 * npf_conndb_lookup: find a connection given the key. 144 */ 145 npf_conn_t * 146 npf_conndb_lookup(npf_conndb_t *cd, const npf_connkey_t *ck, bool *forw) 147 { 148 const unsigned keylen = NPF_CONNKEY_LEN(ck); 149 npf_conn_t *con; 150 void *val; 151 152 /* 153 * Lookup the connection key in the key-value map. 154 */ 155 val = thmap_get(cd->cd_map, ck->ck_key, keylen); 156 if (!val) { 157 return NULL; 158 } 159 160 /* 161 * Determine whether this is the "forwards" or "backwards" key 162 * and clear the pointer tag. 163 */ 164 *forw = CONNDB_ISFORW_P(val); 165 con = CONNDB_GET_PTR(val); 166 KASSERT(con != NULL); 167 168 /* 169 * Acquire a reference and return the connection. 170 */ 171 atomic_inc_uint(&con->c_refcnt); 172 return con; 173 } 174 175 /* 176 * npf_conndb_insert: insert the key representing the connection. 177 * 178 * => Returns true on success and false on failure. 179 */ 180 bool 181 npf_conndb_insert(npf_conndb_t *cd, const npf_connkey_t *ck, 182 npf_conn_t *con, bool forw) 183 { 184 const unsigned keylen = NPF_CONNKEY_LEN(ck); 185 const uintptr_t tag = (CONNDB_FORW_BIT * !!forw); 186 void *val; 187 bool ok; 188 189 /* 190 * Tag the connection pointer if this is the "forwards" key. 191 */ 192 KASSERT(!CONNDB_ISFORW_P(con)); 193 val = (void *)((uintptr_t)(void *)con | tag); 194 195 int s = splsoftnet(); 196 ok = thmap_put(cd->cd_map, ck->ck_key, keylen, val) == val; 197 splx(s); 198 199 return ok; 200 } 201 202 /* 203 * npf_conndb_remove: find and delete connection key, returning the 204 * connection it represents. 205 */ 206 npf_conn_t * 207 npf_conndb_remove(npf_conndb_t *cd, npf_connkey_t *ck) 208 { 209 const unsigned keylen = NPF_CONNKEY_LEN(ck); 210 void *val; 211 212 int s = splsoftnet(); 213 val = thmap_del(cd->cd_map, ck->ck_key, keylen); 214 splx(s); 215 216 return CONNDB_GET_PTR(val); 217 } 218 219 /* 220 * npf_conndb_enqueue: atomically insert the connection into the 221 * singly-linked list of the "new" connections. 222 */ 223 void 224 npf_conndb_enqueue(npf_conndb_t *cd, npf_conn_t *con) 225 { 226 npf_conn_t *head; 227 228 do { 229 head = cd->cd_new; 230 con->c_next = head; 231 } while (atomic_cas_ptr(&cd->cd_new, head, con) != head); 232 } 233 234 /* 235 * npf_conndb_update: migrate all new connections to the list of all 236 * connections; this must also be performed on npf_conndb_getlist() 237 * to provide a complete list of connections. 238 */ 239 static void 240 npf_conndb_update(npf_conndb_t *cd) 241 { 242 npf_conn_t *con; 243 244 con = atomic_swap_ptr(&cd->cd_new, NULL); 245 while (con) { 246 npf_conn_t *next = con->c_next; // union 247 LIST_INSERT_HEAD(&cd->cd_list, con, c_entry); 248 con = next; 249 } 250 } 251 252 /* 253 * npf_conndb_getlist: return the list of all connections. 254 */ 255 npf_conn_t * 256 npf_conndb_getlist(npf_conndb_t *cd) 257 { 258 npf_conndb_update(cd); 259 return LIST_FIRST(&cd->cd_list); 260 } 261 262 /* 263 * npf_conndb_getnext: return the next connection, implementing 264 * the circular iteration. 265 */ 266 npf_conn_t * 267 npf_conndb_getnext(npf_conndb_t *cd, npf_conn_t *con) 268 { 269 /* Next.. */ 270 if (con == NULL || (con = LIST_NEXT(con, c_entry)) == NULL) { 271 con = LIST_FIRST(&cd->cd_list); 272 } 273 return con; 274 } 275 276 /* 277 * npf_conndb_gc_incr: incremental G/C of the expired connections. 278 */ 279 static void 280 npf_conndb_gc_incr(npf_t *npf, npf_conndb_t *cd, const time_t now) 281 { 282 const npf_conndb_params_t *params = npf->params[NPF_PARAMS_CONNDB]; 283 unsigned target = params->gc_step; 284 npf_conn_t *con; 285 286 /* 287 * Second, start from the "last" (marker) connection. 288 * We must initialise the marker if it is not set yet. 289 */ 290 if ((con = cd->cd_marker) == NULL) { 291 con = npf_conndb_getnext(cd, NULL); 292 cd->cd_marker = con; 293 } 294 295 /* 296 * Scan the connections: 297 * - Limit the scan to the G/C step size. 298 * - Stop if we scanned all of them. 299 * - Update the marker connection. 300 */ 301 while (con && target--) { 302 npf_conn_t *next = npf_conndb_getnext(cd, con); 303 304 /* 305 * Can we G/C this connection? 306 */ 307 if (npf_conn_expired(npf, con, now)) { 308 /* Yes: move to the G/C list. */ 309 LIST_REMOVE(con, c_entry); 310 LIST_INSERT_HEAD(&cd->cd_gclist, con, c_entry); 311 npf_conn_remove(cd, con); 312 313 /* This connection cannot be a new marker anymore. */ 314 if (con == next) { 315 next = NULL; 316 } 317 if (con == cd->cd_marker) { 318 cd->cd_marker = next; 319 con = next; 320 continue; 321 } 322 } 323 con = next; 324 325 /* 326 * Circular iteration: if we returned back to the 327 * marker connection, then stop. 328 */ 329 if (con == cd->cd_marker) { 330 break; 331 } 332 } 333 cd->cd_marker = con; 334 } 335 336 /* 337 * npf_conndb_gc: garbage collect the expired connections. 338 * 339 * => Must run in a single-threaded manner. 340 * => If 'flush' is true, then destroy all connections. 341 * => If 'sync' is true, then perform passive serialisation. 342 */ 343 void 344 npf_conndb_gc(npf_t *npf, npf_conndb_t *cd, bool flush, bool sync) 345 { 346 struct timespec tsnow; 347 npf_conn_t *con; 348 void *gcref; 349 350 getnanouptime(&tsnow); 351 352 /* First, migrate all new connections. */ 353 mutex_enter(&npf->conn_lock); 354 npf_conndb_update(cd); 355 if (flush) { 356 /* Just unlink and move all connections to the G/C list. */ 357 while ((con = LIST_FIRST(&cd->cd_list)) != NULL) { 358 LIST_REMOVE(con, c_entry); 359 LIST_INSERT_HEAD(&cd->cd_gclist, con, c_entry); 360 npf_conn_remove(cd, con); 361 } 362 cd->cd_marker = NULL; 363 } else { 364 /* Incremental G/C of the expired connections. */ 365 npf_conndb_gc_incr(npf, cd, tsnow.tv_sec); 366 } 367 mutex_exit(&npf->conn_lock); 368 369 /* 370 * Ensure it is safe to destroy the connections. 371 * Note: drop the conn_lock (see the lock order). 372 */ 373 gcref = thmap_stage_gc(cd->cd_map); 374 if (sync && (gcref || !LIST_EMPTY(&cd->cd_gclist))) { 375 npf_config_enter(npf); 376 npf_config_sync(npf); 377 npf_config_exit(npf); 378 } 379 thmap_gc(cd->cd_map, gcref); 380 381 /* 382 * If there is nothing to G/C, then reduce the worker interval. 383 * We do not go below the lower watermark. 384 */ 385 if (LIST_EMPTY(&cd->cd_gclist)) { 386 // TODO: cd->next_gc = MAX(cd->next_gc >> 1, NPF_MIN_GC_TIME); 387 return; 388 } 389 390 /* 391 * Garbage collect all expired connections. 392 * May need to wait for the references to drain. 393 */ 394 while ((con = LIST_FIRST(&cd->cd_gclist)) != NULL) { 395 /* 396 * Destroy only if removed and no references. Otherwise, 397 * just do it next time, unless we are destroying all. 398 */ 399 if (__predict_false(con->c_refcnt)) { 400 if (!flush) { 401 break; 402 } 403 kpause("npfcongc", false, 1, NULL); 404 continue; 405 } 406 LIST_REMOVE(con, c_entry); 407 npf_conn_destroy(npf, con); 408 } 409 } 410