/*-
 * Copyright (c) 2010-2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NPF connection storage.
 *
 * Warning (not applicable for the userspace npfkern):
 *
 *	thmap is a partially lock-free data structure that uses its own
 *	spin-locks on the writer side (insert/delete operations).
 *
 *	The relevant interrupt priority level (IPL) must be set and the
 *	kernel preemption disabled across the critical paths to prevent
 *	deadlocks and priority inversion problems.  These are essentially
 *	the same guarantees as a spinning mutex(9) would provide.
 *
 *	This is achieved with the SPL routines splsoftnet() and splx()
 *	around the thmap_del() and thmap_put() calls.
 */
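
/*
 * For illustration, the writer-side updates below all follow the same
 * pattern (see npf_conndb_insert() and npf_conndb_remove()):
 *
 *	int s = splsoftnet();
 *	thmap_put(cd->cd_map, ck->ck_key, keylen, val);
 *	splx(s);
 */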

#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_conndb.c,v 1.7 2019/12/14 15:21:51 riastradh Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/atomic.h>
#include <sys/kmem.h>
#include <sys/thmap.h>
#endif

#define __NPF_CONN_PRIVATE
#include "npf_conn.h"
#include "npf_impl.h"

struct npf_conndb {
	thmap_t *		cd_map;

	/*
	 * There are three lists for connections: new, all and G/C.
	 *
	 * New connections are atomically inserted into the "new-list".
	 * The G/C worker will move them to the doubly-linked list of all
	 * active connections.
	 */
	npf_conn_t *		cd_new;
	LIST_HEAD(, npf_conn)	cd_list;
	LIST_HEAD(, npf_conn)	cd_gclist;

	/* The last inspected connection (for circular iteration). */
	npf_conn_t *		cd_marker;
};

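/*
 * G/C parameters: "gc.step" (gc_step) bounds the number of connections
 * scanned by one incremental G/C pass (see npf_conndb_gc_incr()).
 */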
typedef struct {
	int		gc_step;
	int		_reserved0;
} npf_conndb_params_t;

/*
 * Pointer tag for connection keys which represent the "forwards" entry.
 */
#define	CONNDB_FORW_BIT		((uintptr_t)0x1)
#define	CONNDB_ISFORW_P(p)	(((uintptr_t)(p) & CONNDB_FORW_BIT) != 0)
#define	CONNDB_GET_PTR(p)	((void *)((uintptr_t)(p) & ~CONNDB_FORW_BIT))
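
/*
 * Note: the tag occupies the lowest bit of the connection pointer, which
 * is assumed to be zero for any valid npf_conn_t pointer (the structures
 * are allocated with kmem(9) and thus suitably aligned), so it can be
 * masked off with CONNDB_GET_PTR().
 */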

void
npf_conndb_sysinit(npf_t *npf)
{
	npf_conndb_params_t *params = npf_param_allocgroup(npf,
	    NPF_PARAMS_CONNDB, sizeof(npf_conndb_params_t));
	npf_param_t param_map[] = {
		{
			"gc.step",
			&params->gc_step,
			.default_val = 256,
			.min = 1, .max = INT_MAX
		}
	};
	npf_param_register(npf, param_map, __arraycount(param_map));
}

void
npf_conndb_sysfini(npf_t *npf)
{
	const size_t len = sizeof(npf_conndb_params_t);
	npf_param_freegroup(npf, NPF_PARAMS_CONNDB, len);
}

npf_conndb_t *
npf_conndb_create(void)
{
	npf_conndb_t *cd;

	cd = kmem_zalloc(sizeof(npf_conndb_t), KM_SLEEP);
	cd->cd_map = thmap_create(0, NULL, THMAP_NOCOPY);
	KASSERT(cd->cd_map != NULL);

	LIST_INIT(&cd->cd_list);
	LIST_INIT(&cd->cd_gclist);
	return cd;
}

void
npf_conndb_destroy(npf_conndb_t *cd)
{
	KASSERT(cd->cd_new == NULL);
	KASSERT(cd->cd_marker == NULL);
	KASSERT(LIST_EMPTY(&cd->cd_list));
	KASSERT(LIST_EMPTY(&cd->cd_gclist));

	thmap_destroy(cd->cd_map);
	kmem_free(cd, sizeof(npf_conndb_t));
}

/*
 * npf_conndb_lookup: find a connection given the key.
 */
npf_conn_t *
npf_conndb_lookup(npf_conndb_t *cd, const npf_connkey_t *ck, bool *forw)
{
	const unsigned keylen = NPF_CONNKEY_LEN(ck);
	npf_conn_t *con;
	void *val;

	/*
	 * Look up the connection key in the key-value map.
	 */
	val = thmap_get(cd->cd_map, ck->ck_key, keylen);
	if (!val) {
		return NULL;
	}

	/*
	 * Determine whether this is the "forwards" or "backwards" key
	 * and clear the pointer tag.
	 */
	*forw = CONNDB_ISFORW_P(val);
	con = CONNDB_GET_PTR(val);
	KASSERT(con != NULL);

	/*
	 * Acquire a reference and return the connection.
	 */
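	/*
	 * Note: the caller becomes responsible for dropping this reference,
	 * e.g. via npf_conn_release(), once it is done with the connection.
	 */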
	atomic_inc_uint(&con->c_refcnt);
	return con;
}

/*
 * npf_conndb_insert: insert the key representing the connection.
 *
 * => Returns true on success and false on failure.
 */
bool
npf_conndb_insert(npf_conndb_t *cd, const npf_connkey_t *ck,
    npf_conn_t *con, bool forw)
{
	const unsigned keylen = NPF_CONNKEY_LEN(ck);
	const uintptr_t tag = (CONNDB_FORW_BIT * !!forw);
	void *val;
	bool ok;

	/*
	 * Tag the connection pointer if this is the "forwards" key.
	 */
	KASSERT(!CONNDB_ISFORW_P(con));
	val = (void *)((uintptr_t)(void *)con | tag);

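	/*
	 * thmap_put() returns the value which ends up in the map: our
	 * tagged pointer on a successful insert, or the already-present
	 * value if the key exists, in which case the insert has failed.
	 */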
	int s = splsoftnet();
	ok = thmap_put(cd->cd_map, ck->ck_key, keylen, val) == val;
	splx(s);

	return ok;
}

/*
 * npf_conndb_remove: find and delete the connection key, returning the
 * connection it represents.
 */
npf_conn_t *
npf_conndb_remove(npf_conndb_t *cd, npf_connkey_t *ck)
{
	const unsigned keylen = NPF_CONNKEY_LEN(ck);
	void *val;

	int s = splsoftnet();
	val = thmap_del(cd->cd_map, ck->ck_key, keylen);
	splx(s);

	return CONNDB_GET_PTR(val);
}

/*
 * npf_conndb_enqueue: atomically insert the connection into the
 * singly-linked list of the "new" connections.
 */
void
npf_conndb_enqueue(npf_conndb_t *cd, npf_conn_t *con)
{
	npf_conn_t *head;

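	/*
	 * Lock-free push: retry the CAS if another thread has changed
	 * the list head in the meantime.
	 */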
	do {
		head = cd->cd_new;
		con->c_next = head;
	} while (atomic_cas_ptr(&cd->cd_new, head, con) != head);
}

/*
 * npf_conndb_update: migrate all new connections to the list of all
 * connections; this must also be performed by npf_conndb_getlist()
 * to provide a complete list of connections.
 */
static void
npf_conndb_update(npf_conndb_t *cd)
{
	npf_conn_t *con;

	con = atomic_swap_ptr(&cd->cd_new, NULL);
	while (con) {
		/* Note: c_next shares storage with c_entry (union). */
		npf_conn_t *next = con->c_next;
		LIST_INSERT_HEAD(&cd->cd_list, con, c_entry);
		con = next;
	}
}

/*
 * npf_conndb_getlist: return the list of all connections.
 */
npf_conn_t *
npf_conndb_getlist(npf_conndb_t *cd)
{
	npf_conndb_update(cd);
	return LIST_FIRST(&cd->cd_list);
}

/*
 * npf_conndb_getnext: return the next connection, implementing
 * the circular iteration.
 */
npf_conn_t *
npf_conndb_getnext(npf_conndb_t *cd, npf_conn_t *con)
{
	/* Advance; wrap around to the list head when the end is reached. */
	if (con == NULL || (con = LIST_NEXT(con, c_entry)) == NULL) {
		con = LIST_FIRST(&cd->cd_list);
	}
	return con;
}

/*
 * npf_conndb_gc_incr: incremental G/C of the expired connections.
 */
static void
npf_conndb_gc_incr(npf_t *npf, npf_conndb_t *cd, const time_t now)
{
	const npf_conndb_params_t *params = npf->params[NPF_PARAMS_CONNDB];
	unsigned target = params->gc_step;
	npf_conn_t *con;

	/*
	 * Start from the "last" (marker) connection; the new connections
	 * have already been migrated by the caller (see npf_conndb_gc()).
	 * We must initialise the marker if it is not set yet.
	 */
	if ((con = cd->cd_marker) == NULL) {
		con = npf_conndb_getnext(cd, NULL);
		cd->cd_marker = con;
	}

	/*
	 * Scan the connections:
	 * - Limit the scan to the G/C step size.
	 * - Stop if we scanned all of them.
	 * - Update the marker connection.
	 */
	while (con && target--) {
		npf_conn_t *next = npf_conndb_getnext(cd, con);

		/*
		 * Can we G/C this connection?
		 */
		if (npf_conn_expired(npf, con, now)) {
			/* Yes: move to the G/C list. */
			LIST_REMOVE(con, c_entry);
			LIST_INSERT_HEAD(&cd->cd_gclist, con, c_entry);
			npf_conn_remove(cd, con);

			/* This connection cannot be a new marker anymore. */
			if (con == next) {
				next = NULL;
			}
			if (con == cd->cd_marker) {
				cd->cd_marker = next;
				con = next;
				continue;
			}
		}
		con = next;

		/*
		 * Circular iteration: if we have returned to the marker
		 * connection, then stop.
		 */
		if (con == cd->cd_marker) {
			break;
		}
	}
	cd->cd_marker = con;
}

/*
 * npf_conndb_gc: garbage collect the expired connections.
 *
 * => Must run in a single-threaded manner.
 * => If 'flush' is true, then destroy all connections.
 * => If 'sync' is true, then perform passive serialisation.
 */
void
npf_conndb_gc(npf_t *npf, npf_conndb_t *cd, bool flush, bool sync)
{
	struct timespec tsnow;
	npf_conn_t *con;
	void *gcref;

	getnanouptime(&tsnow);

	/* First, migrate all new connections. */
	mutex_enter(&npf->conn_lock);
	npf_conndb_update(cd);
	if (flush) {
		/* Just unlink and move all connections to the G/C list. */
		while ((con = LIST_FIRST(&cd->cd_list)) != NULL) {
			LIST_REMOVE(con, c_entry);
			LIST_INSERT_HEAD(&cd->cd_gclist, con, c_entry);
			npf_conn_remove(cd, con);
		}
		cd->cd_marker = NULL;
	} else {
		/* Incremental G/C of the expired connections. */
		npf_conndb_gc_incr(npf, cd, tsnow.tv_sec);
	}
	mutex_exit(&npf->conn_lock);

	/*
	 * Ensure it is safe to destroy the connections.
	 * Note: the conn_lock has been dropped above (see the lock order).
	 */
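	/*
	 * thmap_stage_gc() stages the memory deferred-freed by the earlier
	 * thmap_del()/thmap_put() calls; thmap_gc() may reclaim it only when
	 * no lookups can still reference it, hence the passive serialisation
	 * pass below when 'sync' is requested.
	 */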
	gcref = thmap_stage_gc(cd->cd_map);
	if (sync && (gcref || !LIST_EMPTY(&cd->cd_gclist))) {
		npf_config_enter(npf);
		npf_config_sync(npf);
		npf_config_exit(npf);
	}
	thmap_gc(cd->cd_map, gcref);

	/*
	 * If there is nothing to G/C, then reduce the worker interval.
	 * We do not go below the lower watermark.
	 */
	if (LIST_EMPTY(&cd->cd_gclist)) {
		// TODO: cd->next_gc = MAX(cd->next_gc >> 1, NPF_MIN_GC_TIME);
		return;
	}

	/*
	 * Garbage collect all expired connections.
	 * May need to wait for the references to drain.
	 */
	while ((con = LIST_FIRST(&cd->cd_gclist)) != NULL) {
		/*
		 * Destroy only if removed and no references.  Otherwise,
		 * just do it next time, unless we are destroying all.
		 */
		if (__predict_false(con->c_refcnt)) {
			if (!flush) {
				break;
			}
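			/* Flushing: wait a tick for the remaining references to drain. */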
			kpause("npfcongc", false, 1, NULL);
			continue;
		}
		LIST_REMOVE(con, c_entry);
		npf_conn_destroy(npf, con);
	}
}
410