/*	$NetBSD: badcache.c,v 1.7 2023/01/25 21:43:29 christos Exp $	*/

/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * SPDX-License-Identifier: MPL-2.0
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

/*! \file */

#include <inttypes.h>
#include <stdbool.h>

#include <isc/buffer.h>
#include <isc/hash.h>
#include <isc/log.h>
#include <isc/mem.h>
#include <isc/mutex.h>
#include <isc/platform.h>
#include <isc/print.h>
#include <isc/rwlock.h>
#include <isc/string.h>
#include <isc/time.h>
#include <isc/util.h>

#include <dns/badcache.h>
#include <dns/name.h>
#include <dns/rdatatype.h>
#include <dns/types.h>

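/*
 * The bad cache is a chained hash table of dns_bcentry_t records,
 * keyed by owner name.  Per-bucket operations take the table-wide
 * rwlock for reading plus the bucket's mutex, so unrelated buckets
 * can be updated concurrently; the rwlock is taken for writing only
 * by operations that touch the whole table (resizing, flushing,
 * printing).
 */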
typedef struct dns_bcentry dns_bcentry_t;

struct dns_badcache {
	unsigned int magic;
	isc_rwlock_t lock;
	isc_mem_t *mctx;

	isc_mutex_t *tlocks;
	dns_bcentry_t **table;

	atomic_uint_fast32_t count;
	atomic_uint_fast32_t sweep;

	unsigned int minsize;
	unsigned int size;
};

#define BADCACHE_MAGIC	  ISC_MAGIC('B', 'd', 'C', 'a')
#define VALID_BADCACHE(m) ISC_MAGIC_VALID(m, BADCACHE_MAGIC)

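/*
 * A cache entry.  The owner name's data is stored inline, immediately
 * after the structure itself, which is why entries are allocated and
 * freed with sizeof(*bad) + bad->name.length.
 */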
struct dns_bcentry {
	dns_bcentry_t *next;
	dns_rdatatype_t type;
	isc_time_t expire;
	uint32_t flags;
	unsigned int hashval;
	dns_name_t name;
};

static void
badcache_resize(dns_badcache_t *bc, isc_time_t *now);

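/*
 * Allocate and initialize a bad cache with 'size' hash buckets and a
 * mutex per bucket; 'size' also becomes the floor below which the
 * table never shrinks.  A minimal usage sketch (hypothetical caller
 * code, not part of this file; 'mctx', 'qname', 'expire' and 'now'
 * are assumed to have been set up by the caller):
 *
 *	dns_badcache_t *bc = NULL;
 *	uint32_t flags;
 *
 *	dns_badcache_init(mctx, 1021, &bc);
 *	dns_badcache_add(bc, qname, dns_rdatatype_a, false, 0, &expire);
 *	if (dns_badcache_find(bc, qname, dns_rdatatype_a, &flags, &now)) {
 *		... qname/A is still marked bad ...
 *	}
 *	dns_badcache_destroy(&bc);
 */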
isc_result_t
dns_badcache_init(isc_mem_t *mctx, unsigned int size, dns_badcache_t **bcp) {
	dns_badcache_t *bc = NULL;
	unsigned int i;

	REQUIRE(bcp != NULL && *bcp == NULL);
	REQUIRE(mctx != NULL);

	bc = isc_mem_get(mctx, sizeof(dns_badcache_t));
	memset(bc, 0, sizeof(dns_badcache_t));

	isc_mem_attach(mctx, &bc->mctx);
	isc_rwlock_init(&bc->lock, 0, 0);

	bc->table = isc_mem_get(bc->mctx, sizeof(*bc->table) * size);
	bc->tlocks = isc_mem_get(bc->mctx, sizeof(isc_mutex_t) * size);
	for (i = 0; i < size; i++) {
		isc_mutex_init(&bc->tlocks[i]);
	}
	bc->size = bc->minsize = size;
	memset(bc->table, 0, bc->size * sizeof(dns_bcentry_t *));

	atomic_init(&bc->count, 0);
	atomic_init(&bc->sweep, 0);
	bc->magic = BADCACHE_MAGIC;

	*bcp = bc;
	return (ISC_R_SUCCESS);
}

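/*
 * Flush all entries, destroy the per-bucket mutexes and the table,
 * and free the cache itself, detaching from its memory context.
 */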
void
dns_badcache_destroy(dns_badcache_t **bcp) {
	dns_badcache_t *bc;
	unsigned int i;

	REQUIRE(bcp != NULL && *bcp != NULL);
	bc = *bcp;
	*bcp = NULL;

	dns_badcache_flush(bc);

	bc->magic = 0;
	isc_rwlock_destroy(&bc->lock);
	for (i = 0; i < bc->size; i++) {
		isc_mutex_destroy(&bc->tlocks[i]);
	}
	isc_mem_put(bc->mctx, bc->table, sizeof(dns_bcentry_t *) * bc->size);
	isc_mem_put(bc->mctx, bc->tlocks, sizeof(isc_mutex_t) * bc->size);
	isc_mem_putanddetach(&bc->mctx, bc, sizeof(dns_badcache_t));
}

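/*
 * Grow the table when it holds more than eight entries per bucket, or
 * shrink it (never below the initial size) when it holds fewer than
 * two.  Expired entries are dropped rather than rehashed; surviving
 * entries are moved to the new table.
 */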
static void
badcache_resize(dns_badcache_t *bc, isc_time_t *now) {
	dns_bcentry_t **newtable, *bad, *next;
	isc_mutex_t *newlocks;
	unsigned int newsize, i;
	bool grow;

	RWLOCK(&bc->lock, isc_rwlocktype_write);

	/*
	 * XXXWPK we will have a thundering herd problem here, as all
	 * threads will wait on the RWLOCK when there's a need to resize
	 * the badcache.  However, this happens so rarely that it should
	 * not be a performance issue.  We double the size every time we
	 * grow the table, and we don't shrink it unless the number of
	 * entries has really gone down, so in a high-load situation the
	 * number of badcache entries will eventually stabilize.
	 */
	if (atomic_load_relaxed(&bc->count) > bc->size * 8) {
		grow = true;
	} else if (atomic_load_relaxed(&bc->count) < bc->size * 2 &&
		   bc->size > bc->minsize)
	{
		grow = false;
	} else {
		/* Someone resized it already, bail. */
		RWUNLOCK(&bc->lock, isc_rwlocktype_write);
		return;
	}

	if (grow) {
		newsize = bc->size * 2 + 1;
	} else {
		newsize = (bc->size - 1) / 2;
#ifdef __clang_analyzer__
		/*
		 * XXXWPK there's a bug in the clang static analyzer:
		 * `value % newsize` is considered undefined even though
		 * we check that newsize is larger than 0.  This helps.
		 */
		newsize += 1;
#endif
	}
	RUNTIME_CHECK(newsize > 0);

	newtable = isc_mem_get(bc->mctx, sizeof(dns_bcentry_t *) * newsize);
	memset(newtable, 0, sizeof(dns_bcentry_t *) * newsize);

	newlocks = isc_mem_get(bc->mctx, sizeof(isc_mutex_t) * newsize);

	/* Copy existing mutexes. */
	for (i = 0; i < newsize && i < bc->size; i++) {
		newlocks[i] = bc->tlocks[i];
	}
	/* Initialize additional mutexes if we're growing. */
	for (i = bc->size; i < newsize; i++) {
		isc_mutex_init(&newlocks[i]);
	}
	/* Destroy extra mutexes if we're shrinking. */
	for (i = newsize; i < bc->size; i++) {
		isc_mutex_destroy(&bc->tlocks[i]);
	}

	/* Rehash live entries into the new table; drop expired ones. */
	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		for (bad = bc->table[i]; bad != NULL; bad = next) {
			next = bad->next;
			if (isc_time_compare(&bad->expire, now) < 0) {
				isc_mem_put(bc->mctx, bad,
					    sizeof(*bad) + bad->name.length);
				atomic_fetch_sub_relaxed(&bc->count, 1);
			} else {
				bad->next = newtable[bad->hashval % newsize];
				newtable[bad->hashval % newsize] = bad;
			}
		}
		bc->table[i] = NULL;
	}

	isc_mem_put(bc->mctx, bc->tlocks, sizeof(isc_mutex_t) * bc->size);
	bc->tlocks = newlocks;

	isc_mem_put(bc->mctx, bc->table, sizeof(*bc->table) * bc->size);
	bc->size = newsize;
	bc->table = newtable;

	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}

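/*
 * Add an entry for the given name and type, or refresh the expiry of
 * an existing one; when 'update' is set, the flags are rewritten as
 * well.  Expired entries encountered in the same hash chain are
 * removed along the way, and a resize is triggered once the entry
 * count crosses the thresholds described above badcache_resize().
 */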
void
dns_badcache_add(dns_badcache_t *bc, const dns_name_t *name,
		 dns_rdatatype_t type, bool update, uint32_t flags,
		 isc_time_t *expire) {
	isc_result_t result;
	unsigned int hashval, hash;
	dns_bcentry_t *bad, *prev, *next;
	isc_time_t now;
	bool resize = false;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);
	REQUIRE(expire != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	result = isc_time_now(&now);
	if (result != ISC_R_SUCCESS) {
		isc_time_settoepoch(&now);
	}

	hashval = dns_name_hash(name, false);
	hash = hashval % bc->size;
	LOCK(&bc->tlocks[hash]);
	prev = NULL;
	for (bad = bc->table[hash]; bad != NULL; bad = next) {
		next = bad->next;
		if (bad->type == type && dns_name_equal(name, &bad->name)) {
			if (update) {
				bad->expire = *expire;
				bad->flags = flags;
			}
			break;
		}
		if (isc_time_compare(&bad->expire, &now) < 0) {
			if (prev == NULL) {
				bc->table[hash] = bad->next;
			} else {
				prev->next = bad->next;
			}
			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		} else {
			prev = bad;
		}
	}

	if (bad == NULL) {
		isc_buffer_t buffer;
		bad = isc_mem_get(bc->mctx, sizeof(*bad) + name->length);
		bad->type = type;
		bad->hashval = hashval;
		bad->expire = *expire;
		bad->flags = flags;
		isc_buffer_init(&buffer, bad + 1, name->length);
		dns_name_init(&bad->name, NULL);
		dns_name_copy(name, &bad->name, &buffer);
		bad->next = bc->table[hash];
		bc->table[hash] = bad;
		unsigned int count = atomic_fetch_add_relaxed(&bc->count, 1);
		if ((count > bc->size * 8) ||
		    (count < bc->size * 2 && bc->size > bc->minsize))
		{
			resize = true;
		}
	} else {
		bad->expire = *expire;
	}

	UNLOCK(&bc->tlocks[hash]);
	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
	if (resize) {
		badcache_resize(bc, &now);
	}
}

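/*
 * Check whether name/type is in the cache and has not yet expired,
 * returning its flags through 'flagp' if so.  Expired entries in the
 * searched chain are removed as a side effect, and each call also
 * sweeps one bucket (round robin) to slowly evict stale records.
 */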
bool
dns_badcache_find(dns_badcache_t *bc, const dns_name_t *name,
		  dns_rdatatype_t type, uint32_t *flagp, isc_time_t *now) {
	dns_bcentry_t *bad, *prev, *next;
	bool answer = false;
	unsigned int i;
	unsigned int hash;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);
	REQUIRE(now != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	/*
	 * XXXMUKS: dns_name_equal() is expensive as it does an
	 * octet-by-octet comparison, and it could be made better here
	 * in two ways.  First, lowercase the names (use
	 * dns_name_downcase() instead of dns_name_copy() in
	 * dns_badcache_add()) so that dns_name_caseequal() can be used,
	 * which the compiler can emit as SIMD instructions.  Second,
	 * don't put multiple copies of the same name in the chain
	 * (otherwise multiple names have to be matched for equality);
	 * use name->link to store the type-specific part instead.
	 */

	if (atomic_load_relaxed(&bc->count) == 0) {
		goto skip;
	}

	hash = dns_name_hash(name, false) % bc->size;
	prev = NULL;
	LOCK(&bc->tlocks[hash]);
	for (bad = bc->table[hash]; bad != NULL; bad = next) {
		next = bad->next;
		/*
		 * Search the hash list.  Clean out expired records as we go.
		 */
		if (isc_time_compare(&bad->expire, now) < 0) {
			if (prev != NULL) {
				prev->next = bad->next;
			} else {
				bc->table[hash] = bad->next;
			}

			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub(&bc->count, 1);
			continue;
		}
		if (bad->type == type && dns_name_equal(name, &bad->name)) {
			if (flagp != NULL) {
				*flagp = bad->flags;
			}
			answer = true;
			break;
		}
		prev = bad;
	}
	UNLOCK(&bc->tlocks[hash]);
skip:

	/*
	 * Slow sweep to clean out stale records.
	 */
	i = atomic_fetch_add(&bc->sweep, 1) % bc->size;
	if (isc_mutex_trylock(&bc->tlocks[i]) == ISC_R_SUCCESS) {
		bad = bc->table[i];
		if (bad != NULL && isc_time_compare(&bad->expire, now) < 0) {
			bc->table[i] = bad->next;
			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		}
		UNLOCK(&bc->tlocks[i]);
	}

	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
	return (answer);
}

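/*
 * Discard every entry in the cache, leaving the table itself in place.
 */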
void
dns_badcache_flush(dns_badcache_t *bc) {
	dns_bcentry_t *entry, *next;
	unsigned int i;

	RWLOCK(&bc->lock, isc_rwlocktype_write);
	REQUIRE(VALID_BADCACHE(bc));

	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		for (entry = bc->table[i]; entry != NULL; entry = next) {
			next = entry->next;
			isc_mem_put(bc->mctx, entry,
				    sizeof(*entry) + entry->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		}
		bc->table[i] = NULL;
	}
	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}

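/*
 * Remove all entries for 'name' (of any type); expired entries in the
 * same hash chain are removed as well.
 */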
void
dns_badcache_flushname(dns_badcache_t *bc, const dns_name_t *name) {
	dns_bcentry_t *bad, *prev, *next;
	isc_result_t result;
	isc_time_t now;
	unsigned int hash;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	result = isc_time_now(&now);
	if (result != ISC_R_SUCCESS) {
		isc_time_settoepoch(&now);
	}
	hash = dns_name_hash(name, false) % bc->size;
	LOCK(&bc->tlocks[hash]);
	prev = NULL;
	for (bad = bc->table[hash]; bad != NULL; bad = next) {
		int n;
		next = bad->next;
		n = isc_time_compare(&bad->expire, &now);
		if (n < 0 || dns_name_equal(name, &bad->name)) {
			if (prev == NULL) {
				bc->table[hash] = bad->next;
			} else {
				prev->next = bad->next;
			}

			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		} else {
			prev = bad;
		}
	}
	UNLOCK(&bc->tlocks[hash]);

	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
}

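/*
 * Remove all entries at or below 'name' in the DNS hierarchy, scanning
 * the whole table; expired entries are removed along the way.
 */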
void
dns_badcache_flushtree(dns_badcache_t *bc, const dns_name_t *name) {
	dns_bcentry_t *bad, *prev, *next;
	unsigned int i;
	int n;
	isc_time_t now;
	isc_result_t result;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);

	/*
	 * We write-lock the tree to avoid relocking every node
	 * individually.
	 */
	RWLOCK(&bc->lock, isc_rwlocktype_write);

	result = isc_time_now(&now);
	if (result != ISC_R_SUCCESS) {
		isc_time_settoepoch(&now);
	}

	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		prev = NULL;
		for (bad = bc->table[i]; bad != NULL; bad = next) {
			next = bad->next;
			n = isc_time_compare(&bad->expire, &now);
			if (n < 0 || dns_name_issubdomain(&bad->name, name)) {
				if (prev == NULL) {
					bc->table[i] = bad->next;
				} else {
					prev->next = bad->next;
				}

				isc_mem_put(bc->mctx, bad,
					    sizeof(*bad) + bad->name.length);
				atomic_fetch_sub_relaxed(&bc->count, 1);
			} else {
				prev = bad;
			}
		}
	}

	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}

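/*
 * Dump the cache to 'fp' in masterfile-comment style, one line per
 * entry with the remaining lifetime in milliseconds; expired entries
 * are pruned while printing.
 */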
void
dns_badcache_print(dns_badcache_t *bc, const char *cachename, FILE *fp) {
	char namebuf[DNS_NAME_FORMATSIZE];
	char typebuf[DNS_RDATATYPE_FORMATSIZE];
	dns_bcentry_t *bad, *next, *prev;
	isc_time_t now;
	unsigned int i;
	uint64_t t;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(cachename != NULL);
	REQUIRE(fp != NULL);

	/*
	 * We write-lock the tree to avoid relocking every node
	 * individually.
	 */
	RWLOCK(&bc->lock, isc_rwlocktype_write);
	fprintf(fp, ";\n; %s\n;\n", cachename);

	TIME_NOW(&now);
	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		prev = NULL;
		for (bad = bc->table[i]; bad != NULL; bad = next) {
			next = bad->next;
			if (isc_time_compare(&bad->expire, &now) < 0) {
				if (prev != NULL) {
					prev->next = bad->next;
				} else {
					bc->table[i] = bad->next;
				}

				isc_mem_put(bc->mctx, bad,
					    sizeof(*bad) + bad->name.length);
				atomic_fetch_sub_relaxed(&bc->count, 1);
				continue;
			}
			prev = bad;
			dns_name_format(&bad->name, namebuf, sizeof(namebuf));
			dns_rdatatype_format(bad->type, typebuf,
					     sizeof(typebuf));
			t = isc_time_microdiff(&bad->expire, &now);
			t /= 1000;
			fprintf(fp, "; %s/%s [ttl %" PRIu64 "]\n", namebuf,
				typebuf, t);
		}
	}
	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}
528