/*	$NetBSD: badcache.c,v 1.6 2022/09/23 12:15:29 christos Exp $	*/

/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * SPDX-License-Identifier: MPL-2.0
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

/*! \file */

#include <inttypes.h>
#include <stdbool.h>

#include <isc/buffer.h>
#include <isc/hash.h>
#include <isc/log.h>
#include <isc/mem.h>
#include <isc/mutex.h>
#include <isc/platform.h>
#include <isc/print.h>
#include <isc/rwlock.h>
#include <isc/string.h>
#include <isc/time.h>
#include <isc/util.h>

#include <dns/badcache.h>
#include <dns/name.h>
#include <dns/rdatatype.h>
#include <dns/types.h>
typedef struct dns_bcentry dns_bcentry_t;

struct dns_badcache {
	unsigned int magic;
	isc_rwlock_t lock;	/* covers the whole table; write-held
				 * during resize and flush */
	isc_mem_t *mctx;

	isc_mutex_t *tlocks;	/* one mutex per hash bucket */
	dns_bcentry_t **table;	/* hash table of entry chains */

	atomic_uint_fast32_t count; /* current number of entries */
	atomic_uint_fast32_t sweep; /* bucket cursor for the slow sweep */

	unsigned int minsize;	/* lower bound on size when shrinking */
	unsigned int size;	/* current number of hash buckets */
};

#define BADCACHE_MAGIC	  ISC_MAGIC('B', 'd', 'C', 'a')
#define VALID_BADCACHE(m) ISC_MAGIC_VALID(m, BADCACHE_MAGIC)

struct dns_bcentry {
	dns_bcentry_t *next;
	dns_rdatatype_t type;
	isc_time_t expire;
	uint32_t flags;
	unsigned int hashval;
	dns_name_t name; /* the name data is stored inline, directly
			  * after the entry itself */
};

static void
badcache_resize(dns_badcache_t *bc, isc_time_t *now);

isc_result_t
dns_badcache_init(isc_mem_t *mctx, unsigned int size, dns_badcache_t **bcp) {
	dns_badcache_t *bc = NULL;
	unsigned int i;

	REQUIRE(bcp != NULL && *bcp == NULL);
	REQUIRE(mctx != NULL);

	bc = isc_mem_get(mctx, sizeof(dns_badcache_t));
	memset(bc, 0, sizeof(dns_badcache_t));

	isc_mem_attach(mctx, &bc->mctx);
	isc_rwlock_init(&bc->lock, 0, 0);

	bc->table = isc_mem_get(bc->mctx, sizeof(*bc->table) * size);
	bc->tlocks = isc_mem_get(bc->mctx, sizeof(isc_mutex_t) * size);
	for (i = 0; i < size; i++) {
		isc_mutex_init(&bc->tlocks[i]);
	}
	bc->size = bc->minsize = size;
	memset(bc->table, 0, bc->size * sizeof(dns_bcentry_t *));

	atomic_init(&bc->count, 0);
	atomic_init(&bc->sweep, 0);
	bc->magic = BADCACHE_MAGIC;

	*bcp = bc;
	return (ISC_R_SUCCESS);
}

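/*
 * Example (sketch, not part of the original file): a typical
 * create/destroy lifecycle.  This assumes the BIND 9.16-era libisc
 * API in which isc_mem_create() takes only the context pointer; the
 * bucket count 1021 is an arbitrary illustration, not a value this
 * file mandates.
 *
 *	isc_mem_t *mctx = NULL;
 *	dns_badcache_t *bc = NULL;
 *
 *	isc_mem_create(&mctx);
 *	RUNTIME_CHECK(dns_badcache_init(mctx, 1021, &bc) ==
 *		      ISC_R_SUCCESS);
 *	...
 *	dns_badcache_destroy(&bc);
 *	isc_mem_detach(&mctx);
 */
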
void
dns_badcache_destroy(dns_badcache_t **bcp) {
	dns_badcache_t *bc;
	unsigned int i;

	REQUIRE(bcp != NULL && *bcp != NULL);
	bc = *bcp;
	*bcp = NULL;

	dns_badcache_flush(bc);

	bc->magic = 0;
	isc_rwlock_destroy(&bc->lock);
	for (i = 0; i < bc->size; i++) {
		isc_mutex_destroy(&bc->tlocks[i]);
	}
	isc_mem_put(bc->mctx, bc->table, sizeof(dns_bcentry_t *) * bc->size);
	isc_mem_put(bc->mctx, bc->tlocks, sizeof(isc_mutex_t) * bc->size);
	isc_mem_putanddetach(&bc->mctx, bc, sizeof(dns_badcache_t));
}

static void
badcache_resize(dns_badcache_t *bc, isc_time_t *now) {
	dns_bcentry_t **newtable, *bad, *next;
	isc_mutex_t *newlocks;
	unsigned int newsize, i;
	bool grow;

	RWLOCK(&bc->lock, isc_rwlocktype_write);

	/*
	 * XXXWPK we will have a thundering herd problem here, as all
	 * threads will wait on the RWLOCK whenever the badcache needs
	 * to be resized.  However, resizing happens so rarely that it
	 * should not be a performance issue: we double the size every
	 * time we grow, and we don't shrink unless the number of
	 * entries has dropped well below the table size.  In a
	 * high-load situation the number of badcache entries
	 * eventually stabilizes.
	 */
	if (atomic_load_relaxed(&bc->count) > bc->size * 8) {
		grow = true;
	} else if (atomic_load_relaxed(&bc->count) < bc->size * 2 &&
		   bc->size > bc->minsize)
	{
		grow = false;
	} else {
		/* Someone resized it already, bail. */
		RWUNLOCK(&bc->lock, isc_rwlocktype_write);
		return;
	}

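	/*
	 * Worked example (illustration only): with a table of 1021
	 * buckets, we grow once more than 8168 (1021 * 8) entries are
	 * cached, and a table that has been grown past its minimum
	 * size shrinks again once it holds fewer than 2042 (1021 * 2)
	 * entries.
	 */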
	if (grow) {
		newsize = bc->size * 2 + 1;
	} else {
		newsize = (bc->size - 1) / 2;
#ifdef __clang_analyzer__
		/*
		 * XXXWPK there's a bug in the clang static analyzer -
		 * `value % newsize` is considered undefined even though
		 * we check that newsize is larger than 0. This helps.
		 */
		newsize += 1;
#endif
	}
	RUNTIME_CHECK(newsize > 0);

	newtable = isc_mem_get(bc->mctx, sizeof(dns_bcentry_t *) * newsize);
	memset(newtable, 0, sizeof(dns_bcentry_t *) * newsize);

	newlocks = isc_mem_get(bc->mctx, sizeof(isc_mutex_t) * newsize);

	/* Copy existing mutexes */
	for (i = 0; i < newsize && i < bc->size; i++) {
		newlocks[i] = bc->tlocks[i];
	}
	/* Initialize additional mutexes if we're growing */
	for (i = bc->size; i < newsize; i++) {
		isc_mutex_init(&newlocks[i]);
	}
	/* Destroy extra mutexes if we're shrinking */
	for (i = newsize; i < bc->size; i++) {
		isc_mutex_destroy(&bc->tlocks[i]);
	}

	/* Rehash surviving entries; drop the ones that have expired. */
	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		for (bad = bc->table[i]; bad != NULL; bad = next) {
			next = bad->next;
			if (isc_time_compare(&bad->expire, now) < 0) {
				isc_mem_put(bc->mctx, bad,
					    sizeof(*bad) + bad->name.length);
				atomic_fetch_sub_relaxed(&bc->count, 1);
			} else {
				bad->next = newtable[bad->hashval % newsize];
				newtable[bad->hashval % newsize] = bad;
			}
		}
		bc->table[i] = NULL;
	}

	isc_mem_put(bc->mctx, bc->tlocks, sizeof(isc_mutex_t) * bc->size);
	bc->tlocks = newlocks;

	isc_mem_put(bc->mctx, bc->table, sizeof(*bc->table) * bc->size);
	bc->size = newsize;
	bc->table = newtable;

	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}

void
dns_badcache_add(dns_badcache_t *bc, const dns_name_t *name,
		 dns_rdatatype_t type, bool update, uint32_t flags,
		 isc_time_t *expire) {
	isc_result_t result;
	unsigned int hashval, hash;
	dns_bcentry_t *bad, *prev, *next;
	isc_time_t now;
	bool resize = false;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);
	REQUIRE(expire != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	result = isc_time_now(&now);
	if (result != ISC_R_SUCCESS) {
		isc_time_settoepoch(&now);
	}

	hashval = dns_name_hash(name, false);
	hash = hashval % bc->size;
	LOCK(&bc->tlocks[hash]);
	prev = NULL;
	for (bad = bc->table[hash]; bad != NULL; bad = next) {
		next = bad->next;
		if (bad->type == type && dns_name_equal(name, &bad->name)) {
			if (update) {
				bad->expire = *expire;
				bad->flags = flags;
			}
			break;
		}
		/* Clean out expired entries as we search. */
		if (isc_time_compare(&bad->expire, &now) < 0) {
			if (prev == NULL) {
				bc->table[hash] = bad->next;
			} else {
				prev->next = bad->next;
			}
			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		} else {
			prev = bad;
		}
	}

	if (bad == NULL) {
		isc_buffer_t buffer;
		bad = isc_mem_get(bc->mctx, sizeof(*bad) + name->length);
		bad->type = type;
		bad->hashval = hashval;
		bad->expire = *expire;
		bad->flags = flags;
		isc_buffer_init(&buffer, bad + 1, name->length);
		dns_name_init(&bad->name, NULL);
		dns_name_copy(name, &bad->name, &buffer);
		bad->next = bc->table[hash];
		bc->table[hash] = bad;
		unsigned count = atomic_fetch_add_relaxed(&bc->count, 1);
		if ((count > bc->size * 8) ||
		    (count < bc->size * 2 && bc->size > bc->minsize)) {
			resize = true;
		}
	} else {
		bad->expire = *expire;
	}

	UNLOCK(&bc->tlocks[hash]);
	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
	if (resize) {
		badcache_resize(bc, &now);
	}
}

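/*
 * Example (sketch, not part of the original file): record a failure
 * for "example.com./A" for the next 30 seconds.  dns_name_fromstring()
 * and the fixed-name helpers are assumed from the BIND 9.16-era libdns
 * API; the flags word (0 here) is caller-defined and is only stored
 * and handed back by dns_badcache_find().
 *
 *	dns_fixedname_t fn;
 *	dns_name_t *name = dns_fixedname_initname(&fn);
 *	isc_time_t expire;
 *	isc_interval_t interval;
 *
 *	RUNTIME_CHECK(dns_name_fromstring(name, "example.com.", 0,
 *					  NULL) == ISC_R_SUCCESS);
 *	isc_interval_set(&interval, 30, 0);
 *	RUNTIME_CHECK(isc_time_nowplusinterval(&expire, &interval) ==
 *		      ISC_R_SUCCESS);
 *	dns_badcache_add(bc, name, dns_rdatatype_a, false, 0, &expire);
 */
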
bool
dns_badcache_find(dns_badcache_t *bc, const dns_name_t *name,
		  dns_rdatatype_t type, uint32_t *flagp, isc_time_t *now) {
	dns_bcentry_t *bad, *prev, *next;
	bool answer = false;
	unsigned int i;
	unsigned int hash;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);
	REQUIRE(now != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	/*
	 * XXXMUKS: dns_name_equal() is expensive as it does an
	 * octet-by-octet comparison, and it could be improved in two
	 * ways here. First, lowercase the names (use
	 * dns_name_downcase() instead of dns_name_copy() in
	 * dns_badcache_add()) so that dns_name_caseequal() can be
	 * used, which the compiler can vectorize into SIMD
	 * instructions. Second, don't put multiple copies of the same
	 * name in the chain (or multiple names will have to be matched
	 * for equality), but use name->link to store the type-specific
	 * part.
	 */

	if (atomic_load_relaxed(&bc->count) == 0) {
		goto skip;
	}

	hash = dns_name_hash(name, false) % bc->size;
	prev = NULL;
	LOCK(&bc->tlocks[hash]);
	for (bad = bc->table[hash]; bad != NULL; bad = next) {
		next = bad->next;
		/*
		 * Search the hash list. Clean out expired records as we go.
		 */
		if (isc_time_compare(&bad->expire, now) < 0) {
			if (prev != NULL) {
				prev->next = bad->next;
			} else {
				bc->table[hash] = bad->next;
			}

			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub(&bc->count, 1);
			continue;
		}
		if (bad->type == type && dns_name_equal(name, &bad->name)) {
			if (flagp != NULL) {
				*flagp = bad->flags;
			}
			answer = true;
			break;
		}
		prev = bad;
	}
	UNLOCK(&bc->tlocks[hash]);
skip:

	/*
	 * Slow sweep to clean out stale records.
	 */
	i = atomic_fetch_add(&bc->sweep, 1) % bc->size;
	if (isc_mutex_trylock(&bc->tlocks[i]) == ISC_R_SUCCESS) {
		bad = bc->table[i];
		if (bad != NULL && isc_time_compare(&bad->expire, now) < 0) {
			bc->table[i] = bad->next;
			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		}
		UNLOCK(&bc->tlocks[i]);
	}

	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
	return (answer);
}

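/*
 * Example (sketch, not part of the original file): probe the cache
 * before retrying a query; `bc` and `name` are assumed to be set up
 * as in the dns_badcache_add() example above.
 *
 *	isc_time_t now;
 *	uint32_t flags;
 *
 *	TIME_NOW(&now);
 *	if (dns_badcache_find(bc, name, dns_rdatatype_a, &flags, &now)) {
 *		// still marked bad; skip the query
 *	}
 */
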
void
dns_badcache_flush(dns_badcache_t *bc) {
	dns_bcentry_t *entry, *next;
	unsigned int i;

	RWLOCK(&bc->lock, isc_rwlocktype_write);
	REQUIRE(VALID_BADCACHE(bc));

	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		for (entry = bc->table[i]; entry != NULL; entry = next) {
			next = entry->next;
			isc_mem_put(bc->mctx, entry,
				    sizeof(*entry) + entry->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		}
		bc->table[i] = NULL;
	}
	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}

void
dns_badcache_flushname(dns_badcache_t *bc, const dns_name_t *name) {
	dns_bcentry_t *bad, *prev, *next;
	isc_result_t result;
	isc_time_t now;
	unsigned int hash;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	result = isc_time_now(&now);
	if (result != ISC_R_SUCCESS) {
		isc_time_settoepoch(&now);
	}
	hash = dns_name_hash(name, false) % bc->size;
	LOCK(&bc->tlocks[hash]);
	prev = NULL;
	for (bad = bc->table[hash]; bad != NULL; bad = next) {
		int n;
		next = bad->next;
		n = isc_time_compare(&bad->expire, &now);
		if (n < 0 || dns_name_equal(name, &bad->name)) {
			if (prev == NULL) {
				bc->table[hash] = bad->next;
			} else {
				prev->next = bad->next;
			}

			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		} else {
			prev = bad;
		}
	}
	UNLOCK(&bc->tlocks[hash]);

	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
}

void
dns_badcache_flushtree(dns_badcache_t *bc, const dns_name_t *name) {
	dns_bcentry_t *bad, *prev, *next;
	unsigned int i;
	int n;
	isc_time_t now;
	isc_result_t result;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);

	/*
	 * We write-lock the table so that we don't have to take and
	 * release each bucket lock individually.
	 */
	RWLOCK(&bc->lock, isc_rwlocktype_write);

	result = isc_time_now(&now);
	if (result != ISC_R_SUCCESS) {
		isc_time_settoepoch(&now);
	}

	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		prev = NULL;
		for (bad = bc->table[i]; bad != NULL; bad = next) {
			next = bad->next;
			n = isc_time_compare(&bad->expire, &now);
			if (n < 0 || dns_name_issubdomain(&bad->name, name)) {
				if (prev == NULL) {
					bc->table[i] = bad->next;
				} else {
					prev->next = bad->next;
				}

				isc_mem_put(bc->mctx, bad,
					    sizeof(*bad) + bad->name.length);
				atomic_fetch_sub_relaxed(&bc->count, 1);
			} else {
				prev = bad;
			}
		}
	}

	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}

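/*
 * Example (sketch, not part of the original file):
 * dns_badcache_flushname() removes entries for one exact owner name,
 * while dns_badcache_flushtree() removes the name and everything below
 * it (dns_name_issubdomain() treats a name as a subdomain of itself);
 * e.g., flushing the tree at "example.com." also discards cached
 * failures for "www.example.com.".
 *
 *	dns_badcache_flushname(bc, name);	// just this name
 *	dns_badcache_flushtree(bc, name);	// name and its subdomains
 */
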
void
dns_badcache_print(dns_badcache_t *bc, const char *cachename, FILE *fp) {
	char namebuf[DNS_NAME_FORMATSIZE];
	char typebuf[DNS_RDATATYPE_FORMATSIZE];
	dns_bcentry_t *bad, *next, *prev;
	isc_time_t now;
	unsigned int i;
	uint64_t t;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(cachename != NULL);
	REQUIRE(fp != NULL);

	/*
	 * We write-lock the table so that we don't have to take and
	 * release each bucket lock individually.
	 */
	RWLOCK(&bc->lock, isc_rwlocktype_write);
	fprintf(fp, ";\n; %s\n;\n", cachename);

	TIME_NOW(&now);
	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		prev = NULL;
		for (bad = bc->table[i]; bad != NULL; bad = next) {
			next = bad->next;
			if (isc_time_compare(&bad->expire, &now) < 0) {
				if (prev != NULL) {
					prev->next = bad->next;
				} else {
					bc->table[i] = bad->next;
				}

				isc_mem_put(bc->mctx, bad,
					    sizeof(*bad) + bad->name.length);
				atomic_fetch_sub_relaxed(&bc->count, 1);
				continue;
			}
			prev = bad;
			dns_name_format(&bad->name, namebuf, sizeof(namebuf));
			dns_rdatatype_format(bad->type, typebuf,
					     sizeof(typebuf));
			t = isc_time_microdiff(&bad->expire, &now);
			t /= 1000;
			fprintf(fp,
				"; %s/%s [ttl "
				"%" PRIu64 "]\n",
				namebuf, typebuf, t);
		}
	}
	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}