/* $NetBSD: badcache.c,v 1.7 2023/01/25 21:43:29 christos Exp $ */

/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * SPDX-License-Identifier: MPL-2.0
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

/*! \file */

#include <inttypes.h>
#include <stdbool.h>

#include <isc/buffer.h>
#include <isc/hash.h>
#include <isc/log.h>
#include <isc/mem.h>
#include <isc/mutex.h>
#include <isc/platform.h>
#include <isc/print.h>
#include <isc/rwlock.h>
#include <isc/string.h>
#include <isc/time.h>
#include <isc/util.h>

#include <dns/badcache.h>
#include <dns/name.h>
#include <dns/rdatatype.h>
#include <dns/types.h>

typedef struct dns_bcentry dns_bcentry_t;

struct dns_badcache {
	unsigned int magic;
	isc_rwlock_t lock;
	isc_mem_t *mctx;

	isc_mutex_t *tlocks;
	dns_bcentry_t **table;

	atomic_uint_fast32_t count;
	atomic_uint_fast32_t sweep;

	unsigned int minsize;
	unsigned int size;
};

#define BADCACHE_MAGIC	  ISC_MAGIC('B', 'd', 'C', 'a')
#define VALID_BADCACHE(m) ISC_MAGIC_VALID(m, BADCACHE_MAGIC)

struct dns_bcentry {
	dns_bcentry_t *next;
	dns_rdatatype_t type;
	isc_time_t expire;
	uint32_t flags;
	unsigned int hashval;
	dns_name_t name;
};

static void
badcache_resize(dns_badcache_t *bc, isc_time_t *now);

isc_result_t
dns_badcache_init(isc_mem_t *mctx, unsigned int size, dns_badcache_t **bcp) {
	dns_badcache_t *bc = NULL;
	unsigned int i;

	REQUIRE(bcp != NULL && *bcp == NULL);
	REQUIRE(mctx != NULL);

	bc = isc_mem_get(mctx, sizeof(dns_badcache_t));
	memset(bc, 0, sizeof(dns_badcache_t));

	isc_mem_attach(mctx, &bc->mctx);
	isc_rwlock_init(&bc->lock, 0, 0);

	bc->table = isc_mem_get(bc->mctx, sizeof(*bc->table) * size);
	bc->tlocks = isc_mem_get(bc->mctx, sizeof(isc_mutex_t) * size);
	for (i = 0; i < size; i++) {
		isc_mutex_init(&bc->tlocks[i]);
	}
	bc->size = bc->minsize = size;
	memset(bc->table, 0, bc->size * sizeof(dns_bcentry_t *));

	atomic_init(&bc->count, 0);
	atomic_init(&bc->sweep, 0);
	bc->magic = BADCACHE_MAGIC;

	*bcp = bc;
	return (ISC_R_SUCCESS);
}

void
dns_badcache_destroy(dns_badcache_t **bcp) {
	dns_badcache_t *bc;
	unsigned int i;

	REQUIRE(bcp != NULL && *bcp != NULL);
	bc = *bcp;
	*bcp = NULL;

	dns_badcache_flush(bc);

	bc->magic = 0;
	isc_rwlock_destroy(&bc->lock);
	for (i = 0; i < bc->size; i++) {
		isc_mutex_destroy(&bc->tlocks[i]);
	}
	isc_mem_put(bc->mctx, bc->table, sizeof(dns_bcentry_t *) * bc->size);
	isc_mem_put(bc->mctx, bc->tlocks, sizeof(isc_mutex_t) * bc->size);
	isc_mem_putanddetach(&bc->mctx, bc, sizeof(dns_badcache_t));
}

static void
badcache_resize(dns_badcache_t *bc, isc_time_t *now) {
	dns_bcentry_t **newtable, *bad, *next;
	isc_mutex_t *newlocks;
	unsigned int newsize, i;
	bool grow;

	RWLOCK(&bc->lock, isc_rwlocktype_write);

	/*
	 * XXXWPK we will have a thundering herd problem here,
	 * as all threads will wait on the RWLOCK when there's
	 * a need to resize badcache.
	 * However, it happens so rarely it should not be a
	 * performance issue. This is because we double the
	 * size every time we grow it, and we don't shrink
	 * unless the number of entries really shrunk. In a
	 * high load situation, the number of badcache entries
	 * will eventually stabilize.
	 */
	if (atomic_load_relaxed(&bc->count) > bc->size * 8) {
		grow = true;
	} else if (atomic_load_relaxed(&bc->count) < bc->size * 2 &&
		   bc->size > bc->minsize)
	{
		grow = false;
	} else {
		/* Someone resized it already, bail. */
		RWUNLOCK(&bc->lock, isc_rwlocktype_write);
		return;
	}

	if (grow) {
		newsize = bc->size * 2 + 1;
	} else {
		newsize = (bc->size - 1) / 2;
#ifdef __clang_analyzer__
		/*
		 * XXXWPK there's a bug in clang static analyzer -
		 * `value % newsize` is considered undefined even though
		 * we check if newsize is larger than 0. This helps.
		 */
		newsize += 1;
#endif
	}
	RUNTIME_CHECK(newsize > 0);

	newtable = isc_mem_get(bc->mctx, sizeof(dns_bcentry_t *) * newsize);
	memset(newtable, 0, sizeof(dns_bcentry_t *) * newsize);

	newlocks = isc_mem_get(bc->mctx, sizeof(isc_mutex_t) * newsize);

	/* Copy existing mutexes */
	for (i = 0; i < newsize && i < bc->size; i++) {
		newlocks[i] = bc->tlocks[i];
	}
	/* Initialize additional mutexes if we're growing */
	for (i = bc->size; i < newsize; i++) {
		isc_mutex_init(&newlocks[i]);
	}
	/* Destroy extra mutexes if we're shrinking */
	for (i = newsize; i < bc->size; i++) {
		isc_mutex_destroy(&bc->tlocks[i]);
	}

	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		for (bad = bc->table[i]; bad != NULL; bad = next) {
			next = bad->next;
			if (isc_time_compare(&bad->expire, now) < 0) {
				isc_mem_put(bc->mctx, bad,
					    sizeof(*bad) + bad->name.length);
				atomic_fetch_sub_relaxed(&bc->count, 1);
			} else {
				bad->next = newtable[bad->hashval % newsize];
				newtable[bad->hashval % newsize] = bad;
			}
		}
		bc->table[i] = NULL;
	}

	isc_mem_put(bc->mctx, bc->tlocks, sizeof(isc_mutex_t) * bc->size);
	bc->tlocks = newlocks;

	isc_mem_put(bc->mctx, bc->table, sizeof(*bc->table) * bc->size);
	bc->size = newsize;
	bc->table = newtable;

	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}

void
dns_badcache_add(dns_badcache_t *bc, const dns_name_t *name,
		 dns_rdatatype_t type, bool update, uint32_t flags,
		 isc_time_t *expire) {
	isc_result_t result;
	unsigned int hashval, hash;
	dns_bcentry_t *bad, *prev, *next;
	isc_time_t now;
	bool resize = false;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);
	REQUIRE(expire != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	result = isc_time_now(&now);
	if (result != ISC_R_SUCCESS) {
		isc_time_settoepoch(&now);
	}

	hashval = dns_name_hash(name, false);
	hash = hashval % bc->size;
	LOCK(&bc->tlocks[hash]);
	prev = NULL;
	for (bad = bc->table[hash]; bad != NULL; bad = next) {
		next = bad->next;
		if (bad->type == type && dns_name_equal(name, &bad->name)) {
			if (update) {
				bad->expire = *expire;
				bad->flags = flags;
			}
			break;
		}
		if (isc_time_compare(&bad->expire, &now) < 0) {
			if (prev == NULL) {
				bc->table[hash] = bad->next;
			} else {
				prev->next = bad->next;
			}
			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		} else {
			prev = bad;
		}
	}

	if (bad == NULL) {
		isc_buffer_t buffer;
		bad = isc_mem_get(bc->mctx, sizeof(*bad) + name->length);
		bad->type = type;
		bad->hashval = hashval;
		bad->expire = *expire;
		bad->flags = flags;
		isc_buffer_init(&buffer, bad + 1, name->length);
		dns_name_init(&bad->name, NULL);
		dns_name_copy(name, &bad->name, &buffer);
		bad->next = bc->table[hash];
		bc->table[hash] = bad;
		unsigned count = atomic_fetch_add_relaxed(&bc->count, 1);
		if ((count > bc->size * 8) ||
		    (count < bc->size * 2 && bc->size > bc->minsize))
		{
			resize = true;
		}
	} else {
		bad->expire = *expire;
	}

	UNLOCK(&bc->tlocks[hash]);
	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
	if (resize) {
		badcache_resize(bc, &now);
	}
}

bool
dns_badcache_find(dns_badcache_t *bc, const dns_name_t *name,
		  dns_rdatatype_t type, uint32_t *flagp, isc_time_t *now) {
	dns_bcentry_t *bad, *prev, *next;
	bool answer = false;
	unsigned int i;
	unsigned int hash;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);
	REQUIRE(now != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	/*
	 * XXXMUKS: dns_name_equal() is expensive as it does an
	 * octet-by-octet comparison, and it can be made better in two
	 * ways here. First, lowercase the names (use
	 * dns_name_downcase() instead of dns_name_copy() in
	 * dns_badcache_add()) so that dns_name_caseequal() can be used,
	 * which the compiler will emit as SIMD instructions. Second,
	 * don't put multiple copies of the same name in the chain (or
	 * multiple names will have to be matched for equality), but use
	 * name->link to store the type specific part.
	 */

	if (atomic_load_relaxed(&bc->count) == 0) {
		goto skip;
	}

	hash = dns_name_hash(name, false) % bc->size;
	prev = NULL;
	LOCK(&bc->tlocks[hash]);
	for (bad = bc->table[hash]; bad != NULL; bad = next) {
		next = bad->next;
		/*
		 * Search the hash list. Clean out expired records as we go.
		 */
		if (isc_time_compare(&bad->expire, now) < 0) {
			if (prev != NULL) {
				prev->next = bad->next;
			} else {
				bc->table[hash] = bad->next;
			}

			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub(&bc->count, 1);
			continue;
		}
		if (bad->type == type && dns_name_equal(name, &bad->name)) {
			if (flagp != NULL) {
				*flagp = bad->flags;
			}
			answer = true;
			break;
		}
		prev = bad;
	}
	UNLOCK(&bc->tlocks[hash]);
skip:

	/*
	 * Slow sweep to clean out stale records.
	 */
	i = atomic_fetch_add(&bc->sweep, 1) % bc->size;
	if (isc_mutex_trylock(&bc->tlocks[i]) == ISC_R_SUCCESS) {
		bad = bc->table[i];
		if (bad != NULL && isc_time_compare(&bad->expire, now) < 0) {
			bc->table[i] = bad->next;
			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		}
		UNLOCK(&bc->tlocks[i]);
	}

	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
	return (answer);
}

void
dns_badcache_flush(dns_badcache_t *bc) {
	dns_bcentry_t *entry, *next;
	unsigned int i;

	RWLOCK(&bc->lock, isc_rwlocktype_write);
	REQUIRE(VALID_BADCACHE(bc));

	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		for (entry = bc->table[i]; entry != NULL; entry = next) {
			next = entry->next;
			isc_mem_put(bc->mctx, entry,
				    sizeof(*entry) + entry->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		}
		bc->table[i] = NULL;
	}
	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}

void
dns_badcache_flushname(dns_badcache_t *bc, const dns_name_t *name) {
	dns_bcentry_t *bad, *prev, *next;
	isc_result_t result;
	isc_time_t now;
	unsigned int hash;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	result = isc_time_now(&now);
	if (result != ISC_R_SUCCESS) {
		isc_time_settoepoch(&now);
	}
	hash = dns_name_hash(name, false) % bc->size;
	LOCK(&bc->tlocks[hash]);
	prev = NULL;
	for (bad = bc->table[hash]; bad != NULL; bad = next) {
		int n;
		next = bad->next;
		n = isc_time_compare(&bad->expire, &now);
		if (n < 0 || dns_name_equal(name, &bad->name)) {
			if (prev == NULL) {
				bc->table[hash] = bad->next;
			} else {
				prev->next = bad->next;
			}

			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		} else {
			prev = bad;
		}
	}
	UNLOCK(&bc->tlocks[hash]);

	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
}

void
dns_badcache_flushtree(dns_badcache_t *bc, const dns_name_t *name) {
	dns_bcentry_t *bad, *prev, *next;
	unsigned int i;
	int n;
	isc_time_t now;
	isc_result_t result;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);

	/*
	 * We write lock the tree to avoid relocking every node
	 * individually.
	 */
	RWLOCK(&bc->lock, isc_rwlocktype_write);

	result = isc_time_now(&now);
	if (result != ISC_R_SUCCESS) {
		isc_time_settoepoch(&now);
	}

	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		prev = NULL;
		for (bad = bc->table[i]; bad != NULL; bad = next) {
			next = bad->next;
			n = isc_time_compare(&bad->expire, &now);
			if (n < 0 || dns_name_issubdomain(&bad->name, name)) {
				if (prev == NULL) {
					bc->table[i] = bad->next;
				} else {
					prev->next = bad->next;
				}

				isc_mem_put(bc->mctx, bad,
					    sizeof(*bad) + bad->name.length);
				atomic_fetch_sub_relaxed(&bc->count, 1);
			} else {
				prev = bad;
			}
		}
	}

	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}

void
dns_badcache_print(dns_badcache_t *bc, const char *cachename, FILE *fp) {
	char namebuf[DNS_NAME_FORMATSIZE];
	char typebuf[DNS_RDATATYPE_FORMATSIZE];
	dns_bcentry_t *bad, *next, *prev;
	isc_time_t now;
	unsigned int i;
	uint64_t t;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(cachename != NULL);
	REQUIRE(fp != NULL);

	/*
	 * We write lock the tree to avoid relocking every node
	 * individually.
	 */
	RWLOCK(&bc->lock, isc_rwlocktype_write);
	fprintf(fp, ";\n; %s\n;\n", cachename);

	TIME_NOW(&now);
	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		prev = NULL;
		for (bad = bc->table[i]; bad != NULL; bad = next) {
			next = bad->next;
			if (isc_time_compare(&bad->expire, &now) < 0) {
				if (prev != NULL) {
					prev->next = bad->next;
				} else {
					bc->table[i] = bad->next;
				}

				isc_mem_put(bc->mctx, bad,
					    sizeof(*bad) + bad->name.length);
				atomic_fetch_sub_relaxed(&bc->count, 1);
				continue;
			}
			prev = bad;
			dns_name_format(&bad->name, namebuf, sizeof(namebuf));
			dns_rdatatype_format(bad->type, typebuf,
					     sizeof(typebuf));
			t = isc_time_microdiff(&bad->expire, &now);
			t /= 1000;
			fprintf(fp,
				"; %s/%s [ttl "
				"%" PRIu64 "]\n",
				namebuf, typebuf, t);
		}
	}
	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}
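
/*
 * Editor's note: the sketch below is not part of the original file.  It is
 * a minimal, illustrative example of how a caller might exercise the public
 * badcache API defined above (dns_badcache_init(), dns_badcache_add(),
 * dns_badcache_find(), dns_badcache_destroy()).  The helper calls
 * isc_mem_create(), isc_interval_set(), isc_time_nowplusinterval(),
 * dns_fixedname_init(), dns_fixedname_name() and dns_name_fromstring()
 * are assumed to be available with these signatures in the surrounding
 * BIND 9 source tree; check the local headers before reusing it.
 *
 *	isc_mem_t *mctx = NULL;
 *	dns_badcache_t *bc = NULL;
 *	dns_fixedname_t fixed;
 *	dns_name_t *name;
 *	isc_interval_t interval;
 *	isc_time_t expire, now;
 *	uint32_t flags = 0;
 *
 *	isc_mem_create(&mctx);
 *	RUNTIME_CHECK(dns_badcache_init(mctx, 1021, &bc) == ISC_R_SUCCESS);
 *
 *	dns_fixedname_init(&fixed);
 *	name = dns_fixedname_name(&fixed);
 *	RUNTIME_CHECK(dns_name_fromstring(name, "broken.example.", 0,
 *					  NULL) == ISC_R_SUCCESS);
 *
 *	// Mark "broken.example./AAAA" as bad for the next 600 seconds.
 *	isc_interval_set(&interval, 600, 0);
 *	RUNTIME_CHECK(isc_time_nowplusinterval(&expire, &interval) ==
 *		      ISC_R_SUCCESS);
 *	dns_badcache_add(bc, name, dns_rdatatype_aaaa, false, 0, &expire);
 *
 *	// Later: a hit returns true and, if requested, the stored flags.
 *	RUNTIME_CHECK(isc_time_now(&now) == ISC_R_SUCCESS);
 *	if (dns_badcache_find(bc, name, dns_rdatatype_aaaa, &flags, &now)) {
 *		// entry is still live; skip re-querying the bad server
 *	}
 *
 *	dns_badcache_destroy(&bc);
 *	isc_mem_destroy(&mctx);
 */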