/* $NetBSD: badcache.c,v 1.6 2022/09/23 12:15:29 christos Exp $ */

/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * SPDX-License-Identifier: MPL-2.0
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

/*! \file */

#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>

#include <isc/buffer.h>
#include <isc/hash.h>
#include <isc/log.h>
#include <isc/mem.h>
#include <isc/mutex.h>
#include <isc/platform.h>
#include <isc/print.h>
#include <isc/rwlock.h>
#include <isc/string.h>
#include <isc/time.h>
#include <isc/util.h>

#include <dns/badcache.h>
#include <dns/name.h>
#include <dns/rdatatype.h>
#include <dns/types.h>

typedef struct dns_bcentry dns_bcentry_t;

struct dns_badcache {
	unsigned int magic;
	isc_rwlock_t lock;
	isc_mem_t *mctx;

	isc_mutex_t *tlocks;
	dns_bcentry_t **table;

	atomic_uint_fast32_t count;
	atomic_uint_fast32_t sweep;

	unsigned int minsize;
	unsigned int size;
};

#define BADCACHE_MAGIC	  ISC_MAGIC('B', 'd', 'C', 'a')
#define VALID_BADCACHE(m) ISC_MAGIC_VALID(m, BADCACHE_MAGIC)

struct dns_bcentry {
	dns_bcentry_t *next;
	dns_rdatatype_t type;
	isc_time_t expire;
	uint32_t flags;
	unsigned int hashval;
	dns_name_t name;
};

static void
badcache_resize(dns_badcache_t *bc, isc_time_t *now);

isc_result_t
dns_badcache_init(isc_mem_t *mctx, unsigned int size, dns_badcache_t **bcp) {
	dns_badcache_t *bc = NULL;
	unsigned int i;

	REQUIRE(bcp != NULL && *bcp == NULL);
	REQUIRE(mctx != NULL);

	bc = isc_mem_get(mctx, sizeof(dns_badcache_t));
	memset(bc, 0, sizeof(dns_badcache_t));

	isc_mem_attach(mctx, &bc->mctx);
	isc_rwlock_init(&bc->lock, 0, 0);

	bc->table = isc_mem_get(bc->mctx, sizeof(*bc->table) * size);
	bc->tlocks = isc_mem_get(bc->mctx, sizeof(isc_mutex_t) * size);
	for (i = 0; i < size; i++) {
		isc_mutex_init(&bc->tlocks[i]);
	}
	bc->size = bc->minsize = size;
	memset(bc->table, 0, bc->size * sizeof(dns_bcentry_t *));

	atomic_init(&bc->count, 0);
	atomic_init(&bc->sweep, 0);
	bc->magic = BADCACHE_MAGIC;

	*bcp = bc;
	return (ISC_R_SUCCESS);
}

void
dns_badcache_destroy(dns_badcache_t **bcp) {
	dns_badcache_t *bc;
	unsigned int i;

	REQUIRE(bcp != NULL && *bcp != NULL);
	bc = *bcp;
	*bcp = NULL;

	dns_badcache_flush(bc);

	bc->magic = 0;
	isc_rwlock_destroy(&bc->lock);
	for (i = 0; i < bc->size; i++) {
		isc_mutex_destroy(&bc->tlocks[i]);
	}
	isc_mem_put(bc->mctx, bc->table, sizeof(dns_bcentry_t *) * bc->size);
	isc_mem_put(bc->mctx, bc->tlocks, sizeof(isc_mutex_t) * bc->size);
	isc_mem_putanddetach(&bc->mctx, bc, sizeof(dns_badcache_t));
}
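/*
 * A minimal lifecycle sketch for the functions above; illustrative only
 * (kept under #if 0), assuming the caller already owns an isc_mem_t
 * context. The bucket count of 511 is an arbitrary example value.
 */
#if 0
static void
badcache_lifecycle_sketch(isc_mem_t *mctx) {
	dns_badcache_t *bc = NULL;

	/* Create a bad cache with 511 hash buckets. */
	RUNTIME_CHECK(dns_badcache_init(mctx, 511, &bc) == ISC_R_SUCCESS);

	/* ... dns_badcache_add() / dns_badcache_find() calls here ... */

	/* Flush all entries, then free the table, locks, and context. */
	dns_badcache_destroy(&bc);
}
#endif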
static void
badcache_resize(dns_badcache_t *bc, isc_time_t *now) {
	dns_bcentry_t **newtable, *bad, *next;
	isc_mutex_t *newlocks;
	unsigned int newsize, i;
	bool grow;

	RWLOCK(&bc->lock, isc_rwlocktype_write);

	/*
	 * XXXWPK we will have a thundering herd problem here, as all
	 * threads will wait on the RWLOCK when the badcache needs to
	 * be resized. However, this happens so rarely that it should
	 * not be a performance issue: we double the size every time we
	 * grow, and we don't shrink unless the number of entries has
	 * really shrunk. Under high load, the number of badcache
	 * entries will eventually stabilize.
	 */
	if (atomic_load_relaxed(&bc->count) > bc->size * 8) {
		grow = true;
	} else if (atomic_load_relaxed(&bc->count) < bc->size * 2 &&
		   bc->size > bc->minsize)
	{
		grow = false;
	} else {
		/* Someone resized it already, bail. */
		RWUNLOCK(&bc->lock, isc_rwlocktype_write);
		return;
	}

	if (grow) {
		newsize = bc->size * 2 + 1;
	} else {
		newsize = (bc->size - 1) / 2;
#ifdef __clang_analyzer__
		/*
		 * XXXWPK there's a bug in the clang static analyzer:
		 * `value % newsize` is considered undefined even though
		 * we check that newsize is larger than 0. This helps.
		 */
		newsize += 1;
#endif
	}
	RUNTIME_CHECK(newsize > 0);

	newtable = isc_mem_get(bc->mctx, sizeof(dns_bcentry_t *) * newsize);
	memset(newtable, 0, sizeof(dns_bcentry_t *) * newsize);

	newlocks = isc_mem_get(bc->mctx, sizeof(isc_mutex_t) * newsize);

	/* Copy existing mutexes. */
	for (i = 0; i < newsize && i < bc->size; i++) {
		newlocks[i] = bc->tlocks[i];
	}
	/* Initialize additional mutexes if we're growing. */
	for (i = bc->size; i < newsize; i++) {
		isc_mutex_init(&newlocks[i]);
	}
	/* Destroy extra mutexes if we're shrinking. */
	for (i = newsize; i < bc->size; i++) {
		isc_mutex_destroy(&bc->tlocks[i]);
	}

	/* Rehash surviving entries; drop the ones that have expired. */
	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		for (bad = bc->table[i]; bad != NULL; bad = next) {
			next = bad->next;
			if (isc_time_compare(&bad->expire, now) < 0) {
				isc_mem_put(bc->mctx, bad,
					    sizeof(*bad) + bad->name.length);
				atomic_fetch_sub_relaxed(&bc->count, 1);
			} else {
				bad->next = newtable[bad->hashval % newsize];
				newtable[bad->hashval % newsize] = bad;
			}
		}
		bc->table[i] = NULL;
	}

	isc_mem_put(bc->mctx, bc->tlocks, sizeof(isc_mutex_t) * bc->size);
	bc->tlocks = newlocks;

	isc_mem_put(bc->mctx, bc->table, sizeof(*bc->table) * bc->size);
	bc->size = newsize;
	bc->table = newtable;

	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}
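/*
 * A worked example of the thresholds above (numbers are illustrative):
 * with size == 511, the table grows once count exceeds 511 * 8 == 4088,
 * to 511 * 2 + 1 == 1023 buckets; a 1023-bucket table shrinks once
 * count drops below 1023 * 2 == 2046 (provided 1023 > minsize), back to
 * (1023 - 1) / 2 == 511. Growing at count > size * 8 but shrinking only
 * at count < size * 2 leaves a wide hysteresis band, so a steady load
 * does not cause the table to oscillate between sizes.
 */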
void
dns_badcache_add(dns_badcache_t *bc, const dns_name_t *name,
		 dns_rdatatype_t type, bool update, uint32_t flags,
		 isc_time_t *expire) {
	isc_result_t result;
	unsigned int hashval, hash;
	dns_bcentry_t *bad, *prev, *next;
	isc_time_t now;
	bool resize = false;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);
	REQUIRE(expire != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	result = isc_time_now(&now);
	if (result != ISC_R_SUCCESS) {
		isc_time_settoepoch(&now);
	}

	hashval = dns_name_hash(name, false);
	hash = hashval % bc->size;
	LOCK(&bc->tlocks[hash]);
	prev = NULL;
	/* Search the chain for a match, evicting expired entries as we go. */
	for (bad = bc->table[hash]; bad != NULL; bad = next) {
		next = bad->next;
		if (bad->type == type && dns_name_equal(name, &bad->name)) {
			if (update) {
				bad->expire = *expire;
				bad->flags = flags;
			}
			break;
		}
		if (isc_time_compare(&bad->expire, &now) < 0) {
			if (prev == NULL) {
				bc->table[hash] = bad->next;
			} else {
				prev->next = bad->next;
			}
			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		} else {
			prev = bad;
		}
	}

	if (bad == NULL) {
		isc_buffer_t buffer;
		bad = isc_mem_get(bc->mctx, sizeof(*bad) + name->length);
		bad->type = type;
		bad->hashval = hashval;
		bad->expire = *expire;
		bad->flags = flags;
		isc_buffer_init(&buffer, bad + 1, name->length);
		dns_name_init(&bad->name, NULL);
		dns_name_copy(name, &bad->name, &buffer);
		bad->next = bc->table[hash];
		bc->table[hash] = bad;
		unsigned count = atomic_fetch_add_relaxed(&bc->count, 1);
		if ((count > bc->size * 8) ||
		    (count < bc->size * 2 && bc->size > bc->minsize)) {
			resize = true;
		}
	} else {
		bad->expire = *expire;
	}

	UNLOCK(&bc->tlocks[hash]);
	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
	if (resize) {
		badcache_resize(bc, &now);
	}
}

bool
dns_badcache_find(dns_badcache_t *bc, const dns_name_t *name,
		  dns_rdatatype_t type, uint32_t *flagp, isc_time_t *now) {
	dns_bcentry_t *bad, *prev, *next;
	bool answer = false;
	unsigned int i;
	unsigned int hash;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);
	REQUIRE(now != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	/*
	 * XXXMUKS: dns_name_equal() is expensive as it does an
	 * octet-by-octet comparison, and it can be improved in two
	 * ways here. First, lowercase the names (use
	 * dns_name_downcase() instead of dns_name_copy() in
	 * dns_badcache_add()) so that dns_name_caseequal() can be
	 * used, which the compiler can compile to SIMD instructions.
	 * Second, don't put multiple copies of the same name in the
	 * chain (or multiple names will have to be matched for
	 * equality), but use name->link to store the type-specific
	 * part.
	 */

	if (atomic_load_relaxed(&bc->count) == 0) {
		goto skip;
	}

	hash = dns_name_hash(name, false) % bc->size;
	prev = NULL;
	LOCK(&bc->tlocks[hash]);
	for (bad = bc->table[hash]; bad != NULL; bad = next) {
		next = bad->next;
		/*
		 * Search the hash list. Clean out expired records as we go.
		 */
		if (isc_time_compare(&bad->expire, now) < 0) {
			if (prev != NULL) {
				prev->next = bad->next;
			} else {
				bc->table[hash] = bad->next;
			}

			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub(&bc->count, 1);
			continue;
		}
		if (bad->type == type && dns_name_equal(name, &bad->name)) {
			if (flagp != NULL) {
				*flagp = bad->flags;
			}
			answer = true;
			break;
		}
		prev = bad;
	}
	UNLOCK(&bc->tlocks[hash]);
skip:

	/*
	 * Slow sweep to clean out stale records.
	 */
	i = atomic_fetch_add(&bc->sweep, 1) % bc->size;
	if (isc_mutex_trylock(&bc->tlocks[i]) == ISC_R_SUCCESS) {
		bad = bc->table[i];
		if (bad != NULL && isc_time_compare(&bad->expire, now) < 0) {
			bc->table[i] = bad->next;
			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		}
		UNLOCK(&bc->tlocks[i]);
	}

	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
	return (answer);
}
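/*
 * A sketch of the add/find pair in use; illustrative only (kept under
 * #if 0). "bc" and "qname" are assumed to be supplied by the caller,
 * and the 30-second lifetime is an arbitrary example value.
 */
#if 0
static bool
badcache_lookup_sketch(dns_badcache_t *bc, const dns_name_t *qname) {
	isc_time_t expire, now;
	isc_interval_t interval;
	uint32_t flags = 0;

	/* Mark qname/A as bad for 30 seconds from now. */
	isc_interval_set(&interval, 30, 0);
	RUNTIME_CHECK(isc_time_nowplusinterval(&expire, &interval) ==
		      ISC_R_SUCCESS);
	dns_badcache_add(bc, qname, dns_rdatatype_a, false, 0, &expire);

	/* A lookup against the current time now sees the entry. */
	RUNTIME_CHECK(isc_time_now(&now) == ISC_R_SUCCESS);
	return (dns_badcache_find(bc, qname, dns_rdatatype_a, &flags, &now));
}
#endif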
void
dns_badcache_flush(dns_badcache_t *bc) {
	dns_bcentry_t *entry, *next;
	unsigned int i;

	RWLOCK(&bc->lock, isc_rwlocktype_write);
	REQUIRE(VALID_BADCACHE(bc));

	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		for (entry = bc->table[i]; entry != NULL; entry = next) {
			next = entry->next;
			isc_mem_put(bc->mctx, entry,
				    sizeof(*entry) + entry->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		}
		bc->table[i] = NULL;
	}
	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}

void
dns_badcache_flushname(dns_badcache_t *bc, const dns_name_t *name) {
	dns_bcentry_t *bad, *prev, *next;
	isc_result_t result;
	isc_time_t now;
	unsigned int hash;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	result = isc_time_now(&now);
	if (result != ISC_R_SUCCESS) {
		isc_time_settoepoch(&now);
	}
	hash = dns_name_hash(name, false) % bc->size;
	LOCK(&bc->tlocks[hash]);
	prev = NULL;
	for (bad = bc->table[hash]; bad != NULL; bad = next) {
		int n;
		next = bad->next;
		n = isc_time_compare(&bad->expire, &now);
		if (n < 0 || dns_name_equal(name, &bad->name)) {
			if (prev == NULL) {
				bc->table[hash] = bad->next;
			} else {
				prev->next = bad->next;
			}

			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		} else {
			prev = bad;
		}
	}
	UNLOCK(&bc->tlocks[hash]);

	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
}
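/*
 * Note the contrast with dns_badcache_flushtree() below:
 * dns_badcache_flushname() visits only the one hash chain the name
 * maps to and removes exact name matches (of any type), whereas
 * flushtree() must walk every chain, because subdomains of 'name'
 * hash to arbitrary buckets. For example, flushing the name
 * "example.com" leaves "www.example.com" entries in place, while
 * flushing the tree "example.com" removes them as well; note that
 * dns_name_issubdomain() also matches the name itself.
 */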
void
dns_badcache_flushtree(dns_badcache_t *bc, const dns_name_t *name) {
	dns_bcentry_t *bad, *prev, *next;
	unsigned int i;
	int n;
	isc_time_t now;
	isc_result_t result;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);

	/*
	 * We write-lock the table to avoid relocking every bucket
	 * individually.
	 */
	RWLOCK(&bc->lock, isc_rwlocktype_write);

	result = isc_time_now(&now);
	if (result != ISC_R_SUCCESS) {
		isc_time_settoepoch(&now);
	}

	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		prev = NULL;
		for (bad = bc->table[i]; bad != NULL; bad = next) {
			next = bad->next;
			n = isc_time_compare(&bad->expire, &now);
			if (n < 0 || dns_name_issubdomain(&bad->name, name)) {
				if (prev == NULL) {
					bc->table[i] = bad->next;
				} else {
					prev->next = bad->next;
				}

				isc_mem_put(bc->mctx, bad,
					    sizeof(*bad) + bad->name.length);
				atomic_fetch_sub_relaxed(&bc->count, 1);
			} else {
				prev = bad;
			}
		}
	}

	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}

void
dns_badcache_print(dns_badcache_t *bc, const char *cachename, FILE *fp) {
	char namebuf[DNS_NAME_FORMATSIZE];
	char typebuf[DNS_RDATATYPE_FORMATSIZE];
	dns_bcentry_t *bad, *next, *prev;
	isc_time_t now;
	unsigned int i;
	uint64_t t;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(cachename != NULL);
	REQUIRE(fp != NULL);

	/*
	 * We write-lock the table to avoid relocking every bucket
	 * individually.
	 */
	RWLOCK(&bc->lock, isc_rwlocktype_write);
	fprintf(fp, ";\n; %s\n;\n", cachename);

	TIME_NOW(&now);
	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		prev = NULL;
		for (bad = bc->table[i]; bad != NULL; bad = next) {
			next = bad->next;
			if (isc_time_compare(&bad->expire, &now) < 0) {
				if (prev != NULL) {
					prev->next = bad->next;
				} else {
					bc->table[i] = bad->next;
				}

				isc_mem_put(bc->mctx, bad,
					    sizeof(*bad) + bad->name.length);
				atomic_fetch_sub_relaxed(&bc->count, 1);
				continue;
			}
			prev = bad;
			dns_name_format(&bad->name, namebuf, sizeof(namebuf));
			dns_rdatatype_format(bad->type, typebuf,
					     sizeof(typebuf));
			t = isc_time_microdiff(&bad->expire, &now);
			t /= 1000; /* microseconds to milliseconds */
			fprintf(fp, "; %s/%s [ttl %" PRIu64 "]\n", namebuf,
				typebuf, t);
		}
	}
	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}
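/*
 * For reference, dns_badcache_print() output looks like the sketch
 * below; the cache name, entry, and remaining lifetime are invented
 * example values. Note that the bracketed number is in milliseconds,
 * despite the "ttl" label.
 *
 *	;
 *	; servfail cache
 *	;
 *	; www.example.com/A [ttl 29731]
 */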