/* $NetBSD: histo.c,v 1.3 2025/01/27 02:16:05 christos Exp $ */

/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * SPDX-License-Identifier: MPL-2.0
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

#include <assert.h>
#include <errno.h>
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <isc/atomic.h>
#include <isc/histo.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/tid.h>

#define HISTO_MAGIC	    ISC_MAGIC('H', 's', 't', 'o')
#define HISTO_VALID(p)	    ISC_MAGIC_VALID(p, HISTO_MAGIC)
#define HISTOMULTI_MAGIC    ISC_MAGIC('H', 'g', 'M', 't')
#define HISTOMULTI_VALID(p) ISC_MAGIC_VALID(p, HISTOMULTI_MAGIC)

/*
 * Natural logarithms of 2 and 10, for converting precisions between
 * binary and decimal significant figures.
 */
#define LN_2  0.693147180559945309
#define LN_10 2.302585092994045684

/*
 * The chunks array has a static size for simplicity, fixed as the
 * number of bits in a value. That means we waste a little extra space
 * that could be saved by omitting the exponents that are covered by
 * `sigbits`. The following macros calculate (at run time) the exact
 * number of buckets when we need to do accurate bounds checks.
 *
 * For a discussion of the floating point terminology, see the
 * commentary on `value_to_key()` below.
 *
 * We often use the variable names `c` for chunk and `b` for bucket.
 */
#define CHUNKS 64

#define DENORMALS(hg) ((hg)->sigbits - 1)
#define MANTISSAS(hg) (1 << (hg)->sigbits)
#define EXPONENTS(hg) (CHUNKS - DENORMALS(hg))
#define BUCKETS(hg)   (EXPONENTS(hg) * MANTISSAS(hg))

#define MAXCHUNK(hg)  EXPONENTS(hg)
#define CHUNKSIZE(hg) MANTISSAS(hg)
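
/*
 * A worked example of the geometry (illustrative only, assuming
 * `sigbits == 3`): MANTISSAS == CHUNKSIZE == 8 buckets per chunk;
 * DENORMALS == 2 exponents are subsumed by chunk zero, leaving
 * EXPONENTS == MAXCHUNK == 62 chunks and BUCKETS == 62 * 8 == 496
 * buckets in total.
 */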

#ifdef _LP64
typedef atomic_uint_fast64_t hg_bucket_t;
#else
typedef atomic_uint_fast32_t hg_bucket_t;
#endif
typedef atomic_ptr(hg_bucket_t) hg_chunk_t;

struct isc_histo {
	uint magic;
	uint sigbits;
	isc_mem_t *mctx;
	hg_chunk_t chunk[CHUNKS];
};

struct isc_histomulti {
	uint magic;
	uint size;
	isc_histo_t *hg[];
};

/**********************************************************************/

void
isc_histo_create(isc_mem_t *mctx, uint sigbits, isc_histo_t **hgp) {
	REQUIRE(sigbits >= ISC_HISTO_MINBITS);
	REQUIRE(sigbits <= ISC_HISTO_MAXBITS);
	REQUIRE(hgp != NULL);
	REQUIRE(*hgp == NULL);

	isc_histo_t *hg = isc_mem_get(mctx, sizeof(*hg));
	*hg = (isc_histo_t){
		.magic = HISTO_MAGIC,
		.sigbits = sigbits,
	};
	isc_mem_attach(mctx, &hg->mctx);

	*hgp = hg;
}

void
isc_histo_destroy(isc_histo_t **hgp) {
	REQUIRE(hgp != NULL);
	REQUIRE(HISTO_VALID(*hgp));

	isc_histo_t *hg = *hgp;
	*hgp = NULL;

	for (uint c = 0; c < CHUNKS; c++) {
		if (hg->chunk[c] != NULL) {
			isc_mem_cput(hg->mctx, hg->chunk[c], CHUNKSIZE(hg),
				     sizeof(hg_bucket_t));
		}
	}
	isc_mem_putanddetach(&hg->mctx, hg, sizeof(*hg));
}

/**********************************************************************/

uint
isc_histo_sigbits(isc_histo_t *hg) {
	REQUIRE(HISTO_VALID(hg));
	return hg->sigbits;
}

/*
 * use precomputed logs and builtins to avoid linking with libm
 */

uint
isc_histo_bits_to_digits(uint bits) {
	REQUIRE(bits >= ISC_HISTO_MINBITS);
	REQUIRE(bits <= ISC_HISTO_MAXBITS);
	return floor(1.0 - (1.0 - bits) * LN_2 / LN_10);
}

uint
isc_histo_digits_to_bits(uint digits) {
	REQUIRE(digits >= ISC_HISTO_MINDIGITS);
	REQUIRE(digits <= ISC_HISTO_MAXDIGITS);
	return ceil(1.0 - (1.0 - digits) * LN_10 / LN_2);
}

/**********************************************************************/

/*
 * The way we map buckets to keys is what gives the histogram a
 * consistent relative error across the whole range of `uint64_t`.
 * The mapping is log-linear: a chunk key is the logarithm of part
 * of the value (in other words, chunks are spaced exponentially);
 * and a bucket within a chunk is a linear function of another part
 * of the value.
 *
 * This log-linear spacing is similar to the size classes used by
 * jemalloc. It is also the way floating point numbers work: the
 * exponent is the log part, and the mantissa is the linear part.
 *
 * So, a chunk number is the log (base 2) of a `uint64_t`, which is
 * between 0 and 63, which is why there are up to 64 chunks. In
 * floating point terms the chunk number is the exponent. The
 * histogram's number of significant bits is the size of the
 * mantissa, which indexes buckets within each chunk.
 *
 * A fast way to get the logarithm of a positive integer is CLZ,
 * count leading zeroes.
 *
 * Chunk zero is special. Chunk 1 covers values between `CHUNKSIZE`
 * and `CHUNKSIZE * 2 - 1`, where `CHUNKSIZE == exponent << sigbits
 * == 1 << sigbits`. Each chunk has CHUNKSIZE buckets, so chunk 1 has
 * one value per bucket. There are CHUNKSIZE values before chunk 1,
 * which map to chunk 0, so it also has one value per bucket. (Hence
 * the first two chunks have one value per bucket.) The values in
 * chunk 0 correspond to denormal numbers in floating point terms.
 * They are also the values where `63 - sigbits - clz` would be less
 * than one if denormals were not handled specially.
 *
 * This branchless conversion is due to Paul Khuong: see bin_down_of() in
 * https://pvk.ca/Blog/2015/06/27/linear-log-bucketing-fast-versatile-simple/
 *
 * This function is in the `isc_histo_inc()` fast path.
 */
static inline uint
value_to_key(const isc_histo_t *hg, uint64_t value) {
	/* ensure that denormal numbers are all in chunk zero */
	uint64_t chunked = value | CHUNKSIZE(hg);
	int clz = __builtin_clzll((unsigned long long)(chunked));
	/* actually 1 less than the exponent except for denormals */
	uint exponent = 63 - hg->sigbits - clz;
	/* mantissa has leading bit set except for denormals */
	uint mantissa = value >> exponent;
	/* leading bit of mantissa adds one to exponent */
	return (exponent << hg->sigbits) + mantissa;
}
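
/*
 * A worked trace of `value_to_key()` (illustrative only, assuming
 * `sigbits == 3`, so `CHUNKSIZE == 8`):
 *
 * value == 100 == 0b1100100: chunked == 100 | 8 == 108, clz == 57,
 * exponent == 63 - 3 - 57 == 3, mantissa == 100 >> 3 == 12,
 * key == (3 << 3) + 12 == 36 (chunk 4, bucket 4).
 *
 * value == 5 (a denormal): chunked == 5 | 8 == 13, clz == 60,
 * exponent == 0, mantissa == 5, key == 5 (chunk 0, bucket 5).
 */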

/*
 * Inverse functions of `value_to_key()`, to get the minimum and
 * maximum values that map to a particular key.
 *
 * We must not cause undefined behaviour by hitting integer limits,
 * which is a risk when we aim to cover the entire range of `uint64_t`.
 *
 * The maximum value in the last bucket is UINT64_MAX, which
 * `key_to_maxval()` gets by deliberately computing `0 - 1`,
 * underflowing a `uint64_t`. That is OK when unsigned.
 *
 * We must take care not to shift too much in `key_to_minval()`.
 * The largest key passed by `key_to_maxval()` is `BUCKETS(hg)`, so
 * `exponent == EXPONENTS(hg) - 1 == 64 - sigbits`, which is always
 * less than 64, so the size of the shift is OK.
 *
 * The `mantissa` in this edge case is just `chunksize`, which when
 * shifted becomes `1 << 64`, which overflows `uint64_t`. Again this
 * is OK when unsigned, so the return value is zero.
 */

static inline uint64_t
key_to_minval(const isc_histo_t *hg, uint key) {
	uint chunksize = CHUNKSIZE(hg);
	uint exponent = (key / chunksize) - 1;
	uint64_t mantissa = (key % chunksize) + chunksize;
	return key < chunksize ? key : mantissa << exponent;
}

static inline uint64_t
key_to_maxval(const isc_histo_t *hg, uint key) {
	return key_to_minval(hg, key + 1) - 1;
}
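
/*
 * Continuing the `sigbits == 3` illustration above: key 36 maps back
 * to exponent == 36/8 - 1 == 3 and mantissa == 36%8 + 8 == 12, so
 * key_to_minval() == 12 << 3 == 96 and key_to_maxval() ==
 * (13 << 3) - 1 == 103; every value in [96, 103] maps to key 36.
 */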

/**********************************************************************/

static hg_bucket_t *
key_to_new_bucket(isc_histo_t *hg, uint key) {
	/* slow path */
	uint chunksize = CHUNKSIZE(hg);
	uint chunk = key / chunksize;
	uint bucket = key % chunksize;
	hg_bucket_t *old_cp = NULL;
	hg_bucket_t *new_cp = isc_mem_cget(hg->mctx, CHUNKSIZE(hg),
					   sizeof(hg_bucket_t));
	hg_chunk_t *cpp = &hg->chunk[chunk];
	if (atomic_compare_exchange_strong_acq_rel(cpp, &old_cp, new_cp)) {
		return &new_cp[bucket];
	} else {
		/* lost the race, so use the winner's chunk */
		isc_mem_cput(hg->mctx, new_cp, CHUNKSIZE(hg),
			     sizeof(hg_bucket_t));
		return &old_cp[bucket];
	}
}

static hg_bucket_t *
get_chunk(const isc_histo_t *hg, uint chunk) {
	return atomic_load_acquire(&hg->chunk[chunk]);
}

static inline hg_bucket_t *
key_to_bucket(const isc_histo_t *hg, uint key) {
	/* fast path */
	uint chunksize = CHUNKSIZE(hg);
	uint chunk = key / chunksize;
	uint bucket = key % chunksize;
	hg_bucket_t *cp = get_chunk(hg, chunk);
	return cp == NULL ? NULL : &cp[bucket];
}

static inline uint64_t
bucket_count(const hg_bucket_t *bp) {
	return bp == NULL ? 0 : atomic_load_relaxed(bp);
}

static inline uint64_t
get_key_count(const isc_histo_t *hg, uint key) {
	return bucket_count(key_to_bucket(hg, key));
}

static inline void
add_key_count(isc_histo_t *hg, uint key, uint64_t inc) {
	/* fast path */
	if (inc > 0) {
		hg_bucket_t *bp = key_to_bucket(hg, key);
		bp = bp != NULL ? bp : key_to_new_bucket(hg, key);
		atomic_fetch_add_relaxed(bp, inc);
	}
}

/**********************************************************************/

void
isc_histo_add(isc_histo_t *hg, uint64_t value, uint64_t inc) {
	REQUIRE(HISTO_VALID(hg));
	add_key_count(hg, value_to_key(hg, value), inc);
}

void
isc_histo_inc(isc_histo_t *hg, uint64_t value) {
	isc_histo_add(hg, value, 1);
}

void
isc_histo_put(isc_histo_t *hg, uint64_t min, uint64_t max, uint64_t count) {
	REQUIRE(HISTO_VALID(hg));

	uint kmin = value_to_key(hg, min);
	uint kmax = value_to_key(hg, max);

	for (uint key = kmin; key <= kmax; key++) {
		uint64_t mid = ISC_MIN(max, key_to_maxval(hg, key));
		double in_bucket = mid - min + 1;
		double remaining = max - min + 1;
		uint64_t inc = ceil(count * in_bucket / remaining);
		add_key_count(hg, key, inc);
		count -= inc;
		min = mid + 1;
	}
}
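
/*
 * For instance (a sketch, again assuming `sigbits == 3`):
 * `isc_histo_put(hg, 0, 4, 10)` walks keys 0..4, each of which covers
 * exactly one value down here, and at each step deposits
 * ceil(count * 1 / remaining) == 2, ending with 2 in each of the five
 * buckets and `count` reduced to zero.
 */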

isc_result_t
isc_histo_get(const isc_histo_t *hg, uint key, uint64_t *minp, uint64_t *maxp,
	      uint64_t *countp) {
	REQUIRE(HISTO_VALID(hg));

	if (key < BUCKETS(hg)) {
		SET_IF_NOT_NULL(minp, key_to_minval(hg, key));
		SET_IF_NOT_NULL(maxp, key_to_maxval(hg, key));
		SET_IF_NOT_NULL(countp, get_key_count(hg, key));
		return ISC_R_SUCCESS;
	} else {
		return ISC_R_RANGE;
	}
}

void
isc_histo_next(const isc_histo_t *hg, uint *keyp) {
	REQUIRE(HISTO_VALID(hg));
	REQUIRE(keyp != NULL);

	uint chunksize = CHUNKSIZE(hg);
	uint buckets = BUCKETS(hg);
	uint key = *keyp;

	key++;
	while (key < buckets && key % chunksize == 0 &&
	       key_to_bucket(hg, key) == NULL)
	{
		key += chunksize;
	}
	*keyp = key;
}

void
isc_histo_merge(isc_histo_t **targetp, const isc_histo_t *source) {
	REQUIRE(HISTO_VALID(source));
	REQUIRE(targetp != NULL);

	if (*targetp != NULL) {
		REQUIRE(HISTO_VALID(*targetp));
	} else {
		isc_histo_create(source->mctx, source->sigbits, targetp);
	}

	uint64_t min, max, count;
	for (uint key = 0;
	     isc_histo_get(source, key, &min, &max, &count) == ISC_R_SUCCESS;
	     isc_histo_next(source, &key))
	{
		isc_histo_put(*targetp, min, max, count);
	}
}
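
/*
 * A minimal usage sketch of the API above (illustrative only; it
 * assumes a caller-supplied `mctx`, a hypothetical `sample()` helper,
 * a `sigbits` of 5 that is within the MINBITS/MAXBITS bounds, and
 * omits error handling):
 *
 *	isc_histo_t *hg = NULL;
 *	isc_histo_create(mctx, 5, &hg);
 *	for (int i = 0; i < 1000; i++) {
 *		isc_histo_inc(hg, sample());
 *	}
 *	double fraction[2] = { 0.99, 0.5 };	(decreasing order)
 *	uint64_t value[2];
 *	isc_histo_quantiles(hg, 2, fraction, value);
 *	isc_histo_destroy(&hg);
 */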

/**********************************************************************/

void
isc_histomulti_create(isc_mem_t *mctx, uint sigbits, isc_histomulti_t **hmp) {
	REQUIRE(hmp != NULL);
	REQUIRE(*hmp == NULL);

	uint size = isc_tid_count();
	INSIST(size > 0);

	isc_histomulti_t *hm = isc_mem_cget(mctx, 1,
					    STRUCT_FLEX_SIZE(hm, hg, size));
	*hm = (isc_histomulti_t){
		.magic = HISTOMULTI_MAGIC,
		.size = size,
	};

	for (uint i = 0; i < hm->size; i++) {
		isc_histo_create(mctx, sigbits, &hm->hg[i]);
	}

	*hmp = hm;
}

void
isc_histomulti_destroy(isc_histomulti_t **hmp) {
	REQUIRE(hmp != NULL);
	REQUIRE(HISTOMULTI_VALID(*hmp));

	isc_histomulti_t *hm = *hmp;
	isc_mem_t *mctx = hm->hg[0]->mctx;
	*hmp = NULL;

	for (uint i = 0; i < hm->size; i++) {
		isc_histo_destroy(&hm->hg[i]);
	}

	isc_mem_put(mctx, hm, STRUCT_FLEX_SIZE(hm, hg, hm->size));
}

void
isc_histomulti_merge(isc_histo_t **hgp, const isc_histomulti_t *hm) {
	REQUIRE(HISTOMULTI_VALID(hm));

	for (uint i = 0; i < hm->size; i++) {
		isc_histo_merge(hgp, hm->hg[i]);
	}
}

void
isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc) {
	REQUIRE(HISTOMULTI_VALID(hm));
	isc_histo_t *hg = hm->hg[isc_tid()];
	add_key_count(hg, value_to_key(hg, value), inc);
}

void
isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value) {
	isc_histomulti_add(hm, value, 1);
}

/**********************************************************************/

/*
 * https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
 * equation 4 (incremental mean) and equation 44 (incremental variance)
 */
void
isc_histo_moments(const isc_histo_t *hg, double *pm0, double *pm1,
		  double *pm2) {
	REQUIRE(HISTO_VALID(hg));

	uint64_t pop = 0;
	double mean = 0.0;
	double sigma = 0.0;

	uint64_t min, max, count;
	for (uint key = 0;
	     isc_histo_get(hg, key, &min, &max, &count) == ISC_R_SUCCESS;
	     isc_histo_next(hg, &key))
	{
		if (count == 0) { /* avoid division by zero */
			continue;
		}
		double value = min / 2.0 + max / 2.0;
		double delta = value - mean;
		pop += count;
		mean += count * delta / pop;
		sigma += count * delta * (value - mean);
	}

	SET_IF_NOT_NULL(pm0, pop);
	SET_IF_NOT_NULL(pm1, mean);
	SET_IF_NOT_NULL(pm2, (pop > 0) ? sqrt(sigma / pop) : 0.0);
}
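
/*
 * The update above is a weighted variant of Welford's online
 * algorithm, using each bucket's midpoint as its value. A quick
 * sanity check: one count at value 1 and one at value 3 give
 * pop == 2, mean == 2.0, sigma == 2.0, and a standard deviation of
 * sqrt(2.0 / 2) == 1.0, as expected for the population {1, 3}.
 */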

/*
 * Clamped linear interpolation.
 *
 * `outrange` should be `((1 << n) - 1)` for some `n`; when `n` is larger
 * than 53, `outrange` can get rounded up to a power of 2, so we clamp the
 * result to keep within bounds (extra important when `max == UINT64_MAX`).
 */
static inline uint64_t
lerp(uint64_t min, uint64_t max, uint64_t lo, uint64_t in, uint64_t hi) {
	double inrange = (double)(hi - lo);
	double inpart = (double)(in - lo);
	double outrange = (double)(max - min);
	double outpart = round(outrange * inpart / inrange);
	return min + ISC_MIN((uint64_t)outpart, max - min);
}

/*
 * There is non-zero space for the inner value, and it is inside the bounds.
 */
static inline bool
inside(uint64_t lo, uint64_t in, uint64_t hi) {
	return lo < hi && lo <= in && in <= hi;
}

isc_result_t
isc_histo_quantiles(const isc_histo_t *hg, uint size, const double *fraction,
		    uint64_t *value) {
	hg_bucket_t *chunk[CHUNKS];
	uint64_t total[CHUNKS];
	uint64_t rank[ISC_HISTO_MAXQUANTILES];

	REQUIRE(HISTO_VALID(hg));
	REQUIRE(0 < size && size <= ISC_HISTO_MAXQUANTILES);
	REQUIRE(fraction != NULL);
	REQUIRE(value != NULL);

	const uint maxchunk = MAXCHUNK(hg);
	const uint chunksize = CHUNKSIZE(hg);

	/*
	 * Find out which chunks exist and what their totals are. We take a
	 * copy of the chunk pointers to reduce the need for atomic ops
	 * later on. Scan from low to high so that higher buckets are more
	 * likely to be in the CPU cache when we scan from high to low.
	 */
	uint64_t population = 0;
	for (uint c = 0; c < maxchunk; c++) {
		chunk[c] = get_chunk(hg, c);
		total[c] = 0;
		if (chunk[c] != NULL) {
			for (uint b = chunksize; b-- > 0;) {
				total[c] += bucket_count(&chunk[c][b]);
			}
			population += total[c];
		}
	}

	/*
	 * Now that we know the population, we can convert fractions to
	 * ranks. Also ensure they are within bounds and in decreasing
	 * order.
	 */
	for (uint i = 0; i < size; i++) {
		REQUIRE(0.0 <= fraction[i] && fraction[i] <= 1.0);
		REQUIRE(i == 0 || fraction[i - 1] > fraction[i]);
		rank[i] = round(fraction[i] * population);
	}

	/*
	 * Scan chunks from high to low, keeping track of the bounds on
	 * each chunk's ranks. Each time we match `rank[i]`, move on to the
	 * next rank and continue the scan from the same place.
	 */
	uint i = 0;
	uint64_t chunk_lo = population;
	for (uint c = maxchunk; c-- > 0;) {
		uint64_t chunk_hi = chunk_lo;
		chunk_lo = chunk_hi - total[c];

		/*
		 * Scan buckets backwards within this chunk, in a similar
		 * manner to the chunk scan. Skip all or part of the loop
		 * if the current rank is not in the chunk.
		 */
		uint64_t bucket_lo = chunk_hi;
		for (uint b = chunksize;
		     b-- > 0 && inside(chunk_lo, rank[i], chunk_hi);)
		{
			uint64_t bucket_hi = bucket_lo;
			bucket_lo = bucket_hi - bucket_count(&chunk[c][b]);

			/*
			 * Convert all ranks that fall in this bucket.
			 */
			while (inside(bucket_lo, rank[i], bucket_hi)) {
				uint key = chunksize * c + b;
				value[i] = lerp(key_to_minval(hg, key),
						key_to_maxval(hg, key),
						bucket_lo, rank[i], bucket_hi);
				if (++i == size) {
					return ISC_R_SUCCESS;
				}
			}
		}
	}

	return ISC_R_UNSET;
}

/**********************************************************************/
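
/*
 * A worked lerp() example (illustrative only): for a bucket covering
 * values [96, 103] with bucket_lo == 40 and bucket_hi == 50, rank 45
 * yields 96 + round(7 * 5 / 10) == 100: the rank's position within
 * the bucket is mapped proportionally onto the bucket's value range.
 */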