xref: /netbsd-src/external/mpl/bind/dist/lib/isc/histo.c (revision c2d18a95984d618d94f261361000142d37ac53d9)
1 /*	$NetBSD: histo.c,v 1.3 2025/01/27 02:16:05 christos Exp $	*/
2 
3 /*
4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5  *
6  * SPDX-License-Identifier: MPL-2.0
7  *
8  * This Source Code Form is subject to the terms of the Mozilla Public
9  * License, v. 2.0. If a copy of the MPL was not distributed with this
10  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11  *
12  * See the COPYRIGHT file distributed with this work for additional
13  * information regarding copyright ownership.
14  */
15 
16 #include <assert.h>
17 #include <errno.h>
18 #include <math.h>
19 #include <stdbool.h>
20 #include <stdint.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 
25 #include <isc/atomic.h>
26 #include <isc/histo.h>
27 #include <isc/magic.h>
28 #include <isc/mem.h>
29 #include <isc/tid.h>
30 
#define HISTO_MAGIC	    ISC_MAGIC('H', 's', 't', 'o')
#define HISTO_VALID(p)	    ISC_MAGIC_VALID(p, HISTO_MAGIC)
#define HISTOMULTI_MAGIC    ISC_MAGIC('H', 'g', 'M', 't')
#define HISTOMULTI_VALID(p) ISC_MAGIC_VALID(p, HISTOMULTI_MAGIC)

/*
 * Natural logarithms of 2 and 10 for converting precisions between
 * binary and decimal significant figures
 */
#define LN_2  0.693147180559945309
#define LN_10 2.302585092994045684

/*
 * The chunks array has a static size for simplicity, fixed as the
 * number of bits in a value. That means we waste a little extra space
 * that could be saved by omitting the exponents that are covered by
 * `sigbits`. The following macros calculate (at run time) the exact
 * number of buckets when we need to do accurate bounds checks.
 *
 * For a discussion of the floating point terminology, see the
 * commentary on `value_to_key()` below.
 *
 * We often use the variable names `c` for chunk and `b` for bucket.
 */
#define CHUNKS 64

/* low exponents that collapse into chunk zero (see value_to_key()) */
#define DENORMALS(hg) ((hg)->sigbits - 1)
/* buckets per chunk, i.e. the number of distinct mantissa values */
#define MANTISSAS(hg) (1 << (hg)->sigbits)
/* number of chunks actually used at this precision */
#define EXPONENTS(hg) (CHUNKS - DENORMALS(hg))
/* total bucket count, for exact bounds checks on keys */
#define BUCKETS(hg)   (EXPONENTS(hg) * MANTISSAS(hg))

/* aliases that read better in loop bounds and allocation sizes */
#define MAXCHUNK(hg)  EXPONENTS(hg)
#define CHUNKSIZE(hg) MANTISSAS(hg)

/*
 * Bucket counters are atomic so concurrent increments do not need a
 * lock; 64-bit counters are used only on LP64 targets where a native
 * 64-bit atomic can be expected.
 */
#ifdef _LP64
typedef atomic_uint_fast64_t hg_bucket_t;
#else
typedef atomic_uint_fast32_t hg_bucket_t;
#endif
/* a chunk is a lazily-allocated array of buckets, installed atomically */
typedef atomic_ptr(hg_bucket_t) hg_chunk_t;

struct isc_histo {
	uint magic;   /* HISTO_MAGIC */
	uint sigbits; /* mantissa size in bits; see value_to_key() */
	isc_mem_t *mctx;
	hg_chunk_t chunk[CHUNKS]; /* NULL until a bucket is first used */
};

struct isc_histomulti {
	uint magic;	   /* HISTOMULTI_MAGIC */
	uint size;	   /* number of per-thread histograms */
	isc_histo_t *hg[]; /* one histogram per thread, indexed by tid */
};
84 
85 /**********************************************************************/
86 
87 void
88 isc_histo_create(isc_mem_t *mctx, uint sigbits, isc_histo_t **hgp) {
89 	REQUIRE(sigbits >= ISC_HISTO_MINBITS);
90 	REQUIRE(sigbits <= ISC_HISTO_MAXBITS);
91 	REQUIRE(hgp != NULL);
92 	REQUIRE(*hgp == NULL);
93 
94 	isc_histo_t *hg = isc_mem_get(mctx, sizeof(*hg));
95 	*hg = (isc_histo_t){
96 		.magic = HISTO_MAGIC,
97 		.sigbits = sigbits,
98 	};
99 	isc_mem_attach(mctx, &hg->mctx);
100 
101 	*hgp = hg;
102 }
103 
104 void
105 isc_histo_destroy(isc_histo_t **hgp) {
106 	REQUIRE(hgp != NULL);
107 	REQUIRE(HISTO_VALID(*hgp));
108 
109 	isc_histo_t *hg = *hgp;
110 	*hgp = NULL;
111 
112 	for (uint c = 0; c < CHUNKS; c++) {
113 		if (hg->chunk[c] != NULL) {
114 			isc_mem_cput(hg->mctx, hg->chunk[c], CHUNKSIZE(hg),
115 				     sizeof(hg_bucket_t));
116 		}
117 	}
118 	isc_mem_putanddetach(&hg->mctx, hg, sizeof(*hg));
119 }
120 
121 /**********************************************************************/
122 
/*
 * Accessor: the histogram's configured number of significant bits.
 */
uint
isc_histo_sigbits(isc_histo_t *hg) {
	REQUIRE(HISTO_VALID(hg));
	return hg->sigbits;
}
128 
129 /*
130  * use precomputed logs and builtins to avoid linking with libm
131  */
132 
/*
 * Convert binary precision to the number of decimal significant
 * digits it can represent, i.e. floor(1 + (bits - 1) * ln2/ln10),
 * written as `1.0 - (1.0 - bits)` so the subtraction happens in
 * floating point (`bits` is unsigned and must not underflow).
 */
uint
isc_histo_bits_to_digits(uint bits) {
	REQUIRE(bits >= ISC_HISTO_MINBITS);
	REQUIRE(bits <= ISC_HISTO_MAXBITS);
	return floor(1.0 - (1.0 - bits) * LN_2 / LN_10);
}
139 
/*
 * Convert decimal significant figures to binary significant bits,
 * i.e. ceil(1 + (digits - 1) * ln10/ln2), written as
 * `1.0 - (1.0 - digits)` so the subtraction happens in floating
 * point (`digits` is unsigned); ceil() rounds up so the binary
 * precision is at least the decimal precision requested.
 */
uint
isc_histo_digits_to_bits(uint digits) {
	REQUIRE(digits >= ISC_HISTO_MINDIGITS);
	REQUIRE(digits <= ISC_HISTO_MAXDIGITS);
	return ceil(1.0 - (1.0 - digits) * LN_10 / LN_2);
}
146 
147 /**********************************************************************/
148 
149 /*
150  * The way we map buckets to keys is what gives the histogram a
151  * consistent relative error across the whole range of `uint64_t`.
152  * The mapping is log-linear: a chunk key is the logarithm of part
153  * of the value (in other words, chunks are spaced exponentially);
154  * and a bucket within a chunk is a linear function of another part
155  * of the value.
156  *
157  * This log-linear spacing is similar to the size classes used by
158  * jemalloc. It is also the way floating point numbers work: the
159  * exponent is the log part, and the mantissa is the linear part.
160  *
161  * So, a chunk number is the log (base 2) of a `uint64_t`, which is
162  * between 0 and 63, which is why there are up to 64 chunks. In
163  * floating point terms the chunk number is the exponent. The
164  * histogram's number of significant bits is the size of the
165  * mantissa, which indexes buckets within each chunk.
166  *
167  * A fast way to get the logarithm of a positive integer is CLZ,
168  * count leading zeroes.
169  *
170  * Chunk zero is special. Chunk 1 covers values between `CHUNKSIZE`
171  * and `CHUNKSIZE * 2 - 1`, where `CHUNKSIZE == exponent << sigbits
172  * == 1 << sigbits`. Each chunk has CHUNKSIZE buckets, so chunk 1 has
173  * one value per bucket. There are CHUNKSIZE values before chunk 1
174  * which map to chunk 0, so it also has one value per bucket. (Hence
175  * the first two chunks have one value per bucket.) The values in
 * chunk 0 correspond to denormal numbers in floating point terms.
177  * They are also the values where `63 - sigbits - clz` would be less
178  * than one if denormals were not handled specially.
179  *
180  * This branchless conversion is due to Paul Khuong: see bin_down_of() in
181  * https://pvk.ca/Blog/2015/06/27/linear-log-bucketing-fast-versatile-simple/
182  *
183  * This function is in the `isc_histo_inc()` fast path.
184  */
static inline uint
value_to_key(const isc_histo_t *hg, uint64_t value) {
	/* ensure that denormal numbers are all in chunk zero */
	uint64_t chunked = value | CHUNKSIZE(hg);
	/* 63 - clz is log2(chunked); chunked != 0 so clz is well-defined */
	int clz = __builtin_clzll((unsigned long long)(chunked));
	/* actually 1 less than the exponent except for denormals */
	uint exponent = 63 - hg->sigbits - clz;
	/* mantissa has leading bit set except for denormals */
	uint mantissa = value >> exponent;
	/* leading bit of mantissa adds one to exponent */
	return (exponent << hg->sigbits) + mantissa;
}
197 
198 /*
199  * Inverse functions of `value_to_key()`, to get the minimum and
200  * maximum values that map to a particular key.
201  *
202  * We must not cause undefined behaviour by hitting integer limits,
203  * which is a risk when we aim to cover the entire range of `uint64_t`.
204  *
205  * The maximum value in the last bucket is UINT64_MAX, which
206  * `key_to_maxval()` gets by deliberately subtracting `0 - 1`,
 * underflowing a `uint64_t`. That is OK when unsigned.
208  *
209  * We must take care not to shift too much in `key_to_minval()`.
210  * The largest key passed by `key_to_maxval()` is `BUCKETS(hg)`, so
211  *	`exponent == EXPONENTS(hg) - 1 == 64 - sigbits`
212  * which is always less than 64, so the size of the shift is OK.
213  *
214  * The `mantissa` in this edge case is just `chunksize`, which when
215  * shifted becomes `1 << 64` which overflows `uint64_t` Again this is
216  * OK when unsigned, so the return value is zero.
217  */
218 
/*
 * Smallest value that maps to `key`. In chunk zero (keys below
 * `chunksize`) each bucket holds exactly one value, so the key is
 * the value. Otherwise rebuild the value from the mantissa (the low
 * bits, with the implicit leading bit restored by `+ chunksize`)
 * shifted left by the exponent. See the comment above for why the
 * edge cases do not cause undefined behaviour.
 */
static inline uint64_t
key_to_minval(const isc_histo_t *hg, uint key) {
	uint chunksize = CHUNKSIZE(hg);
	/* minus one mirrors the "+1" the leading mantissa bit adds */
	uint exponent = (key / chunksize) - 1;
	uint64_t mantissa = (key % chunksize) + chunksize;
	return key < chunksize ? key : mantissa << exponent;
}
226 
227 static inline uint64_t
228 key_to_maxval(const isc_histo_t *hg, uint key) {
229 	return key_to_minval(hg, key + 1) - 1;
230 }
231 
232 /**********************************************************************/
233 
/*
 * Allocate the chunk containing `key` and return a pointer to the
 * key's bucket. If another thread installs a chunk first, free ours
 * and use the winner's, so exactly one chunk survives either way.
 */
static hg_bucket_t *
key_to_new_bucket(isc_histo_t *hg, uint key) {
	/* slow path */
	uint chunksize = CHUNKSIZE(hg);
	uint chunk = key / chunksize;
	uint bucket = key % chunksize;
	hg_bucket_t *old_cp = NULL;
	/* calloc-style allocation, so all counters start at zero */
	hg_bucket_t *new_cp = isc_mem_cget(hg->mctx, CHUNKSIZE(hg),
					   sizeof(hg_bucket_t));
	hg_chunk_t *cpp = &hg->chunk[chunk];
	if (atomic_compare_exchange_strong_acq_rel(cpp, &old_cp, new_cp)) {
		return &new_cp[bucket];
	} else {
		/* lost the race, so use the winner's chunk */
		isc_mem_cput(hg->mctx, new_cp, CHUNKSIZE(hg),
			     sizeof(hg_bucket_t));
		return &old_cp[bucket];
	}
}
253 
/*
 * Atomically fetch a chunk pointer; the acquire load pairs with the
 * acq_rel compare-exchange in key_to_new_bucket(), so the zeroed
 * bucket array is visible before the pointer is.
 */
static hg_bucket_t *
get_chunk(const isc_histo_t *hg, uint chunk) {
	return atomic_load_acquire(&hg->chunk[chunk]);
}
258 
259 static inline hg_bucket_t *
260 key_to_bucket(const isc_histo_t *hg, uint key) {
261 	/* fast path */
262 	uint chunksize = CHUNKSIZE(hg);
263 	uint chunk = key / chunksize;
264 	uint bucket = key % chunksize;
265 	hg_bucket_t *cp = get_chunk(hg, chunk);
266 	return cp == NULL ? NULL : &cp[bucket];
267 }
268 
269 static inline uint64_t
270 bucket_count(const hg_bucket_t *bp) {
271 	return bp == NULL ? 0 : atomic_load_relaxed(bp);
272 }
273 
/*
 * Current count for `key`; zero when the chunk is unallocated.
 */
static inline uint64_t
get_key_count(const isc_histo_t *hg, uint key) {
	return bucket_count(key_to_bucket(hg, key));
}
278 
279 static inline void
280 add_key_count(isc_histo_t *hg, uint key, uint64_t inc) {
281 	/* fast path */
282 	if (inc > 0) {
283 		hg_bucket_t *bp = key_to_bucket(hg, key);
284 		bp = bp != NULL ? bp : key_to_new_bucket(hg, key);
285 		atomic_fetch_add_relaxed(bp, inc);
286 	}
287 }
288 
289 /**********************************************************************/
290 
291 void
292 isc_histo_add(isc_histo_t *hg, uint64_t value, uint64_t inc) {
293 	REQUIRE(HISTO_VALID(hg));
294 	add_key_count(hg, value_to_key(hg, value), inc);
295 }
296 
/*
 * Record a single sample; equivalent to isc_histo_add() with inc == 1.
 */
void
isc_histo_inc(isc_histo_t *hg, uint64_t value) {
	isc_histo_add(hg, value, 1);
}
301 
/*
 * Insert `count` samples spread across the value range [min, max],
 * splitting the count between buckets in proportion to how much of
 * the range each bucket covers. Used by isc_histo_merge() to insert
 * buckets from a histogram whose buckets may be wider or narrower
 * than this one's.
 */
void
isc_histo_put(isc_histo_t *hg, uint64_t min, uint64_t max, uint64_t count) {
	REQUIRE(HISTO_VALID(hg));

	uint kmin = value_to_key(hg, min);
	uint kmax = value_to_key(hg, max);

	for (uint key = kmin; key <= kmax; key++) {
		/* `mid` is the end of this bucket's share of [min, max] */
		uint64_t mid = ISC_MIN(max, key_to_maxval(hg, key));
		double in_bucket = mid - min + 1;
		double remaining = max - min + 1;
		/*
		 * Take a proportional share; the running subtraction
		 * below keeps the grand total exactly `count`.
		 */
		uint64_t inc = ceil(count * in_bucket / remaining);
		add_key_count(hg, key, inc);
		count -= inc;
		min = mid + 1;
	}
}
319 
320 isc_result_t
321 isc_histo_get(const isc_histo_t *hg, uint key, uint64_t *minp, uint64_t *maxp,
322 	      uint64_t *countp) {
323 	REQUIRE(HISTO_VALID(hg));
324 
325 	if (key < BUCKETS(hg)) {
326 		SET_IF_NOT_NULL(minp, key_to_minval(hg, key));
327 		SET_IF_NOT_NULL(maxp, key_to_maxval(hg, key));
328 		SET_IF_NOT_NULL(countp, get_key_count(hg, key));
329 		return ISC_R_SUCCESS;
330 	} else {
331 		return ISC_R_RANGE;
332 	}
333 }
334 
/*
 * Advance `*keyp` to the next key, skipping whole chunks that were
 * never allocated (all their counts are zero). The NULL test is
 * only needed at chunk boundaries (key % chunksize == 0), since a
 * key inside a chunk implies the chunk exists. May leave `*keyp`
 * at or past BUCKETS(hg), which isc_histo_get() then reports as
 * ISC_R_RANGE, ending iteration.
 */
void
isc_histo_next(const isc_histo_t *hg, uint *keyp) {
	REQUIRE(HISTO_VALID(hg));
	REQUIRE(keyp != NULL);

	uint chunksize = CHUNKSIZE(hg);
	uint buckets = BUCKETS(hg);
	uint key = *keyp;

	key++;
	while (key < buckets && key % chunksize == 0 &&
	       key_to_bucket(hg, key) == NULL)
	{
		key += chunksize;
	}
	*keyp = key;
}
352 
353 void
354 isc_histo_merge(isc_histo_t **targetp, const isc_histo_t *source) {
355 	REQUIRE(HISTO_VALID(source));
356 	REQUIRE(targetp != NULL);
357 
358 	if (*targetp != NULL) {
359 		REQUIRE(HISTO_VALID(*targetp));
360 	} else {
361 		isc_histo_create(source->mctx, source->sigbits, targetp);
362 	}
363 
364 	uint64_t min, max, count;
365 	for (uint key = 0;
366 	     isc_histo_get(source, key, &min, &max, &count) == ISC_R_SUCCESS;
367 	     isc_histo_next(source, &key))
368 	{
369 		isc_histo_put(*targetp, min, max, count);
370 	}
371 }
372 
373 /**********************************************************************/
374 
375 void
376 isc_histomulti_create(isc_mem_t *mctx, uint sigbits, isc_histomulti_t **hmp) {
377 	REQUIRE(hmp != NULL);
378 	REQUIRE(*hmp == NULL);
379 
380 	uint size = isc_tid_count();
381 	INSIST(size > 0);
382 
383 	isc_histomulti_t *hm = isc_mem_cget(mctx, 1,
384 					    STRUCT_FLEX_SIZE(hm, hg, size));
385 	*hm = (isc_histomulti_t){
386 		.magic = HISTOMULTI_MAGIC,
387 		.size = size,
388 	};
389 
390 	for (uint i = 0; i < hm->size; i++) {
391 		isc_histo_create(mctx, sigbits, &hm->hg[i]);
392 	}
393 
394 	*hmp = hm;
395 }
396 
/*
 * Destroy a multi-histogram and all its per-thread histograms. The
 * memory context is captured from the first histogram before the
 * histograms are destroyed, because the isc_histomulti_t does not
 * hold its own mctx reference.
 */
void
isc_histomulti_destroy(isc_histomulti_t **hmp) {
	REQUIRE(hmp != NULL);
	REQUIRE(HISTOMULTI_VALID(*hmp));

	isc_histomulti_t *hm = *hmp;
	isc_mem_t *mctx = hm->hg[0]->mctx;
	*hmp = NULL;

	for (uint i = 0; i < hm->size; i++) {
		isc_histo_destroy(&hm->hg[i]);
	}

	isc_mem_put(mctx, hm, STRUCT_FLEX_SIZE(hm, hg, hm->size));
}
412 
413 void
414 isc_histomulti_merge(isc_histo_t **hgp, const isc_histomulti_t *hm) {
415 	REQUIRE(HISTOMULTI_VALID(hm));
416 
417 	for (uint i = 0; i < hm->size; i++) {
418 		isc_histo_merge(hgp, hm->hg[i]);
419 	}
420 }
421 
422 void
423 isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc) {
424 	REQUIRE(HISTOMULTI_VALID(hm));
425 	isc_histo_t *hg = hm->hg[isc_tid()];
426 	add_key_count(hg, value_to_key(hg, value), inc);
427 }
428 
/*
 * Record a single sample; equivalent to isc_histomulti_add() with
 * inc == 1.
 */
void
isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value) {
	isc_histomulti_add(hm, value, 1);
}
433 
434 /**********************************************************************/
435 
436 /*
437  * https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
438  * equation 4 (incremental mean) and equation 44 (incremental variance)
439  */
/*
 * Compute population (m0), mean (m1), and standard deviation (m2)
 * in a single pass, treating each bucket as `count` samples at the
 * bucket's midpoint. Any of the out-pointers may be NULL.
 */
void
isc_histo_moments(const isc_histo_t *hg, double *pm0, double *pm1,
		  double *pm2) {
	REQUIRE(HISTO_VALID(hg));

	uint64_t pop = 0;   /* running total of samples */
	double mean = 0.0;  /* running mean */
	double sigma = 0.0; /* accumulates pop times the variance */

	uint64_t min, max, count;
	for (uint key = 0;
	     isc_histo_get(hg, key, &min, &max, &count) == ISC_R_SUCCESS;
	     isc_histo_next(hg, &key))
	{
		if (count == 0) { /* avoid division by zero */
			continue;
		}
		/* representative value: the bucket's midpoint */
		double value = min / 2.0 + max / 2.0;
		double delta = value - mean;
		pop += count;
		mean += count * delta / pop;
		sigma += count * delta * (value - mean);
	}

	SET_IF_NOT_NULL(pm0, pop);
	SET_IF_NOT_NULL(pm1, mean);
	/* population standard deviation; zero for an empty histogram */
	SET_IF_NOT_NULL(pm2, (pop > 0) ? sqrt(sigma / pop) : 0.0);
}
468 
469 /*
470  * Clamped linear interpolation
471  *
472  * `outrange` should be `((1 << n) - 1)` for some `n`; when `n` is larger
473  * than 53, `outrange` can get rounded up to a power of 2, so we clamp the
474  * result to keep within bounds (extra important when `max == UINT64_MAX`)
475  */
476 static inline uint64_t
477 lerp(uint64_t min, uint64_t max, uint64_t lo, uint64_t in, uint64_t hi) {
478 	double inrange = (double)(hi - lo);
479 	double inpart = (double)(in - lo);
480 	double outrange = (double)(max - min);
481 	double outpart = round(outrange * inpart / inrange);
482 	return min + ISC_MIN((uint64_t)outpart, max - min);
483 }
484 
/*
 * True when the range [lo, hi] is non-empty and contains `in`.
 */
static inline bool
inside(uint64_t lo, uint64_t in, uint64_t hi) {
	if (lo >= hi) {
		/* empty range */
		return false;
	}
	return lo <= in && in <= hi;
}
492 
/*
 * Find the values at the given quantiles of the histogram.
 *
 * `fraction[]` must be strictly decreasing; each fraction is turned
 * into a rank (a position in the total population), then the buckets
 * are scanned once from the highest value downwards, converting each
 * rank to a value by linear interpolation within its bucket.
 *
 * Returns ISC_R_UNSET when not all ranks could be resolved, e.g.
 * when the histogram is empty.
 */
isc_result_t
isc_histo_quantiles(const isc_histo_t *hg, uint size, const double *fraction,
		    uint64_t *value) {
	hg_bucket_t *chunk[CHUNKS];
	uint64_t total[CHUNKS];
	uint64_t rank[ISC_HISTO_MAXQUANTILES];

	REQUIRE(HISTO_VALID(hg));
	REQUIRE(0 < size && size <= ISC_HISTO_MAXQUANTILES);
	REQUIRE(fraction != NULL);
	REQUIRE(value != NULL);

	const uint maxchunk = MAXCHUNK(hg);
	const uint chunksize = CHUNKSIZE(hg);

	/*
	 * Find out which chunks exist and what their totals are. We take a
	 * copy of the chunk pointers to reduce the need for atomic ops
	 * later on. Scan from low to high so that higher buckets are more
	 * likely to be in the CPU cache when we scan from high to low.
	 */
	uint64_t population = 0;
	for (uint c = 0; c < maxchunk; c++) {
		chunk[c] = get_chunk(hg, c);
		total[c] = 0;
		if (chunk[c] != NULL) {
			for (uint b = chunksize; b-- > 0;) {
				total[c] += bucket_count(&chunk[c][b]);
			}
			population += total[c];
		}
	}

	/*
	 * Now we know the population, we can convert fractions to ranks.
	 * Also ensure they are within bounds and in decreasing order.
	 */
	for (uint i = 0; i < size; i++) {
		REQUIRE(0.0 <= fraction[i] && fraction[i] <= 1.0);
		REQUIRE(i == 0 || fraction[i - 1] > fraction[i]);
		rank[i] = round(fraction[i] * population);
	}

	/*
	 * Scan chunks from high to low, keeping track of the bounds on
	 * each chunk's ranks. Each time we match `rank[i]`, move on to the
	 * next rank and continue the scan from the same place.
	 */
	uint i = 0;
	uint64_t chunk_lo = population;
	for (uint c = maxchunk; c-- > 0;) {
		uint64_t chunk_hi = chunk_lo;
		chunk_lo = chunk_hi - total[c];

		/*
		 * Scan buckets backwards within this chunk, in a similar
		 * manner to the chunk scan. Skip all or part of the loop
		 * if the current rank is not in the chunk.
		 */
		uint64_t bucket_lo = chunk_hi;
		for (uint b = chunksize;
		     b-- > 0 && inside(chunk_lo, rank[i], chunk_hi);)
		{
			uint64_t bucket_hi = bucket_lo;
			bucket_lo = bucket_hi - bucket_count(&chunk[c][b]);

			/*
			 * Convert all ranks that fall in this bucket.
			 */
			while (inside(bucket_lo, rank[i], bucket_hi)) {
				uint key = chunksize * c + b;
				value[i] = lerp(key_to_minval(hg, key),
						key_to_maxval(hg, key),
						bucket_lo, rank[i], bucket_hi);
				/* done as soon as every rank is resolved */
				if (++i == size) {
					return ISC_R_SUCCESS;
				}
			}
		}
	}

	return ISC_R_UNSET;
}
576 
577 /**********************************************************************/
578