xref: /spdk/lib/util/zipf.c (revision 6f338d4bf3a8a91b7abe377a605a321ea2b05bf7)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright(c) Intel Corporation. All rights reserved.
3  *   All rights reserved.
4  */
5 
6 #include "spdk/stdinc.h"
7 #include "spdk/util.h"
8 #include "spdk/zipf.h"
9 
10 struct spdk_zipf {
11 	uint64_t	range;
12 	double		alpha;
13 	double		eta;
14 	double		theta;
15 	double		zetan;
16 	double		val1_limit;
17 	uint32_t	seed;
18 };
19 
20 static double
21 zeta_increment(uint64_t n, double theta)
22 {
23 	return pow((double) 1.0 / (n + 1), theta);
24 }
25 
26 static double
27 zeta(uint64_t range, double theta)
28 {
29 	double zetan = 0;
30 	double inc1, inc2;
31 	uint64_t i, calc, count;
32 	const uint32_t ZIPF_MAX_ZETA_CALC = 10 * 1000 * 1000;
33 	const uint32_t ZIPF_ZETA_ESTIMATE = 1 * 1000 * 1000;
34 
35 	/* Cumulate zeta discretely for the first ZIPF_MAX_ZETA_CALC
36 	 * entries in the range.
37 	 */
38 	calc = spdk_min(ZIPF_MAX_ZETA_CALC, range);
39 	for (i = 0; i < calc; i++) {
40 		zetan += zeta_increment(i, theta);
41 	}
42 
43 	/* For the remaining values in the range, increment zetan
44 	 * with an approximation for every ZIPF_ZETA_ESTIMATE
45 	 * entries.  We will take an average of the increment
46 	 * for (i) and (i + ZIPF_ZETA_ESTIMATE), and then multiply
47 	 * that by ZIPF_ZETA_ESTIMATE.
48 	 *
49 	 * Of course, we'll cap ZIPF_ZETA_ESTIMATE to something
50 	 * smaller if necessary at the end of the range.
51 	 */
52 	while (i < range) {
53 		count = spdk_min(ZIPF_ZETA_ESTIMATE, range - i);
54 		inc1 = zeta_increment(i, theta);
55 		inc2 = zeta_increment(i + count, theta);
56 		zetan += (inc1 + inc2) * count / 2;
57 		i += count;
58 	}
59 
60 	return zetan;
61 }
62 
63 struct spdk_zipf *
64 spdk_zipf_create(uint64_t range, double theta, uint32_t seed)
65 {
66 	struct spdk_zipf *zipf;
67 
68 	zipf = calloc(1, sizeof(*zipf));
69 	if (zipf == NULL) {
70 		return NULL;
71 	}
72 
73 	zipf->range = range;
74 	zipf->seed = seed;
75 
76 	zipf->theta = theta;
77 	zipf->alpha = 1.0 / (1.0 - zipf->theta);
78 	zipf->zetan = zeta(range, theta);
79 	zipf->eta = (1.0 - pow(2.0 / zipf->range, 1.0 - zipf->theta)) /
80 		    (1.0 - zeta(2, theta) / zipf->zetan);
81 	zipf->val1_limit = 1.0 + pow(0.5, zipf->theta);
82 
83 	return zipf;
84 }
85 
86 void
87 spdk_zipf_free(struct spdk_zipf **zipfp)
88 {
89 	assert(zipfp != NULL);
90 	free(*zipfp);
91 	*zipfp = NULL;
92 }
93 
94 uint64_t
95 spdk_zipf_generate(struct spdk_zipf *zipf)
96 {
97 	double randu, randz;
98 	uint64_t val;
99 
100 	randu = (double)rand_r(&zipf->seed) / RAND_MAX;
101 	randz = randu * zipf->zetan;
102 
103 	if (randz < 1.0) {
104 		return 0;
105 	} else if (randz < zipf->val1_limit) {
106 		return 1;
107 	} else {
108 		val = zipf->range * pow(zipf->eta * (randu - 1.0) + 1.0, zipf->alpha);
109 		return val % zipf->range;
110 	}
111 }
112