xref: /spdk/lib/util/zipf.c (revision db35950a13859f8680bb97c961337d3317a974b2)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 #include "spdk/util.h"
36 #include "spdk/zipf.h"
37 
38 struct spdk_zipf {
39 	uint64_t	range;
40 	double		alpha;
41 	double		eta;
42 	double		theta;
43 	double		zetan;
44 	double		val1_limit;
45 	uint32_t	seed;
46 };
47 
48 static double
49 zeta_increment(uint64_t n, double theta)
50 {
51 	return pow((double) 1.0 / (n + 1), theta);
52 }
53 
54 static double
55 zeta(uint64_t range, double theta)
56 {
57 	double zetan = 0;
58 	double inc1, inc2;
59 	uint64_t i, calc, count;
60 	const uint32_t ZIPF_MAX_ZETA_CALC = 10 * 1000 * 1000;
61 	const uint32_t ZIPF_ZETA_ESTIMATE = 1 * 1000 * 1000;
62 
63 	/* Cumulate zeta discretely for the first ZIPF_MAX_ZETA_CALC
64 	 * entries in the range.
65 	 */
66 	calc = spdk_min(ZIPF_MAX_ZETA_CALC, range);
67 	for (i = 0; i < calc; i++) {
68 		zetan += zeta_increment(i, theta);
69 	}
70 
71 	/* For the remaining values in the range, increment zetan
72 	 * with an approximation for every ZIPF_ZETA_ESTIMATE
73 	 * entries.  We will take an average of the increment
74 	 * for (i) and (i + ZIPF_ZETA_ESTIMATE), and then multiply
75 	 * that by ZIPF_ZETA_ESTIMATE.
76 	 *
77 	 * Of course, we'll cap ZIPF_ZETA_ESTIMATE to something
78 	 * smaller if necessary at the end of the range.
79 	 */
80 	while (i < range) {
81 		count = spdk_min(ZIPF_ZETA_ESTIMATE, range - i);
82 		inc1 = zeta_increment(i, theta);
83 		inc2 = zeta_increment(i + count, theta);
84 		zetan += (inc1 + inc2) * count / 2;
85 		i += count;
86 	}
87 
88 	return zetan;
89 }
90 
91 struct spdk_zipf *
92 spdk_zipf_create(uint64_t range, double theta, uint32_t seed)
93 {
94 	struct spdk_zipf *zipf;
95 
96 	zipf = calloc(1, sizeof(*zipf));
97 	if (zipf == NULL) {
98 		return NULL;
99 	}
100 
101 	zipf->range = range;
102 	zipf->seed = seed;
103 
104 	zipf->theta = theta;
105 	zipf->alpha = 1.0 / (1.0 - zipf->theta);
106 	zipf->zetan = zeta(range, theta);
107 	zipf->eta = (1.0 - pow(2.0 / zipf->range, 1.0 - zipf->theta)) /
108 		    (1.0 - zeta(2, theta) / zipf->zetan);
109 	zipf->val1_limit = 1.0 + pow(0.5, zipf->theta);
110 
111 	return zipf;
112 }
113 
114 void
115 spdk_zipf_free(struct spdk_zipf **zipfp)
116 {
117 	assert(zipfp != NULL);
118 	free(*zipfp);
119 	*zipfp = NULL;
120 }
121 
122 uint64_t
123 spdk_zipf_generate(struct spdk_zipf *zipf)
124 {
125 	double randu, randz;
126 	uint64_t val;
127 
128 	randu = (double)rand_r(&zipf->seed) / RAND_MAX;
129 	randz = randu * zipf->zetan;
130 
131 	if (randz < 1.0) {
132 		return 0;
133 	} else if (randz < zipf->val1_limit) {
134 		return 1;
135 	} else {
136 		val = zipf->range * pow(zipf->eta * (randu - 1.0) + 1.0, zipf->alpha);
137 		return val % zipf->range;
138 	}
139 }
140