xref: /netbsd-src/usr.bin/nbperf/nbperf-bdz.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /*	$NetBSD: nbperf-bdz.c,v 1.3 2010/03/01 21:46:58 joerg Exp $	*/
2 /*-
3  * Copyright (c) 2009 The NetBSD Foundation, Inc.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Joerg Sonnenberger.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in
17  *    the documentation and/or other materials provided with the
18  *    distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
24  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
26  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include <sys/cdefs.h>
35 __RCSID("$NetBSD: nbperf-bdz.c,v 1.3 2010/03/01 21:46:58 joerg Exp $");
36 
37 #include <err.h>
38 #include <inttypes.h>
39 #include <stdlib.h>
40 #include <stdio.h>
41 #include <string.h>
42 
43 #include "nbperf.h"
44 
45 /*
46  * A full description of the algorithm can be found in:
47  * "Simple and Space-Efficient Minimal Perfect Hash Functions"
48  * by Botelho, Pagh and Ziviani, proceeedings of WADS 2007.
49  */
50 
51 /*
52  * The algorithm is based on random, acyclic 3-graphs.
53  *
54  * Each edge in the represents a key.  The vertices are the reminder of
55  * the hash function mod n.  n = cm with c > 1.23.  This ensures that
56  * an acyclic graph can be found with a very high probality.
57  *
58  * An acyclic graph has an edge order, where at least one vertex of
59  * each edge hasn't been seen before.   It is declares the first unvisited
60  * vertex as authoritive for the edge and assigns a 2bit value to unvisited
61  * vertices, so that the sum of all vertices of the edge modulo 4 is
62  * the index of the authoritive vertex.
63  */
64 
65 #include "graph3.h"
66 
67 struct state {
68 	struct graph3 graph;
69 	uint32_t *visited;
70 	uint32_t *holes64k;
71 	uint16_t *holes256;
72 	uint8_t *holes256_64;
73 	uint8_t *holes256_128;
74 	uint8_t *holes256_192;
75 	uint8_t *g;
76 	uint32_t *result_map;
77 };
78 
79 static void
80 assign_nodes(struct state *state)
81 {
82 	struct edge3 *e;
83 	size_t i, j;
84 	uint32_t t, r, holes;
85 
86 	for (i = 0; i < state->graph.v; ++i)
87 		state->g[i] = 3;
88 
89 	for (i = 0; i < state->graph.e; ++i) {
90 		j = state->graph.output_order[i];
91 		e = &state->graph.edges[j];
92 		if (!state->visited[e->left]) {
93 			r = 0;
94 			t = e->left;
95 		} else if (!state->visited[e->middle]) {
96 			r = 1;
97 			t = e->middle;
98 		} else {
99 			if (state->visited[e->right])
100 				abort();
101 			r = 2;
102 			t = e->right;
103 		}
104 
105 		state->visited[t] = 2 + j;
106 		if (state->visited[e->left] == 0)
107 			state->visited[e->left] = 1;
108 		if (state->visited[e->middle] == 0)
109 			state->visited[e->middle] = 1;
110 		if (state->visited[e->right] == 0)
111 			state->visited[e->right] = 1;
112 
113 		state->g[t] = (9 + r - state->g[e->left] - state->g[e->middle]
114 		    - state->g[e->right]) % 3;
115 	}
116 
117 	holes = 0;
118 	for (i = 0; i < state->graph.v; ++i) {
119 		if (i % 65536 == 0)
120 			state->holes64k[i >> 16] = holes;
121 
122 		if (i % 256 == 0)
123 			state->holes256[i >> 8] = holes - state->holes64k[i >> 16];
124 
125 		if (i % 256 == 64)
126 			state->holes256_64[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16];
127 
128 		if (i % 256 == 128)
129 			state->holes256_128[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16];
130 
131 		if (i % 256 == 192)
132 			state->holes256_192[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16];
133 
134 		if (state->visited[i] > 1) {
135 			j = state->visited[i] - 2;
136 			state->result_map[j] = i - holes;
137 		}
138 
139 		if (state->g[i] == 3)
140 			++holes;
141 	}
142 
143 	if (i % 65536 != 0)
144 		state->holes64k[(i >> 16) + 1] = holes;
145 
146 	if (i % 256 != 0)
147 		state->holes256[(i >> 8) + 1] = holes - state->holes64k[((i >> 8) + 1) >> 8];
148 
149 	if (i % 256 != 64)
150 		state->holes256_64[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8];
151 
152 	if (i % 256 != 128)
153 		state->holes256_128[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8];
154 
155 	if (i % 256 != 192)
156 		state->holes256_192[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8];
157 }
158 
159 static void
160 print_hash(struct nbperf *nbperf, struct state *state)
161 {
162 	size_t i, j;
163 	uint32_t sum;
164 
165 	fprintf(nbperf->output, "#include <stdlib.h>\n");
166 	fprintf(nbperf->output, "#include <strings.h>\n\n");
167 
168 	fprintf(nbperf->output, "%suint32_t\n",
169 	    nbperf->static_hash ? "static " : "");
170 	fprintf(nbperf->output,
171 	    "%s(const void * __restrict key, size_t keylen)\n",
172 	    nbperf->hash_name);
173 	fprintf(nbperf->output, "{\n");
174 	fprintf(nbperf->output,
175 	    "\tstatic const uint32_t g[%" PRId32 "] = {\n",
176 	    (state->graph.v + 15) / 16);
177 	for (i = 0; i < state->graph.v; i += 16) {
178 		for (j = 0, sum = 0; j < 16; ++j)
179 			sum |= (uint32_t)state->g[i + j] << (2 * j);
180 
181 		fprintf(nbperf->output, "%s0x%08" PRIx32 "ULL,%s",
182 		    (i / 16 % 4 == 0 ? "\t    " : " "),
183 		    sum,
184 		    (i / 16 % 4 == 3 ? "\n" : ""));
185 	}
186 	fprintf(nbperf->output, "%s\t};\n", (i / 16 % 4 ? "\n" : ""));
187 
188 	fprintf(nbperf->output,
189 	    "\tstatic const uint32_t holes64k[%" PRId32 "] = {\n",
190 	    (state->graph.v + 65535) / 65536);
191 	for (i = 0; i < state->graph.v; i += 65536)
192 		fprintf(nbperf->output, "%s0x%08" PRIx32 ",%s",
193 		    (i / 65536 % 4 == 0 ? "\t    " : " "),
194 		    state->holes64k[i >> 16],
195 		    (i / 65536 % 4 == 3 ? "\n" : ""));
196 	fprintf(nbperf->output, "%s\t};\n", (i / 65536 % 4 ? "\n" : ""));
197 
198 	fprintf(nbperf->output,
199 	    "\tstatic const uint16_t holes256[%" PRId32 "] = {\n",
200 	    (state->graph.v + 255) / 256);
201 	for (i = 0; i < state->graph.v; i += 256)
202 		fprintf(nbperf->output, "%s0x%04" PRIx32 ",%s",
203 		    (i / 256 % 4 == 0 ? "\t    " : " "),
204 		    state->holes256[i >> 8],
205 		    (i / 256 % 4 == 3 ? "\n" : ""));
206 	fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
207 
208 	fprintf(nbperf->output,
209 	    "\tstatic const uint8_t holes256_64[%" PRId32 "] = {\n",
210 	    (state->graph.v + 255) / 256);
211 	for (i = 64; i < state->graph.v; i += 256)
212 		fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s",
213 		    (i / 256 % 4 == 0 ? "\t    " : " "),
214 		    state->holes256_64[i >> 8],
215 		    (i / 256 % 4 == 3 ? "\n" : ""));
216 	fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
217 
218 	fprintf(nbperf->output,
219 	    "\tstatic const uint8_t holes256_128[%" PRId32 "] = {\n",
220 	    (state->graph.v + 255) / 256);
221 	for (i = 128; i < state->graph.v; i += 256)
222 		fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s",
223 		    (i / 256 % 4 == 0 ? "\t    " : " "),
224 		    state->holes256_128[i >> 8],
225 		    (i / 256 % 4 == 3 ? "\n" : ""));
226 	fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
227 
228 	fprintf(nbperf->output,
229 	    "\tstatic const uint8_t holes256_192[%" PRId32 "] = {\n",
230 	    (state->graph.v + 255) / 256);
231 	for (i = 192; i < state->graph.v; i += 256)
232 		fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s",
233 		    (i / 256 % 4 == 0 ? "\t    " : " "),
234 		    state->holes256_192[i >> 8],
235 		    (i / 256 % 4 == 3 ? "\n" : ""));
236 	fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
237 
238 	fprintf(nbperf->output, "\tuint32_t h[%zu];\n\n", nbperf->hash_size);
239 	fprintf(nbperf->output, "\tuint32_t m;\n");
240 	fprintf(nbperf->output, "\tuint32_t a1, a2, b1, b2, c1, c2, idx, idx2;\n\n");
241 
242 	(*nbperf->print_hash)(nbperf, "\t", "key", "keylen", "h");
243 
244 	fprintf(nbperf->output, "\n\th[0] = h[0] %% %" PRIu32 ";\n", state->graph.v);
245 	fprintf(nbperf->output, "\th[1] = h[1] %% %" PRIu32 ";\n", state->graph.v);
246 	fprintf(nbperf->output, "\th[2] = h[2] %% %" PRIu32 ";\n", state->graph.v);
247 
248 	fprintf(nbperf->output, "\n\ta1 = h[0] >> 4;\n");
249 	fprintf(nbperf->output, "\ta2 = 2 * (h[0] & 15);\n");
250 	fprintf(nbperf->output, "\tb1 = h[1] >> 4;\n");
251 	fprintf(nbperf->output, "\tb2 = 2 * (h[1] & 15);\n");
252 	fprintf(nbperf->output, "\tc1 = h[2] >> 4;\n");
253 	fprintf(nbperf->output, "\tc2 = 2 * (h[2] & 15);\n");
254 
255 	fprintf(nbperf->output,
256 	    "\tidx = h[(((g[a1] >> a2) & 3) + ((g[b1] >> b2) & 3) +\n"
257 	    "\t    ((g[c1] >> c2) & 3)) %% 3];\n\n");
258 
259 	fprintf(nbperf->output,
260 	    "\tswitch ((idx >> 5) & 7) {\n"
261 	    "\tcase 0:\n"
262 	    "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8];\n"
263 	    "\t\tbreak;\n"
264 	    "\tcase 1: case 2:\n"
265 	    "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n"
266 	    "\t\t    - holes256_64[idx >> 8];\n"
267 	    "\t\tbreak;\n"
268 	    "\tcase 3: case 4:\n"
269 	    "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n"
270 	    "\t\t    - holes256_128[idx >> 8];\n"
271 	    "\t\tbreak;\n"
272 	    "\tcase 5: case 6:\n"
273 	    "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n"
274 	    "\t\t    - holes256_192[idx >> 8];\n"
275 	    "\t\tbreak;\n"
276 	    "\tcase 7:\n"
277 	    "\t\tidx2 = idx - holes64k[(idx + 32) >> 16] -\n"
278 	    "\t\t    holes256[(idx + 32) >> 8];\n"
279 	    "\t\tbreak;\n"
280 	    "\tdefault:\n"
281 	    "\t\tabort();\n"
282 	    "\t}\n"
283 	    "\tswitch ((idx >> 4) & 3) {\n"
284 	    "\tcase 1:\n"
285 	    "\t\tm = (g[(idx >> 4) - 1] & (g[(idx >> 4) - 1] >> 1) & 0x55555555U);\n"
286 	    "\t\tidx2 -= popcount32(m);\n"
287 	    "\tcase 0:\n"
288 	    "\t\tm = (g[idx >> 4] & (g[idx >> 4] >> 1) & 0x55555555U);\n"
289 	    "\t\tm &= ((2U << (2 * (idx & 15))) - 1);\n"
290 	    "\t\tidx2 -= popcount32(m);\n"
291 	    "\t\tbreak;\n"
292 	    "\tcase 2:\n"
293 	    "\t\tm = (g[(idx >> 4) + 1] & (g[(idx >> 4) + 1] >> 1) & 0x55555555U);\n"
294 	    "\t\tidx2 += popcount32(m);\n"
295 	    "\tcase 3:\n"
296 	    "\t\tm = (g[idx >> 4] & (g[idx >> 4] >> 1) & 0x55555555U);\n"
297 	    "\t\tm &= ~((2U << (2 * (idx & 15))) - 1);\n"
298 	    "\t\tidx2 += popcount32(m);\n"
299 	    "\t\tbreak;\n"
300 	    "\t}\n\n");
301 
302 	fprintf(nbperf->output,
303 	    "\treturn idx2;\n");
304 	fprintf(nbperf->output, "}\n");
305 
306 	if (nbperf->map_output != NULL) {
307 		for (i = 0; i < state->graph.e; ++i)
308 			fprintf(nbperf->map_output, "%" PRIu32 "\n",
309 			    state->result_map[i]);
310 	}
311 }
312 
313 int
314 bdz_compute(struct nbperf *nbperf)
315 {
316 	struct state state;
317 	int retval = -1;
318 	uint32_t v, e;
319 
320 	if (nbperf->c == 0)
321 		nbperf->c = 1.24;
322 	if (nbperf->c < 1.24)
323 		errx(1, "The argument for option -c must be at least 1.24");
324 	if (nbperf->hash_size < 3)
325 		errx(1, "The hash function must generate at least 3 values");
326 
327 	(*nbperf->seed_hash)(nbperf);
328 	e = nbperf->n;
329 	v = nbperf->c * nbperf->n;
330 	if (1.24 * nbperf->n > v)
331 		++v;
332 	if (v < 10)
333 		v = 10;
334 
335 	graph3_setup(&state.graph, v, e);
336 
337 	state.holes64k = calloc(sizeof(uint32_t), (v + 65535) / 65536 + 1);
338 	state.holes256 = calloc(sizeof(uint16_t), (v + 255) / 256 + 1);
339 	state.holes256_64 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1);
340 	state.holes256_128 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1);
341 	state.holes256_192 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1);
342 	state.g = calloc(sizeof(uint32_t), v);
343 	state.visited = calloc(sizeof(uint32_t), v);
344 	state.result_map = calloc(sizeof(uint32_t), e);
345 
346 	if (state.holes64k == NULL || state.holes256 == NULL ||
347 	    state.holes256_64 == NULL || state.holes256_128 == NULL ||
348 	    state.holes256_192 == NULL || state.g == NULL ||
349 	    state.visited == NULL || state.result_map == NULL)
350 		err(1, "malloc failed");
351 
352 	if (graph3_hash(nbperf, &state.graph))
353 		goto failed;
354 	if (graph3_output_order(&state.graph))
355 		goto failed;
356 	assign_nodes(&state);
357 	print_hash(nbperf, &state);
358 
359 	retval = 0;
360 
361 failed:
362 	graph3_free(&state.graph);
363 	free(state.visited);
364 	free(state.g);
365 	free(state.holes64k);
366 	free(state.holes256);
367 	free(state.holes256_64);
368 	free(state.holes256_128);
369 	free(state.holes256_192);
370 	free(state.result_map);
371 	return retval;
372 }
373