xref: /netbsd-src/usr.bin/nbperf/nbperf-bdz.c (revision 9ddb6ab554e70fb9bbd90c3d96b812bc57755a14)
1 /*	$NetBSD: nbperf-bdz.c,v 1.4 2011/10/21 23:47:11 joerg Exp $	*/
2 /*-
3  * Copyright (c) 2009 The NetBSD Foundation, Inc.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Joerg Sonnenberger.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in
17  *    the documentation and/or other materials provided with the
18  *    distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
24  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
26  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #if HAVE_NBTOOL_CONFIG_H
35 #include "nbtool_config.h"
36 #endif
37 
38 #include <sys/cdefs.h>
39 __RCSID("$NetBSD: nbperf-bdz.c,v 1.4 2011/10/21 23:47:11 joerg Exp $");
40 
41 #include <err.h>
42 #include <inttypes.h>
43 #include <stdlib.h>
44 #include <stdio.h>
45 #include <string.h>
46 
47 #include "nbperf.h"
48 
49 /*
50  * A full description of the algorithm can be found in:
51  * "Simple and Space-Efficient Minimal Perfect Hash Functions"
52  * by Botelho, Pagh and Ziviani, proceeedings of WADS 2007.
53  */
54 
55 /*
56  * The algorithm is based on random, acyclic 3-graphs.
57  *
58  * Each edge in the represents a key.  The vertices are the reminder of
59  * the hash function mod n.  n = cm with c > 1.23.  This ensures that
60  * an acyclic graph can be found with a very high probality.
61  *
62  * An acyclic graph has an edge order, where at least one vertex of
63  * each edge hasn't been seen before.   It is declares the first unvisited
64  * vertex as authoritive for the edge and assigns a 2bit value to unvisited
65  * vertices, so that the sum of all vertices of the edge modulo 4 is
66  * the index of the authoritive vertex.
67  */
68 
69 #include "graph3.h"
70 
71 struct state {
72 	struct graph3 graph;
73 	uint32_t *visited;
74 	uint32_t *holes64k;
75 	uint16_t *holes256;
76 	uint8_t *holes256_64;
77 	uint8_t *holes256_128;
78 	uint8_t *holes256_192;
79 	uint8_t *g;
80 	uint32_t *result_map;
81 };
82 
83 static void
84 assign_nodes(struct state *state)
85 {
86 	struct edge3 *e;
87 	size_t i, j;
88 	uint32_t t, r, holes;
89 
90 	for (i = 0; i < state->graph.v; ++i)
91 		state->g[i] = 3;
92 
93 	for (i = 0; i < state->graph.e; ++i) {
94 		j = state->graph.output_order[i];
95 		e = &state->graph.edges[j];
96 		if (!state->visited[e->left]) {
97 			r = 0;
98 			t = e->left;
99 		} else if (!state->visited[e->middle]) {
100 			r = 1;
101 			t = e->middle;
102 		} else {
103 			if (state->visited[e->right])
104 				abort();
105 			r = 2;
106 			t = e->right;
107 		}
108 
109 		state->visited[t] = 2 + j;
110 		if (state->visited[e->left] == 0)
111 			state->visited[e->left] = 1;
112 		if (state->visited[e->middle] == 0)
113 			state->visited[e->middle] = 1;
114 		if (state->visited[e->right] == 0)
115 			state->visited[e->right] = 1;
116 
117 		state->g[t] = (9 + r - state->g[e->left] - state->g[e->middle]
118 		    - state->g[e->right]) % 3;
119 	}
120 
121 	holes = 0;
122 	for (i = 0; i < state->graph.v; ++i) {
123 		if (i % 65536 == 0)
124 			state->holes64k[i >> 16] = holes;
125 
126 		if (i % 256 == 0)
127 			state->holes256[i >> 8] = holes - state->holes64k[i >> 16];
128 
129 		if (i % 256 == 64)
130 			state->holes256_64[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16];
131 
132 		if (i % 256 == 128)
133 			state->holes256_128[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16];
134 
135 		if (i % 256 == 192)
136 			state->holes256_192[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16];
137 
138 		if (state->visited[i] > 1) {
139 			j = state->visited[i] - 2;
140 			state->result_map[j] = i - holes;
141 		}
142 
143 		if (state->g[i] == 3)
144 			++holes;
145 	}
146 
147 	if (i % 65536 != 0)
148 		state->holes64k[(i >> 16) + 1] = holes;
149 
150 	if (i % 256 != 0)
151 		state->holes256[(i >> 8) + 1] = holes - state->holes64k[((i >> 8) + 1) >> 8];
152 
153 	if (i % 256 != 64)
154 		state->holes256_64[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8];
155 
156 	if (i % 256 != 128)
157 		state->holes256_128[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8];
158 
159 	if (i % 256 != 192)
160 		state->holes256_192[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8];
161 }
162 
163 static void
164 print_hash(struct nbperf *nbperf, struct state *state)
165 {
166 	size_t i, j;
167 	uint32_t sum;
168 
169 	fprintf(nbperf->output, "#include <stdlib.h>\n");
170 	fprintf(nbperf->output, "#include <strings.h>\n\n");
171 
172 	fprintf(nbperf->output, "%suint32_t\n",
173 	    nbperf->static_hash ? "static " : "");
174 	fprintf(nbperf->output,
175 	    "%s(const void * __restrict key, size_t keylen)\n",
176 	    nbperf->hash_name);
177 	fprintf(nbperf->output, "{\n");
178 	fprintf(nbperf->output,
179 	    "\tstatic const uint32_t g[%" PRId32 "] = {\n",
180 	    (state->graph.v + 15) / 16);
181 	for (i = 0; i < state->graph.v; i += 16) {
182 		for (j = 0, sum = 0; j < 16; ++j)
183 			sum |= (uint32_t)state->g[i + j] << (2 * j);
184 
185 		fprintf(nbperf->output, "%s0x%08" PRIx32 "ULL,%s",
186 		    (i / 16 % 4 == 0 ? "\t    " : " "),
187 		    sum,
188 		    (i / 16 % 4 == 3 ? "\n" : ""));
189 	}
190 	fprintf(nbperf->output, "%s\t};\n", (i / 16 % 4 ? "\n" : ""));
191 
192 	fprintf(nbperf->output,
193 	    "\tstatic const uint32_t holes64k[%" PRId32 "] = {\n",
194 	    (state->graph.v + 65535) / 65536);
195 	for (i = 0; i < state->graph.v; i += 65536)
196 		fprintf(nbperf->output, "%s0x%08" PRIx32 ",%s",
197 		    (i / 65536 % 4 == 0 ? "\t    " : " "),
198 		    state->holes64k[i >> 16],
199 		    (i / 65536 % 4 == 3 ? "\n" : ""));
200 	fprintf(nbperf->output, "%s\t};\n", (i / 65536 % 4 ? "\n" : ""));
201 
202 	fprintf(nbperf->output,
203 	    "\tstatic const uint16_t holes256[%" PRId32 "] = {\n",
204 	    (state->graph.v + 255) / 256);
205 	for (i = 0; i < state->graph.v; i += 256)
206 		fprintf(nbperf->output, "%s0x%04" PRIx32 ",%s",
207 		    (i / 256 % 4 == 0 ? "\t    " : " "),
208 		    state->holes256[i >> 8],
209 		    (i / 256 % 4 == 3 ? "\n" : ""));
210 	fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
211 
212 	fprintf(nbperf->output,
213 	    "\tstatic const uint8_t holes256_64[%" PRId32 "] = {\n",
214 	    (state->graph.v + 255) / 256);
215 	for (i = 64; i < state->graph.v; i += 256)
216 		fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s",
217 		    (i / 256 % 4 == 0 ? "\t    " : " "),
218 		    state->holes256_64[i >> 8],
219 		    (i / 256 % 4 == 3 ? "\n" : ""));
220 	fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
221 
222 	fprintf(nbperf->output,
223 	    "\tstatic const uint8_t holes256_128[%" PRId32 "] = {\n",
224 	    (state->graph.v + 255) / 256);
225 	for (i = 128; i < state->graph.v; i += 256)
226 		fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s",
227 		    (i / 256 % 4 == 0 ? "\t    " : " "),
228 		    state->holes256_128[i >> 8],
229 		    (i / 256 % 4 == 3 ? "\n" : ""));
230 	fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
231 
232 	fprintf(nbperf->output,
233 	    "\tstatic const uint8_t holes256_192[%" PRId32 "] = {\n",
234 	    (state->graph.v + 255) / 256);
235 	for (i = 192; i < state->graph.v; i += 256)
236 		fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s",
237 		    (i / 256 % 4 == 0 ? "\t    " : " "),
238 		    state->holes256_192[i >> 8],
239 		    (i / 256 % 4 == 3 ? "\n" : ""));
240 	fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
241 
242 	fprintf(nbperf->output, "\tuint32_t h[%zu];\n\n", nbperf->hash_size);
243 	fprintf(nbperf->output, "\tuint32_t m;\n");
244 	fprintf(nbperf->output, "\tuint32_t a1, a2, b1, b2, c1, c2, idx, idx2;\n\n");
245 
246 	(*nbperf->print_hash)(nbperf, "\t", "key", "keylen", "h");
247 
248 	fprintf(nbperf->output, "\n\th[0] = h[0] %% %" PRIu32 ";\n", state->graph.v);
249 	fprintf(nbperf->output, "\th[1] = h[1] %% %" PRIu32 ";\n", state->graph.v);
250 	fprintf(nbperf->output, "\th[2] = h[2] %% %" PRIu32 ";\n", state->graph.v);
251 
252 	fprintf(nbperf->output, "\n\ta1 = h[0] >> 4;\n");
253 	fprintf(nbperf->output, "\ta2 = 2 * (h[0] & 15);\n");
254 	fprintf(nbperf->output, "\tb1 = h[1] >> 4;\n");
255 	fprintf(nbperf->output, "\tb2 = 2 * (h[1] & 15);\n");
256 	fprintf(nbperf->output, "\tc1 = h[2] >> 4;\n");
257 	fprintf(nbperf->output, "\tc2 = 2 * (h[2] & 15);\n");
258 
259 	fprintf(nbperf->output,
260 	    "\tidx = h[(((g[a1] >> a2) & 3) + ((g[b1] >> b2) & 3) +\n"
261 	    "\t    ((g[c1] >> c2) & 3)) %% 3];\n\n");
262 
263 	fprintf(nbperf->output,
264 	    "\tswitch ((idx >> 5) & 7) {\n"
265 	    "\tcase 0:\n"
266 	    "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8];\n"
267 	    "\t\tbreak;\n"
268 	    "\tcase 1: case 2:\n"
269 	    "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n"
270 	    "\t\t    - holes256_64[idx >> 8];\n"
271 	    "\t\tbreak;\n"
272 	    "\tcase 3: case 4:\n"
273 	    "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n"
274 	    "\t\t    - holes256_128[idx >> 8];\n"
275 	    "\t\tbreak;\n"
276 	    "\tcase 5: case 6:\n"
277 	    "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n"
278 	    "\t\t    - holes256_192[idx >> 8];\n"
279 	    "\t\tbreak;\n"
280 	    "\tcase 7:\n"
281 	    "\t\tidx2 = idx - holes64k[(idx + 32) >> 16] -\n"
282 	    "\t\t    holes256[(idx + 32) >> 8];\n"
283 	    "\t\tbreak;\n"
284 	    "\tdefault:\n"
285 	    "\t\tabort();\n"
286 	    "\t}\n"
287 	    "\tswitch ((idx >> 4) & 3) {\n"
288 	    "\tcase 1:\n"
289 	    "\t\tm = (g[(idx >> 4) - 1] & (g[(idx >> 4) - 1] >> 1) & 0x55555555U);\n"
290 	    "\t\tidx2 -= popcount32(m);\n"
291 	    "\tcase 0:\n"
292 	    "\t\tm = (g[idx >> 4] & (g[idx >> 4] >> 1) & 0x55555555U);\n"
293 	    "\t\tm &= ((2U << (2 * (idx & 15))) - 1);\n"
294 	    "\t\tidx2 -= popcount32(m);\n"
295 	    "\t\tbreak;\n"
296 	    "\tcase 2:\n"
297 	    "\t\tm = (g[(idx >> 4) + 1] & (g[(idx >> 4) + 1] >> 1) & 0x55555555U);\n"
298 	    "\t\tidx2 += popcount32(m);\n"
299 	    "\tcase 3:\n"
300 	    "\t\tm = (g[idx >> 4] & (g[idx >> 4] >> 1) & 0x55555555U);\n"
301 	    "\t\tm &= ~((2U << (2 * (idx & 15))) - 1);\n"
302 	    "\t\tidx2 += popcount32(m);\n"
303 	    "\t\tbreak;\n"
304 	    "\t}\n\n");
305 
306 	fprintf(nbperf->output,
307 	    "\treturn idx2;\n");
308 	fprintf(nbperf->output, "}\n");
309 
310 	if (nbperf->map_output != NULL) {
311 		for (i = 0; i < state->graph.e; ++i)
312 			fprintf(nbperf->map_output, "%" PRIu32 "\n",
313 			    state->result_map[i]);
314 	}
315 }
316 
317 int
318 bdz_compute(struct nbperf *nbperf)
319 {
320 	struct state state;
321 	int retval = -1;
322 	uint32_t v, e;
323 
324 	if (nbperf->c == 0)
325 		nbperf->c = 1.24;
326 	if (nbperf->c < 1.24)
327 		errx(1, "The argument for option -c must be at least 1.24");
328 	if (nbperf->hash_size < 3)
329 		errx(1, "The hash function must generate at least 3 values");
330 
331 	(*nbperf->seed_hash)(nbperf);
332 	e = nbperf->n;
333 	v = nbperf->c * nbperf->n;
334 	if (1.24 * nbperf->n > v)
335 		++v;
336 	if (v < 10)
337 		v = 10;
338 
339 	graph3_setup(&state.graph, v, e);
340 
341 	state.holes64k = calloc(sizeof(uint32_t), (v + 65535) / 65536 + 1);
342 	state.holes256 = calloc(sizeof(uint16_t), (v + 255) / 256 + 1);
343 	state.holes256_64 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1);
344 	state.holes256_128 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1);
345 	state.holes256_192 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1);
346 	state.g = calloc(sizeof(uint32_t), v);
347 	state.visited = calloc(sizeof(uint32_t), v);
348 	state.result_map = calloc(sizeof(uint32_t), e);
349 
350 	if (state.holes64k == NULL || state.holes256 == NULL ||
351 	    state.holes256_64 == NULL || state.holes256_128 == NULL ||
352 	    state.holes256_192 == NULL || state.g == NULL ||
353 	    state.visited == NULL || state.result_map == NULL)
354 		err(1, "malloc failed");
355 
356 	if (graph3_hash(nbperf, &state.graph))
357 		goto failed;
358 	if (graph3_output_order(&state.graph))
359 		goto failed;
360 	assign_nodes(&state);
361 	print_hash(nbperf, &state);
362 
363 	retval = 0;
364 
365 failed:
366 	graph3_free(&state.graph);
367 	free(state.visited);
368 	free(state.g);
369 	free(state.holes64k);
370 	free(state.holes256);
371 	free(state.holes256_64);
372 	free(state.holes256_128);
373 	free(state.holes256_192);
374 	free(state.result_map);
375 	return retval;
376 }
377