xref: /netbsd-src/usr.bin/nbperf/nbperf-bdz.c (revision c2a1106fc120677a9a63f7848a215f9225261463)
1*c2a1106fSandvar /*	$NetBSD: nbperf-bdz.c,v 1.12 2023/07/31 21:07:50 andvar Exp $	*/
203c8ba1cSjoerg /*-
34edfbdbbSjoerg  * Copyright (c) 2009, 2012 The NetBSD Foundation, Inc.
403c8ba1cSjoerg  * All rights reserved.
503c8ba1cSjoerg  *
603c8ba1cSjoerg  * This code is derived from software contributed to The NetBSD Foundation
703c8ba1cSjoerg  * by Joerg Sonnenberger.
803c8ba1cSjoerg  *
903c8ba1cSjoerg  * Redistribution and use in source and binary forms, with or without
1003c8ba1cSjoerg  * modification, are permitted provided that the following conditions
1103c8ba1cSjoerg  * are met:
1203c8ba1cSjoerg  *
1303c8ba1cSjoerg  * 1. Redistributions of source code must retain the above copyright
1403c8ba1cSjoerg  *    notice, this list of conditions and the following disclaimer.
1503c8ba1cSjoerg  * 2. Redistributions in binary form must reproduce the above copyright
1603c8ba1cSjoerg  *    notice, this list of conditions and the following disclaimer in
1703c8ba1cSjoerg  *    the documentation and/or other materials provided with the
1803c8ba1cSjoerg  *    distribution.
1903c8ba1cSjoerg  *
2003c8ba1cSjoerg  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2103c8ba1cSjoerg  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2203c8ba1cSjoerg  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
2303c8ba1cSjoerg  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
2403c8ba1cSjoerg  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
2503c8ba1cSjoerg  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
2603c8ba1cSjoerg  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
2703c8ba1cSjoerg  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
2803c8ba1cSjoerg  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
2903c8ba1cSjoerg  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
3003c8ba1cSjoerg  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3103c8ba1cSjoerg  * SUCH DAMAGE.
3203c8ba1cSjoerg  */
3303c8ba1cSjoerg 
3410769988Sjoerg #if HAVE_NBTOOL_CONFIG_H
3510769988Sjoerg #include "nbtool_config.h"
3610769988Sjoerg #endif
3710769988Sjoerg 
3803c8ba1cSjoerg #include <sys/cdefs.h>
39*c2a1106fSandvar __RCSID("$NetBSD: nbperf-bdz.c,v 1.12 2023/07/31 21:07:50 andvar Exp $");
4003c8ba1cSjoerg 
4103c8ba1cSjoerg #include <err.h>
4203c8ba1cSjoerg #include <inttypes.h>
4303c8ba1cSjoerg #include <stdlib.h>
4403c8ba1cSjoerg #include <stdio.h>
4503c8ba1cSjoerg #include <string.h>
4603c8ba1cSjoerg 
4703c8ba1cSjoerg #include "nbperf.h"
4803c8ba1cSjoerg 
4903c8ba1cSjoerg /*
5003c8ba1cSjoerg  * A full description of the algorithm can be found in:
5103c8ba1cSjoerg  * "Simple and Space-Efficient Minimal Perfect Hash Functions"
52*c2a1106fSandvar  * by Botelho, Pagh and Ziviani, proceedings of WADS 2007.
5303c8ba1cSjoerg  */
5403c8ba1cSjoerg 
5503c8ba1cSjoerg /*
5603c8ba1cSjoerg  * The algorithm is based on random, acyclic 3-graphs.
5703c8ba1cSjoerg  *
5803c8ba1cSjoerg  * Each edge in the represents a key.  The vertices are the reminder of
5903c8ba1cSjoerg  * the hash function mod n.  n = cm with c > 1.23.  This ensures that
6097b3b051Sjoerg  * an acyclic graph can be found with a very high probality.
6103c8ba1cSjoerg  *
6203c8ba1cSjoerg  * An acyclic graph has an edge order, where at least one vertex of
6303c8ba1cSjoerg  * each edge hasn't been seen before.   It is declares the first unvisited
6403c8ba1cSjoerg  * vertex as authoritive for the edge and assigns a 2bit value to unvisited
6503c8ba1cSjoerg  * vertices, so that the sum of all vertices of the edge modulo 4 is
6603c8ba1cSjoerg  * the index of the authoritive vertex.
6703c8ba1cSjoerg  */
6803c8ba1cSjoerg 
697c219dd6Sjoerg #define GRAPH_SIZE 3
707c219dd6Sjoerg #include "graph2.h"
7103c8ba1cSjoerg 
7203c8ba1cSjoerg struct state {
737c219dd6Sjoerg 	struct SIZED(graph) graph;
7403c8ba1cSjoerg 	uint32_t *visited;
7503c8ba1cSjoerg 	uint32_t *holes64k;
764edfbdbbSjoerg 	uint16_t *holes64;
7703c8ba1cSjoerg 	uint8_t *g;
7803c8ba1cSjoerg 	uint32_t *result_map;
7903c8ba1cSjoerg };
8003c8ba1cSjoerg 
8103c8ba1cSjoerg static void
assign_nodes(struct state * state)8203c8ba1cSjoerg assign_nodes(struct state *state)
8303c8ba1cSjoerg {
847c219dd6Sjoerg 	struct SIZED(edge) *e;
8503c8ba1cSjoerg 	size_t i, j;
8603c8ba1cSjoerg 	uint32_t t, r, holes;
8703c8ba1cSjoerg 
8803c8ba1cSjoerg 	for (i = 0; i < state->graph.v; ++i)
8903c8ba1cSjoerg 		state->g[i] = 3;
9003c8ba1cSjoerg 
9103c8ba1cSjoerg 	for (i = 0; i < state->graph.e; ++i) {
9203c8ba1cSjoerg 		j = state->graph.output_order[i];
9303c8ba1cSjoerg 		e = &state->graph.edges[j];
947c219dd6Sjoerg 		if (!state->visited[e->vertices[0]]) {
9503c8ba1cSjoerg 			r = 0;
967c219dd6Sjoerg 			t = e->vertices[0];
977c219dd6Sjoerg 		} else if (!state->visited[e->vertices[1]]) {
9803c8ba1cSjoerg 			r = 1;
997c219dd6Sjoerg 			t = e->vertices[1];
10003c8ba1cSjoerg 		} else {
1017c219dd6Sjoerg 			if (state->visited[e->vertices[2]])
10203c8ba1cSjoerg 				abort();
10303c8ba1cSjoerg 			r = 2;
1047c219dd6Sjoerg 			t = e->vertices[2];
10503c8ba1cSjoerg 		}
10603c8ba1cSjoerg 
10703c8ba1cSjoerg 		state->visited[t] = 2 + j;
1087c219dd6Sjoerg 		if (state->visited[e->vertices[0]] == 0)
1097c219dd6Sjoerg 			state->visited[e->vertices[0]] = 1;
1107c219dd6Sjoerg 		if (state->visited[e->vertices[1]] == 0)
1117c219dd6Sjoerg 			state->visited[e->vertices[1]] = 1;
1127c219dd6Sjoerg 		if (state->visited[e->vertices[2]] == 0)
1137c219dd6Sjoerg 			state->visited[e->vertices[2]] = 1;
11403c8ba1cSjoerg 
1157c219dd6Sjoerg 		state->g[t] = (9 + r - state->g[e->vertices[0]] - state->g[e->vertices[1]]
1167c219dd6Sjoerg 		    - state->g[e->vertices[2]]) % 3;
11703c8ba1cSjoerg 	}
11803c8ba1cSjoerg 
11903c8ba1cSjoerg 	holes = 0;
12003c8ba1cSjoerg 	for (i = 0; i < state->graph.v; ++i) {
12103c8ba1cSjoerg 		if (i % 65536 == 0)
12203c8ba1cSjoerg 			state->holes64k[i >> 16] = holes;
12303c8ba1cSjoerg 
1244edfbdbbSjoerg 		if (i % 64 == 0)
1254edfbdbbSjoerg 			state->holes64[i >> 6] = holes - state->holes64k[i >> 16];
12603c8ba1cSjoerg 
12703c8ba1cSjoerg 		if (state->visited[i] > 1) {
12803c8ba1cSjoerg 			j = state->visited[i] - 2;
12903c8ba1cSjoerg 			state->result_map[j] = i - holes;
13003c8ba1cSjoerg 		}
13103c8ba1cSjoerg 
13203c8ba1cSjoerg 		if (state->g[i] == 3)
13303c8ba1cSjoerg 			++holes;
13403c8ba1cSjoerg 	}
13503c8ba1cSjoerg }
13603c8ba1cSjoerg 
13703c8ba1cSjoerg static void
print_hash(struct nbperf * nbperf,struct state * state)13803c8ba1cSjoerg print_hash(struct nbperf *nbperf, struct state *state)
13903c8ba1cSjoerg {
1404edfbdbbSjoerg 	uint64_t sum;
1414edfbdbbSjoerg 	size_t i;
14203c8ba1cSjoerg 
14303c8ba1cSjoerg 	fprintf(nbperf->output, "#include <stdlib.h>\n");
14403c8ba1cSjoerg 	fprintf(nbperf->output, "#include <strings.h>\n\n");
14503c8ba1cSjoerg 
14603c8ba1cSjoerg 	fprintf(nbperf->output, "%suint32_t\n",
14703c8ba1cSjoerg 	    nbperf->static_hash ? "static " : "");
14803c8ba1cSjoerg 	fprintf(nbperf->output,
14903c8ba1cSjoerg 	    "%s(const void * __restrict key, size_t keylen)\n",
15003c8ba1cSjoerg 	    nbperf->hash_name);
15103c8ba1cSjoerg 	fprintf(nbperf->output, "{\n");
15203c8ba1cSjoerg 
1534edfbdbbSjoerg 	fprintf(nbperf->output,
1544edfbdbbSjoerg 	    "\tstatic const uint64_t g1[%" PRId32 "] = {\n",
1554edfbdbbSjoerg 	    (state->graph.v + 63) / 64);
1564edfbdbbSjoerg 	sum = 0;
1574edfbdbbSjoerg 	for (i = 0; i < state->graph.v; ++i) {
1584edfbdbbSjoerg 		sum |= ((uint64_t)state->g[i] & 1) << (i & 63);
1594edfbdbbSjoerg 		if (i % 64 == 63) {
1604edfbdbbSjoerg 			fprintf(nbperf->output, "%s0x%016" PRIx64 "ULL,%s",
1614edfbdbbSjoerg 			    (i / 64 % 2 == 0 ? "\t    " : " "),
16203c8ba1cSjoerg 			    sum,
1634edfbdbbSjoerg 			    (i / 64 % 2 == 1 ? "\n" : ""));
1644edfbdbbSjoerg 			sum = 0;
16503c8ba1cSjoerg 		}
1664edfbdbbSjoerg 	}
1674edfbdbbSjoerg 	if (i % 64 != 0) {
1684edfbdbbSjoerg 		fprintf(nbperf->output, "%s0x%016" PRIx64 "ULL,%s",
1694edfbdbbSjoerg 		    (i / 64 % 2 == 0 ? "\t    " : " "),
1704edfbdbbSjoerg 		    sum,
1714edfbdbbSjoerg 		    (i / 64 % 2 == 1 ? "\n" : ""));
1724edfbdbbSjoerg 	}
1734edfbdbbSjoerg 	fprintf(nbperf->output, "%s\t};\n", (i % 2 ? "\n" : ""));
1744edfbdbbSjoerg 
1754edfbdbbSjoerg 	fprintf(nbperf->output,
1764edfbdbbSjoerg 	    "\tstatic const uint64_t g2[%" PRId32 "] = {\n",
1774edfbdbbSjoerg 	    (state->graph.v + 63) / 64);
1784edfbdbbSjoerg 	sum = 0;
1794edfbdbbSjoerg 	for (i = 0; i < state->graph.v; ++i) {
1804edfbdbbSjoerg 		sum |= (((uint64_t)state->g[i] & 2) >> 1) << (i & 63);
1814edfbdbbSjoerg 		if (i % 64 == 63) {
1824edfbdbbSjoerg 			fprintf(nbperf->output, "%s0x%016" PRIx64 "ULL,%s",
1834edfbdbbSjoerg 			    (i / 64 % 2 == 0 ? "\t    " : " "),
1844edfbdbbSjoerg 			    sum,
1854edfbdbbSjoerg 			    (i / 64 % 2 == 1 ? "\n" : ""));
1864edfbdbbSjoerg 			sum = 0;
1874edfbdbbSjoerg 		}
1884edfbdbbSjoerg 	}
1894edfbdbbSjoerg 	if (i % 64 != 0) {
1904edfbdbbSjoerg 		fprintf(nbperf->output, "%s0x%016" PRIx64 "ULL,%s",
1914edfbdbbSjoerg 		    (i / 64 % 2 == 0 ? "\t    " : " "),
1924edfbdbbSjoerg 		    sum,
1934edfbdbbSjoerg 		    (i / 64 % 2 == 1 ? "\n" : ""));
1944edfbdbbSjoerg 	}
1954edfbdbbSjoerg 	fprintf(nbperf->output, "%s\t};\n", (i % 2 ? "\n" : ""));
19603c8ba1cSjoerg 
19703c8ba1cSjoerg 	fprintf(nbperf->output,
19803c8ba1cSjoerg 	    "\tstatic const uint32_t holes64k[%" PRId32 "] = {\n",
19903c8ba1cSjoerg 	    (state->graph.v + 65535) / 65536);
20003c8ba1cSjoerg 	for (i = 0; i < state->graph.v; i += 65536)
20103c8ba1cSjoerg 		fprintf(nbperf->output, "%s0x%08" PRIx32 ",%s",
20203c8ba1cSjoerg 		    (i / 65536 % 4 == 0 ? "\t    " : " "),
20303c8ba1cSjoerg 		    state->holes64k[i >> 16],
20403c8ba1cSjoerg 		    (i / 65536 % 4 == 3 ? "\n" : ""));
20503c8ba1cSjoerg 	fprintf(nbperf->output, "%s\t};\n", (i / 65536 % 4 ? "\n" : ""));
20603c8ba1cSjoerg 
20703c8ba1cSjoerg 	fprintf(nbperf->output,
2084edfbdbbSjoerg 	    "\tstatic const uint16_t holes64[%" PRId32 "] = {\n",
2094edfbdbbSjoerg 	    (state->graph.v + 63) / 64);
2104edfbdbbSjoerg 	for (i = 0; i < state->graph.v; i += 64)
21103c8ba1cSjoerg 		fprintf(nbperf->output, "%s0x%04" PRIx32 ",%s",
2124edfbdbbSjoerg 		    (i / 64 % 4 == 0 ? "\t    " : " "),
2134edfbdbbSjoerg 		    state->holes64[i >> 6],
2144edfbdbbSjoerg 		    (i / 64 % 4 == 3 ? "\n" : ""));
2154edfbdbbSjoerg 	fprintf(nbperf->output, "%s\t};\n", (i / 64 % 4 ? "\n" : ""));
21603c8ba1cSjoerg 
2174edfbdbbSjoerg 	fprintf(nbperf->output, "\tuint64_t m;\n");
2184edfbdbbSjoerg 	fprintf(nbperf->output, "\tuint32_t idx, i, idx2;\n");
21903c8ba1cSjoerg 	fprintf(nbperf->output, "\tuint32_t h[%zu];\n\n", nbperf->hash_size);
22003c8ba1cSjoerg 
22103c8ba1cSjoerg 	(*nbperf->print_hash)(nbperf, "\t", "key", "keylen", "h");
22203c8ba1cSjoerg 
2234edfbdbbSjoerg 	fprintf(nbperf->output, "\n\th[0] = h[0] %% %" PRIu32 ";\n",
2244edfbdbbSjoerg 	    state->graph.v);
2254edfbdbbSjoerg 	fprintf(nbperf->output, "\th[1] = h[1] %% %" PRIu32 ";\n",
2264edfbdbbSjoerg 	    state->graph.v);
2274edfbdbbSjoerg 	fprintf(nbperf->output, "\th[2] = h[2] %% %" PRIu32 ";\n",
2284edfbdbbSjoerg 	    state->graph.v);
22903c8ba1cSjoerg 
2307c219dd6Sjoerg 	if (state->graph.hash_fudge & 1)
2317c219dd6Sjoerg 		fprintf(nbperf->output, "\th[1] ^= (h[0] == h[1]);\n");
2327c219dd6Sjoerg 
2337c219dd6Sjoerg 	if (state->graph.hash_fudge & 2) {
2347c219dd6Sjoerg 		fprintf(nbperf->output,
2357c219dd6Sjoerg 		    "\th[2] ^= (h[0] == h[2] || h[1] == h[2]);\n");
2367c219dd6Sjoerg 		fprintf(nbperf->output,
2377c219dd6Sjoerg 		    "\th[2] ^= 2 * (h[0] == h[2] || h[1] == h[2]);\n");
2387c219dd6Sjoerg 	}
2397c219dd6Sjoerg 
24003c8ba1cSjoerg 	fprintf(nbperf->output,
241ab86636aSjoerg 	    "\tidx = 9 + ((g1[h[0] >> 6] >> (h[0] & 63)) &1)\n"
242ab86636aSjoerg 	    "\t      + ((g1[h[1] >> 6] >> (h[1] & 63)) & 1)\n"
243ab86636aSjoerg 	    "\t      + ((g1[h[2] >> 6] >> (h[2] & 63)) & 1)\n"
244ab86636aSjoerg 	    "\t      - ((g2[h[0] >> 6] >> (h[0] & 63)) & 1)\n"
245ab86636aSjoerg 	    "\t      - ((g2[h[1] >> 6] >> (h[1] & 63)) & 1)\n"
246ab86636aSjoerg 	    "\t      - ((g2[h[2] >> 6] >> (h[2] & 63)) & 1);\n"
2474edfbdbbSjoerg 	    );
24803c8ba1cSjoerg 
24903c8ba1cSjoerg 	fprintf(nbperf->output,
2504edfbdbbSjoerg 	    "\tidx = h[idx %% 3];\n");
25103c8ba1cSjoerg 	fprintf(nbperf->output,
2524edfbdbbSjoerg 	    "\tidx2 = idx - holes64[idx >> 6] - holes64k[idx >> 16];\n"
2534edfbdbbSjoerg 	    "\tidx2 -= popcount64(g1[idx >> 6] & g2[idx >> 6]\n"
254cc89f792Sjoerg 	    "\t                   & (((uint64_t)1 << (idx & 63)) - 1));\n"
255e240adbdSjoerg 	    "\treturn idx2;\n");
2564edfbdbbSjoerg 
25703c8ba1cSjoerg 	fprintf(nbperf->output, "}\n");
25803c8ba1cSjoerg 
25903c8ba1cSjoerg 	if (nbperf->map_output != NULL) {
26003c8ba1cSjoerg 		for (i = 0; i < state->graph.e; ++i)
26103c8ba1cSjoerg 			fprintf(nbperf->map_output, "%" PRIu32 "\n",
26203c8ba1cSjoerg 			    state->result_map[i]);
26303c8ba1cSjoerg 	}
26403c8ba1cSjoerg }
26503c8ba1cSjoerg 
26603c8ba1cSjoerg int
bpz_compute(struct nbperf * nbperf)267a81d4ca3Sjoerg bpz_compute(struct nbperf *nbperf)
26803c8ba1cSjoerg {
26903c8ba1cSjoerg 	struct state state;
27003c8ba1cSjoerg 	int retval = -1;
27103c8ba1cSjoerg 	uint32_t v, e;
27203c8ba1cSjoerg 
27303c8ba1cSjoerg 	if (nbperf->c == 0)
27403c8ba1cSjoerg 		nbperf->c = 1.24;
27503c8ba1cSjoerg 	if (nbperf->c < 1.24)
27603c8ba1cSjoerg 		errx(1, "The argument for option -c must be at least 1.24");
27703c8ba1cSjoerg 	if (nbperf->hash_size < 3)
27803c8ba1cSjoerg 		errx(1, "The hash function must generate at least 3 values");
27903c8ba1cSjoerg 
28003c8ba1cSjoerg 	(*nbperf->seed_hash)(nbperf);
28103c8ba1cSjoerg 	e = nbperf->n;
28203c8ba1cSjoerg 	v = nbperf->c * nbperf->n;
28303c8ba1cSjoerg 	if (1.24 * nbperf->n > v)
28403c8ba1cSjoerg 		++v;
28503c8ba1cSjoerg 	if (v < 10)
28603c8ba1cSjoerg 		v = 10;
2877c219dd6Sjoerg 	if (nbperf->allow_hash_fudging)
28863eec5d6Sjoerg 		v = (v + 3) & ~3;
28903c8ba1cSjoerg 
29003c8ba1cSjoerg 	graph3_setup(&state.graph, v, e);
29103c8ba1cSjoerg 
2924edfbdbbSjoerg 	state.holes64k = calloc(sizeof(uint32_t), (v + 65535) / 65536);
2934edfbdbbSjoerg 	state.holes64 = calloc(sizeof(uint16_t), (v + 63) / 64 );
2944edfbdbbSjoerg 	state.g = calloc(sizeof(uint32_t), v | 63);
29503c8ba1cSjoerg 	state.visited = calloc(sizeof(uint32_t), v);
29603c8ba1cSjoerg 	state.result_map = calloc(sizeof(uint32_t), e);
29703c8ba1cSjoerg 
2984edfbdbbSjoerg 	if (state.holes64k == NULL || state.holes64 == NULL ||
2994edfbdbbSjoerg 	    state.g == NULL || state.visited == NULL ||
3004edfbdbbSjoerg 	    state.result_map == NULL)
30103c8ba1cSjoerg 		err(1, "malloc failed");
30203c8ba1cSjoerg 
3037c219dd6Sjoerg 	if (SIZED2(_hash)(nbperf, &state.graph))
30403c8ba1cSjoerg 		goto failed;
3057c219dd6Sjoerg 	if (SIZED2(_output_order)(&state.graph))
30603c8ba1cSjoerg 		goto failed;
30703c8ba1cSjoerg 	assign_nodes(&state);
30803c8ba1cSjoerg 	print_hash(nbperf, &state);
30903c8ba1cSjoerg 
31003c8ba1cSjoerg 	retval = 0;
31103c8ba1cSjoerg 
31203c8ba1cSjoerg failed:
3137c219dd6Sjoerg 	SIZED2(_free)(&state.graph);
31403c8ba1cSjoerg 	free(state.visited);
31503c8ba1cSjoerg 	free(state.g);
31603c8ba1cSjoerg 	free(state.holes64k);
3174edfbdbbSjoerg 	free(state.holes64);
31803c8ba1cSjoerg 	free(state.result_map);
31903c8ba1cSjoerg 	return retval;
32003c8ba1cSjoerg }
321