1*c2a1106fSandvar /* $NetBSD: nbperf-bdz.c,v 1.12 2023/07/31 21:07:50 andvar Exp $ */
203c8ba1cSjoerg /*-
34edfbdbbSjoerg * Copyright (c) 2009, 2012 The NetBSD Foundation, Inc.
403c8ba1cSjoerg * All rights reserved.
503c8ba1cSjoerg *
603c8ba1cSjoerg * This code is derived from software contributed to The NetBSD Foundation
703c8ba1cSjoerg * by Joerg Sonnenberger.
803c8ba1cSjoerg *
903c8ba1cSjoerg * Redistribution and use in source and binary forms, with or without
1003c8ba1cSjoerg * modification, are permitted provided that the following conditions
1103c8ba1cSjoerg * are met:
1203c8ba1cSjoerg *
1303c8ba1cSjoerg * 1. Redistributions of source code must retain the above copyright
1403c8ba1cSjoerg * notice, this list of conditions and the following disclaimer.
1503c8ba1cSjoerg * 2. Redistributions in binary form must reproduce the above copyright
1603c8ba1cSjoerg * notice, this list of conditions and the following disclaimer in
1703c8ba1cSjoerg * the documentation and/or other materials provided with the
1803c8ba1cSjoerg * distribution.
1903c8ba1cSjoerg *
2003c8ba1cSjoerg * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2103c8ba1cSjoerg * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2203c8ba1cSjoerg * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
2303c8ba1cSjoerg * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
2403c8ba1cSjoerg * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
2503c8ba1cSjoerg * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
2603c8ba1cSjoerg * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
2703c8ba1cSjoerg * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
2803c8ba1cSjoerg * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
2903c8ba1cSjoerg * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
3003c8ba1cSjoerg * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3103c8ba1cSjoerg * SUCH DAMAGE.
3203c8ba1cSjoerg */
3303c8ba1cSjoerg
3410769988Sjoerg #if HAVE_NBTOOL_CONFIG_H
3510769988Sjoerg #include "nbtool_config.h"
3610769988Sjoerg #endif
3710769988Sjoerg
3803c8ba1cSjoerg #include <sys/cdefs.h>
39*c2a1106fSandvar __RCSID("$NetBSD: nbperf-bdz.c,v 1.12 2023/07/31 21:07:50 andvar Exp $");
4003c8ba1cSjoerg
4103c8ba1cSjoerg #include <err.h>
4203c8ba1cSjoerg #include <inttypes.h>
4303c8ba1cSjoerg #include <stdlib.h>
4403c8ba1cSjoerg #include <stdio.h>
4503c8ba1cSjoerg #include <string.h>
4603c8ba1cSjoerg
4703c8ba1cSjoerg #include "nbperf.h"
4803c8ba1cSjoerg
4903c8ba1cSjoerg /*
5003c8ba1cSjoerg * A full description of the algorithm can be found in:
5103c8ba1cSjoerg * "Simple and Space-Efficient Minimal Perfect Hash Functions"
52*c2a1106fSandvar * by Botelho, Pagh and Ziviani, proceedings of WADS 2007.
5303c8ba1cSjoerg */
5403c8ba1cSjoerg
5503c8ba1cSjoerg /*
5603c8ba1cSjoerg * The algorithm is based on random, acyclic 3-graphs.
5703c8ba1cSjoerg *
5803c8ba1cSjoerg * Each edge in the represents a key. The vertices are the reminder of
5903c8ba1cSjoerg * the hash function mod n. n = cm with c > 1.23. This ensures that
6097b3b051Sjoerg * an acyclic graph can be found with a very high probality.
6103c8ba1cSjoerg *
6203c8ba1cSjoerg * An acyclic graph has an edge order, where at least one vertex of
6303c8ba1cSjoerg * each edge hasn't been seen before. It is declares the first unvisited
6403c8ba1cSjoerg * vertex as authoritive for the edge and assigns a 2bit value to unvisited
6503c8ba1cSjoerg * vertices, so that the sum of all vertices of the edge modulo 4 is
6603c8ba1cSjoerg * the index of the authoritive vertex.
6703c8ba1cSjoerg */
6803c8ba1cSjoerg
697c219dd6Sjoerg #define GRAPH_SIZE 3
707c219dd6Sjoerg #include "graph2.h"
7103c8ba1cSjoerg
7203c8ba1cSjoerg struct state {
737c219dd6Sjoerg struct SIZED(graph) graph;
7403c8ba1cSjoerg uint32_t *visited;
7503c8ba1cSjoerg uint32_t *holes64k;
764edfbdbbSjoerg uint16_t *holes64;
7703c8ba1cSjoerg uint8_t *g;
7803c8ba1cSjoerg uint32_t *result_map;
7903c8ba1cSjoerg };
8003c8ba1cSjoerg
8103c8ba1cSjoerg static void
assign_nodes(struct state * state)8203c8ba1cSjoerg assign_nodes(struct state *state)
8303c8ba1cSjoerg {
847c219dd6Sjoerg struct SIZED(edge) *e;
8503c8ba1cSjoerg size_t i, j;
8603c8ba1cSjoerg uint32_t t, r, holes;
8703c8ba1cSjoerg
8803c8ba1cSjoerg for (i = 0; i < state->graph.v; ++i)
8903c8ba1cSjoerg state->g[i] = 3;
9003c8ba1cSjoerg
9103c8ba1cSjoerg for (i = 0; i < state->graph.e; ++i) {
9203c8ba1cSjoerg j = state->graph.output_order[i];
9303c8ba1cSjoerg e = &state->graph.edges[j];
947c219dd6Sjoerg if (!state->visited[e->vertices[0]]) {
9503c8ba1cSjoerg r = 0;
967c219dd6Sjoerg t = e->vertices[0];
977c219dd6Sjoerg } else if (!state->visited[e->vertices[1]]) {
9803c8ba1cSjoerg r = 1;
997c219dd6Sjoerg t = e->vertices[1];
10003c8ba1cSjoerg } else {
1017c219dd6Sjoerg if (state->visited[e->vertices[2]])
10203c8ba1cSjoerg abort();
10303c8ba1cSjoerg r = 2;
1047c219dd6Sjoerg t = e->vertices[2];
10503c8ba1cSjoerg }
10603c8ba1cSjoerg
10703c8ba1cSjoerg state->visited[t] = 2 + j;
1087c219dd6Sjoerg if (state->visited[e->vertices[0]] == 0)
1097c219dd6Sjoerg state->visited[e->vertices[0]] = 1;
1107c219dd6Sjoerg if (state->visited[e->vertices[1]] == 0)
1117c219dd6Sjoerg state->visited[e->vertices[1]] = 1;
1127c219dd6Sjoerg if (state->visited[e->vertices[2]] == 0)
1137c219dd6Sjoerg state->visited[e->vertices[2]] = 1;
11403c8ba1cSjoerg
1157c219dd6Sjoerg state->g[t] = (9 + r - state->g[e->vertices[0]] - state->g[e->vertices[1]]
1167c219dd6Sjoerg - state->g[e->vertices[2]]) % 3;
11703c8ba1cSjoerg }
11803c8ba1cSjoerg
11903c8ba1cSjoerg holes = 0;
12003c8ba1cSjoerg for (i = 0; i < state->graph.v; ++i) {
12103c8ba1cSjoerg if (i % 65536 == 0)
12203c8ba1cSjoerg state->holes64k[i >> 16] = holes;
12303c8ba1cSjoerg
1244edfbdbbSjoerg if (i % 64 == 0)
1254edfbdbbSjoerg state->holes64[i >> 6] = holes - state->holes64k[i >> 16];
12603c8ba1cSjoerg
12703c8ba1cSjoerg if (state->visited[i] > 1) {
12803c8ba1cSjoerg j = state->visited[i] - 2;
12903c8ba1cSjoerg state->result_map[j] = i - holes;
13003c8ba1cSjoerg }
13103c8ba1cSjoerg
13203c8ba1cSjoerg if (state->g[i] == 3)
13303c8ba1cSjoerg ++holes;
13403c8ba1cSjoerg }
13503c8ba1cSjoerg }
13603c8ba1cSjoerg
13703c8ba1cSjoerg static void
print_hash(struct nbperf * nbperf,struct state * state)13803c8ba1cSjoerg print_hash(struct nbperf *nbperf, struct state *state)
13903c8ba1cSjoerg {
1404edfbdbbSjoerg uint64_t sum;
1414edfbdbbSjoerg size_t i;
14203c8ba1cSjoerg
14303c8ba1cSjoerg fprintf(nbperf->output, "#include <stdlib.h>\n");
14403c8ba1cSjoerg fprintf(nbperf->output, "#include <strings.h>\n\n");
14503c8ba1cSjoerg
14603c8ba1cSjoerg fprintf(nbperf->output, "%suint32_t\n",
14703c8ba1cSjoerg nbperf->static_hash ? "static " : "");
14803c8ba1cSjoerg fprintf(nbperf->output,
14903c8ba1cSjoerg "%s(const void * __restrict key, size_t keylen)\n",
15003c8ba1cSjoerg nbperf->hash_name);
15103c8ba1cSjoerg fprintf(nbperf->output, "{\n");
15203c8ba1cSjoerg
1534edfbdbbSjoerg fprintf(nbperf->output,
1544edfbdbbSjoerg "\tstatic const uint64_t g1[%" PRId32 "] = {\n",
1554edfbdbbSjoerg (state->graph.v + 63) / 64);
1564edfbdbbSjoerg sum = 0;
1574edfbdbbSjoerg for (i = 0; i < state->graph.v; ++i) {
1584edfbdbbSjoerg sum |= ((uint64_t)state->g[i] & 1) << (i & 63);
1594edfbdbbSjoerg if (i % 64 == 63) {
1604edfbdbbSjoerg fprintf(nbperf->output, "%s0x%016" PRIx64 "ULL,%s",
1614edfbdbbSjoerg (i / 64 % 2 == 0 ? "\t " : " "),
16203c8ba1cSjoerg sum,
1634edfbdbbSjoerg (i / 64 % 2 == 1 ? "\n" : ""));
1644edfbdbbSjoerg sum = 0;
16503c8ba1cSjoerg }
1664edfbdbbSjoerg }
1674edfbdbbSjoerg if (i % 64 != 0) {
1684edfbdbbSjoerg fprintf(nbperf->output, "%s0x%016" PRIx64 "ULL,%s",
1694edfbdbbSjoerg (i / 64 % 2 == 0 ? "\t " : " "),
1704edfbdbbSjoerg sum,
1714edfbdbbSjoerg (i / 64 % 2 == 1 ? "\n" : ""));
1724edfbdbbSjoerg }
1734edfbdbbSjoerg fprintf(nbperf->output, "%s\t};\n", (i % 2 ? "\n" : ""));
1744edfbdbbSjoerg
1754edfbdbbSjoerg fprintf(nbperf->output,
1764edfbdbbSjoerg "\tstatic const uint64_t g2[%" PRId32 "] = {\n",
1774edfbdbbSjoerg (state->graph.v + 63) / 64);
1784edfbdbbSjoerg sum = 0;
1794edfbdbbSjoerg for (i = 0; i < state->graph.v; ++i) {
1804edfbdbbSjoerg sum |= (((uint64_t)state->g[i] & 2) >> 1) << (i & 63);
1814edfbdbbSjoerg if (i % 64 == 63) {
1824edfbdbbSjoerg fprintf(nbperf->output, "%s0x%016" PRIx64 "ULL,%s",
1834edfbdbbSjoerg (i / 64 % 2 == 0 ? "\t " : " "),
1844edfbdbbSjoerg sum,
1854edfbdbbSjoerg (i / 64 % 2 == 1 ? "\n" : ""));
1864edfbdbbSjoerg sum = 0;
1874edfbdbbSjoerg }
1884edfbdbbSjoerg }
1894edfbdbbSjoerg if (i % 64 != 0) {
1904edfbdbbSjoerg fprintf(nbperf->output, "%s0x%016" PRIx64 "ULL,%s",
1914edfbdbbSjoerg (i / 64 % 2 == 0 ? "\t " : " "),
1924edfbdbbSjoerg sum,
1934edfbdbbSjoerg (i / 64 % 2 == 1 ? "\n" : ""));
1944edfbdbbSjoerg }
1954edfbdbbSjoerg fprintf(nbperf->output, "%s\t};\n", (i % 2 ? "\n" : ""));
19603c8ba1cSjoerg
19703c8ba1cSjoerg fprintf(nbperf->output,
19803c8ba1cSjoerg "\tstatic const uint32_t holes64k[%" PRId32 "] = {\n",
19903c8ba1cSjoerg (state->graph.v + 65535) / 65536);
20003c8ba1cSjoerg for (i = 0; i < state->graph.v; i += 65536)
20103c8ba1cSjoerg fprintf(nbperf->output, "%s0x%08" PRIx32 ",%s",
20203c8ba1cSjoerg (i / 65536 % 4 == 0 ? "\t " : " "),
20303c8ba1cSjoerg state->holes64k[i >> 16],
20403c8ba1cSjoerg (i / 65536 % 4 == 3 ? "\n" : ""));
20503c8ba1cSjoerg fprintf(nbperf->output, "%s\t};\n", (i / 65536 % 4 ? "\n" : ""));
20603c8ba1cSjoerg
20703c8ba1cSjoerg fprintf(nbperf->output,
2084edfbdbbSjoerg "\tstatic const uint16_t holes64[%" PRId32 "] = {\n",
2094edfbdbbSjoerg (state->graph.v + 63) / 64);
2104edfbdbbSjoerg for (i = 0; i < state->graph.v; i += 64)
21103c8ba1cSjoerg fprintf(nbperf->output, "%s0x%04" PRIx32 ",%s",
2124edfbdbbSjoerg (i / 64 % 4 == 0 ? "\t " : " "),
2134edfbdbbSjoerg state->holes64[i >> 6],
2144edfbdbbSjoerg (i / 64 % 4 == 3 ? "\n" : ""));
2154edfbdbbSjoerg fprintf(nbperf->output, "%s\t};\n", (i / 64 % 4 ? "\n" : ""));
21603c8ba1cSjoerg
2174edfbdbbSjoerg fprintf(nbperf->output, "\tuint64_t m;\n");
2184edfbdbbSjoerg fprintf(nbperf->output, "\tuint32_t idx, i, idx2;\n");
21903c8ba1cSjoerg fprintf(nbperf->output, "\tuint32_t h[%zu];\n\n", nbperf->hash_size);
22003c8ba1cSjoerg
22103c8ba1cSjoerg (*nbperf->print_hash)(nbperf, "\t", "key", "keylen", "h");
22203c8ba1cSjoerg
2234edfbdbbSjoerg fprintf(nbperf->output, "\n\th[0] = h[0] %% %" PRIu32 ";\n",
2244edfbdbbSjoerg state->graph.v);
2254edfbdbbSjoerg fprintf(nbperf->output, "\th[1] = h[1] %% %" PRIu32 ";\n",
2264edfbdbbSjoerg state->graph.v);
2274edfbdbbSjoerg fprintf(nbperf->output, "\th[2] = h[2] %% %" PRIu32 ";\n",
2284edfbdbbSjoerg state->graph.v);
22903c8ba1cSjoerg
2307c219dd6Sjoerg if (state->graph.hash_fudge & 1)
2317c219dd6Sjoerg fprintf(nbperf->output, "\th[1] ^= (h[0] == h[1]);\n");
2327c219dd6Sjoerg
2337c219dd6Sjoerg if (state->graph.hash_fudge & 2) {
2347c219dd6Sjoerg fprintf(nbperf->output,
2357c219dd6Sjoerg "\th[2] ^= (h[0] == h[2] || h[1] == h[2]);\n");
2367c219dd6Sjoerg fprintf(nbperf->output,
2377c219dd6Sjoerg "\th[2] ^= 2 * (h[0] == h[2] || h[1] == h[2]);\n");
2387c219dd6Sjoerg }
2397c219dd6Sjoerg
24003c8ba1cSjoerg fprintf(nbperf->output,
241ab86636aSjoerg "\tidx = 9 + ((g1[h[0] >> 6] >> (h[0] & 63)) &1)\n"
242ab86636aSjoerg "\t + ((g1[h[1] >> 6] >> (h[1] & 63)) & 1)\n"
243ab86636aSjoerg "\t + ((g1[h[2] >> 6] >> (h[2] & 63)) & 1)\n"
244ab86636aSjoerg "\t - ((g2[h[0] >> 6] >> (h[0] & 63)) & 1)\n"
245ab86636aSjoerg "\t - ((g2[h[1] >> 6] >> (h[1] & 63)) & 1)\n"
246ab86636aSjoerg "\t - ((g2[h[2] >> 6] >> (h[2] & 63)) & 1);\n"
2474edfbdbbSjoerg );
24803c8ba1cSjoerg
24903c8ba1cSjoerg fprintf(nbperf->output,
2504edfbdbbSjoerg "\tidx = h[idx %% 3];\n");
25103c8ba1cSjoerg fprintf(nbperf->output,
2524edfbdbbSjoerg "\tidx2 = idx - holes64[idx >> 6] - holes64k[idx >> 16];\n"
2534edfbdbbSjoerg "\tidx2 -= popcount64(g1[idx >> 6] & g2[idx >> 6]\n"
254cc89f792Sjoerg "\t & (((uint64_t)1 << (idx & 63)) - 1));\n"
255e240adbdSjoerg "\treturn idx2;\n");
2564edfbdbbSjoerg
25703c8ba1cSjoerg fprintf(nbperf->output, "}\n");
25803c8ba1cSjoerg
25903c8ba1cSjoerg if (nbperf->map_output != NULL) {
26003c8ba1cSjoerg for (i = 0; i < state->graph.e; ++i)
26103c8ba1cSjoerg fprintf(nbperf->map_output, "%" PRIu32 "\n",
26203c8ba1cSjoerg state->result_map[i]);
26303c8ba1cSjoerg }
26403c8ba1cSjoerg }
26503c8ba1cSjoerg
26603c8ba1cSjoerg int
bpz_compute(struct nbperf * nbperf)267a81d4ca3Sjoerg bpz_compute(struct nbperf *nbperf)
26803c8ba1cSjoerg {
26903c8ba1cSjoerg struct state state;
27003c8ba1cSjoerg int retval = -1;
27103c8ba1cSjoerg uint32_t v, e;
27203c8ba1cSjoerg
27303c8ba1cSjoerg if (nbperf->c == 0)
27403c8ba1cSjoerg nbperf->c = 1.24;
27503c8ba1cSjoerg if (nbperf->c < 1.24)
27603c8ba1cSjoerg errx(1, "The argument for option -c must be at least 1.24");
27703c8ba1cSjoerg if (nbperf->hash_size < 3)
27803c8ba1cSjoerg errx(1, "The hash function must generate at least 3 values");
27903c8ba1cSjoerg
28003c8ba1cSjoerg (*nbperf->seed_hash)(nbperf);
28103c8ba1cSjoerg e = nbperf->n;
28203c8ba1cSjoerg v = nbperf->c * nbperf->n;
28303c8ba1cSjoerg if (1.24 * nbperf->n > v)
28403c8ba1cSjoerg ++v;
28503c8ba1cSjoerg if (v < 10)
28603c8ba1cSjoerg v = 10;
2877c219dd6Sjoerg if (nbperf->allow_hash_fudging)
28863eec5d6Sjoerg v = (v + 3) & ~3;
28903c8ba1cSjoerg
29003c8ba1cSjoerg graph3_setup(&state.graph, v, e);
29103c8ba1cSjoerg
2924edfbdbbSjoerg state.holes64k = calloc(sizeof(uint32_t), (v + 65535) / 65536);
2934edfbdbbSjoerg state.holes64 = calloc(sizeof(uint16_t), (v + 63) / 64 );
2944edfbdbbSjoerg state.g = calloc(sizeof(uint32_t), v | 63);
29503c8ba1cSjoerg state.visited = calloc(sizeof(uint32_t), v);
29603c8ba1cSjoerg state.result_map = calloc(sizeof(uint32_t), e);
29703c8ba1cSjoerg
2984edfbdbbSjoerg if (state.holes64k == NULL || state.holes64 == NULL ||
2994edfbdbbSjoerg state.g == NULL || state.visited == NULL ||
3004edfbdbbSjoerg state.result_map == NULL)
30103c8ba1cSjoerg err(1, "malloc failed");
30203c8ba1cSjoerg
3037c219dd6Sjoerg if (SIZED2(_hash)(nbperf, &state.graph))
30403c8ba1cSjoerg goto failed;
3057c219dd6Sjoerg if (SIZED2(_output_order)(&state.graph))
30603c8ba1cSjoerg goto failed;
30703c8ba1cSjoerg assign_nodes(&state);
30803c8ba1cSjoerg print_hash(nbperf, &state);
30903c8ba1cSjoerg
31003c8ba1cSjoerg retval = 0;
31103c8ba1cSjoerg
31203c8ba1cSjoerg failed:
3137c219dd6Sjoerg SIZED2(_free)(&state.graph);
31403c8ba1cSjoerg free(state.visited);
31503c8ba1cSjoerg free(state.g);
31603c8ba1cSjoerg free(state.holes64k);
3174edfbdbbSjoerg free(state.holes64);
31803c8ba1cSjoerg free(state.result_map);
31903c8ba1cSjoerg return retval;
32003c8ba1cSjoerg }
321