1 /* $NetBSD: nbperf-chm.c,v 1.2 2009/08/24 17:12:46 joerg Exp $ */ 2 /*- 3 * Copyright (c) 2009 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Joerg Sonnenberger. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 __RCSID("$NetBSD: nbperf-chm.c,v 1.2 2009/08/24 17:12:46 joerg Exp $"); 36 37 #include <err.h> 38 #include <inttypes.h> 39 #include <stdlib.h> 40 #include <stdio.h> 41 #include <string.h> 42 43 #include "nbperf.h" 44 45 #ifdef BUILD_CHM3 46 #include "graph3.h" 47 #else 48 #include "graph2.h" 49 #endif 50 51 /* 52 * A full description of the algorithm can be found in: 53 * "An optimal algorithm for generating minimal perfect hash functions" 54 * by Czech, Havas and Majewski in Information Processing Letters, 55 * 43(5):256-264, October 1992. 56 */ 57 58 /* 59 * The algorithm is based on random, acyclic graphs. 60 * 61 * Each edge in the represents a key. The vertices are the reminder of 62 * the hash function mod n. n = cm with c > 2, otherwise the propability 63 * of finding an acyclic graph is very low (for 2-graphs). The constant 64 * for 3-graphs is 1.24. 65 * 66 * After the hashing phase, the graph is checked for cycles. 67 * A cycle-free graph is either empty or has a vertex of degree 1. 68 * Removing the edge for this vertex doesn't change this property, 69 * so applying this recursively reduces the size of the graph. 70 * If the graph is empty at the end of the process, it was acyclic. 71 * 72 * The assignment step now sets g[i] := 0 and processes the edges 73 * in reverse order of removal. That ensures that at least one vertex 74 * is always unvisited and can be assigned. 75 */ 76 77 struct state { 78 #ifdef BUILD_CHM3 79 struct graph3 graph; 80 #else 81 struct graph2 graph; 82 #endif 83 uint32_t *g; 84 uint8_t *visited; 85 }; 86 87 static void 88 assign_nodes(struct state *state) 89 { 90 #ifdef BUILD_CHM3 91 struct edge3 *e; 92 #else 93 struct edge2 *e; 94 #endif 95 size_t i; 96 uint32_t e_idx; 97 98 for (i = 0; i < state->graph.e; ++i) { 99 e_idx = state->graph.output_order[i]; 100 e = &state->graph.edges[e_idx]; 101 102 #ifdef BUILD_CHM3 103 if (!state->visited[e->left]) { 104 state->g[e->left] = (2 * state->graph.e + e_idx 105 - state->g[e->middle] - state->g[e->right]) 106 % state->graph.e; 107 } else if (!state->visited[e->middle]) { 108 state->g[e->middle] = (2 * state->graph.e + e_idx 109 - state->g[e->left] - state->g[e->right]) 110 % state->graph.e; 111 } else { 112 state->g[e->right] = (2 * state->graph.e + e_idx 113 - state->g[e->left] - state->g[e->middle]) 114 % state->graph.e; 115 } 116 state->visited[e->left] = 1; 117 state->visited[e->middle] = 1; 118 state->visited[e->right] = 1; 119 #else 120 if (!state->visited[e->left]) { 121 state->g[e->left] = (state->graph.e + e_idx 122 - state->g[e->right]) % state->graph.e; 123 } else { 124 state->g[e->right] = (state->graph.e + e_idx 125 - state->g[e->left]) % state->graph.e; 126 } 127 state->visited[e->left] = 1; 128 state->visited[e->right] = 1; 129 #endif 130 } 131 } 132 133 static void 134 print_hash(struct nbperf *nbperf, struct state *state) 135 { 136 uint32_t i, per_line; 137 const char *g_type; 138 int g_width; 139 140 fprintf(nbperf->output, "#include <stdlib.h>\n\n"); 141 142 fprintf(nbperf->output, "%suint32_t\n", 143 nbperf->static_hash ? "static " : ""); 144 fprintf(nbperf->output, 145 "%s(const void * __restrict key, size_t keylen)\n", 146 nbperf->hash_name); 147 fprintf(nbperf->output, "{\n"); 148 if (state->graph.v >= 65536) { 149 g_type = "uint32_t"; 150 g_width = 8; 151 per_line = 4; 152 } else if (state->graph.v >= 256) { 153 g_type = "uint16_t"; 154 g_width = 4; 155 per_line = 8; 156 } else { 157 g_type = "uint8_t"; 158 g_width = 2; 159 per_line = 10; 160 } 161 fprintf(nbperf->output, "\tstatic const %s g[%" PRId32 "] = {\n", 162 g_type, state->graph.v); 163 for (i = 0; i < state->graph.v; ++i) { 164 fprintf(nbperf->output, "%s0x%0*" PRIx32 ",%s", 165 (i % per_line == 0 ? "\t " : " "), 166 g_width, state->g[i], 167 (i % per_line == per_line - 1 ? "\n" : "")); 168 } 169 if (i % per_line != 0) 170 fprintf(nbperf->output, "\n\t};\n"); 171 else 172 fprintf(nbperf->output, "\t};\n"); 173 fprintf(nbperf->output, "\tuint32_t h[%zu];\n\n", nbperf->hash_size); 174 (*nbperf->print_hash)(nbperf, "\t", "key", "keylen", "h"); 175 #ifdef BUILD_CHM3 176 fprintf(nbperf->output, "\treturn (g[h[0] %% %" PRIu32 "] + " 177 "g[h[1] %% %" PRIu32 "] + " 178 "g[h[2] %% %" PRIu32"]) %% %" PRIu32 ";\n", 179 state->graph.v, state->graph.v, state->graph.v, state->graph.e); 180 #else 181 fprintf(nbperf->output, "\treturn (g[h[0] %% %" PRIu32 "] + " 182 "g[h[1] %% %" PRIu32"]) %% %" PRIu32 ";\n", 183 state->graph.v, state->graph.v, state->graph.e); 184 #endif 185 fprintf(nbperf->output, "}\n"); 186 187 if (nbperf->map_output != NULL) { 188 for (i = 0; i < state->graph.e; ++i) 189 fprintf(nbperf->map_output, "%" PRIu32 "\n", i); 190 } 191 } 192 193 int 194 #ifdef BUILD_CHM3 195 chm3_compute(struct nbperf *nbperf) 196 #else 197 chm_compute(struct nbperf *nbperf) 198 #endif 199 { 200 struct state state; 201 int retval = -1; 202 uint32_t v, e; 203 204 #ifdef BUILD_CHM3 205 if (nbperf->c == 0) 206 nbperf-> c = 1.24; 207 208 if (nbperf->c < 1.24) 209 errx(1, "The argument for option -c must be at least 1.24"); 210 211 if (nbperf->hash_size < 3) 212 errx(1, "The hash function must generate at least 3 values"); 213 #else 214 if (nbperf->c == 0) 215 nbperf-> c = 2; 216 217 if (nbperf->c < 2) 218 errx(1, "The argument for option -c must be at least 2"); 219 220 if (nbperf->hash_size < 2) 221 errx(1, "The hash function must generate at least 2 values"); 222 #endif 223 224 (*nbperf->seed_hash)(nbperf); 225 e = nbperf->n; 226 v = nbperf->c * nbperf->n; 227 #ifdef BUILD_CHM3 228 if (v == 1.24 * nbperf->n) 229 ++v; 230 if (v < 10) 231 v = 10; 232 #else 233 if (v == 2 * nbperf->n) 234 ++v; 235 #endif 236 237 state.g = calloc(sizeof(uint32_t), v); 238 state.visited = calloc(sizeof(uint8_t), v); 239 if (state.g == NULL || state.visited == NULL) 240 err(1, "malloc failed"); 241 242 #ifdef BUILD_CHM3 243 graph3_setup(&state.graph, v, e); 244 if (graph3_hash(nbperf, &state.graph)) 245 goto failed; 246 if (graph3_output_order(&state.graph)) 247 goto failed; 248 #else 249 graph2_setup(&state.graph, v, e); 250 if (graph2_hash(nbperf, &state.graph)) 251 goto failed; 252 if (graph2_output_order(&state.graph)) 253 goto failed; 254 #endif 255 assign_nodes(&state); 256 print_hash(nbperf, &state); 257 258 retval = 0; 259 260 failed: 261 #ifdef BUILD_CHM3 262 graph3_free(&state.graph); 263 #else 264 graph2_free(&state.graph); 265 #endif 266 free(state.g); 267 free(state.visited); 268 return retval; 269 } 270