1 /* $NetBSD: nbperf-chm.c,v 1.3 2011/10/21 23:47:11 joerg Exp $ */ 2 /*- 3 * Copyright (c) 2009 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Joerg Sonnenberger. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 #if HAVE_NBTOOL_CONFIG_H 34 #include "nbtool_config.h" 35 #endif 36 37 #include <sys/cdefs.h> 38 __RCSID("$NetBSD: nbperf-chm.c,v 1.3 2011/10/21 23:47:11 joerg Exp $"); 39 40 #include <err.h> 41 #include <inttypes.h> 42 #include <stdlib.h> 43 #include <stdio.h> 44 #include <string.h> 45 46 #include "nbperf.h" 47 48 #ifdef BUILD_CHM3 49 #include "graph3.h" 50 #else 51 #include "graph2.h" 52 #endif 53 54 /* 55 * A full description of the algorithm can be found in: 56 * "An optimal algorithm for generating minimal perfect hash functions" 57 * by Czech, Havas and Majewski in Information Processing Letters, 58 * 43(5):256-264, October 1992. 59 */ 60 61 /* 62 * The algorithm is based on random, acyclic graphs. 63 * 64 * Each edge in the represents a key. The vertices are the reminder of 65 * the hash function mod n. n = cm with c > 2, otherwise the propability 66 * of finding an acyclic graph is very low (for 2-graphs). The constant 67 * for 3-graphs is 1.24. 68 * 69 * After the hashing phase, the graph is checked for cycles. 70 * A cycle-free graph is either empty or has a vertex of degree 1. 71 * Removing the edge for this vertex doesn't change this property, 72 * so applying this recursively reduces the size of the graph. 73 * If the graph is empty at the end of the process, it was acyclic. 74 * 75 * The assignment step now sets g[i] := 0 and processes the edges 76 * in reverse order of removal. That ensures that at least one vertex 77 * is always unvisited and can be assigned. 78 */ 79 80 struct state { 81 #ifdef BUILD_CHM3 82 struct graph3 graph; 83 #else 84 struct graph2 graph; 85 #endif 86 uint32_t *g; 87 uint8_t *visited; 88 }; 89 90 static void 91 assign_nodes(struct state *state) 92 { 93 #ifdef BUILD_CHM3 94 struct edge3 *e; 95 #else 96 struct edge2 *e; 97 #endif 98 size_t i; 99 uint32_t e_idx; 100 101 for (i = 0; i < state->graph.e; ++i) { 102 e_idx = state->graph.output_order[i]; 103 e = &state->graph.edges[e_idx]; 104 105 #ifdef BUILD_CHM3 106 if (!state->visited[e->left]) { 107 state->g[e->left] = (2 * state->graph.e + e_idx 108 - state->g[e->middle] - state->g[e->right]) 109 % state->graph.e; 110 } else if (!state->visited[e->middle]) { 111 state->g[e->middle] = (2 * state->graph.e + e_idx 112 - state->g[e->left] - state->g[e->right]) 113 % state->graph.e; 114 } else { 115 state->g[e->right] = (2 * state->graph.e + e_idx 116 - state->g[e->left] - state->g[e->middle]) 117 % state->graph.e; 118 } 119 state->visited[e->left] = 1; 120 state->visited[e->middle] = 1; 121 state->visited[e->right] = 1; 122 #else 123 if (!state->visited[e->left]) { 124 state->g[e->left] = (state->graph.e + e_idx 125 - state->g[e->right]) % state->graph.e; 126 } else { 127 state->g[e->right] = (state->graph.e + e_idx 128 - state->g[e->left]) % state->graph.e; 129 } 130 state->visited[e->left] = 1; 131 state->visited[e->right] = 1; 132 #endif 133 } 134 } 135 136 static void 137 print_hash(struct nbperf *nbperf, struct state *state) 138 { 139 uint32_t i, per_line; 140 const char *g_type; 141 int g_width; 142 143 fprintf(nbperf->output, "#include <stdlib.h>\n\n"); 144 145 fprintf(nbperf->output, "%suint32_t\n", 146 nbperf->static_hash ? "static " : ""); 147 fprintf(nbperf->output, 148 "%s(const void * __restrict key, size_t keylen)\n", 149 nbperf->hash_name); 150 fprintf(nbperf->output, "{\n"); 151 if (state->graph.v >= 65536) { 152 g_type = "uint32_t"; 153 g_width = 8; 154 per_line = 4; 155 } else if (state->graph.v >= 256) { 156 g_type = "uint16_t"; 157 g_width = 4; 158 per_line = 8; 159 } else { 160 g_type = "uint8_t"; 161 g_width = 2; 162 per_line = 10; 163 } 164 fprintf(nbperf->output, "\tstatic const %s g[%" PRId32 "] = {\n", 165 g_type, state->graph.v); 166 for (i = 0; i < state->graph.v; ++i) { 167 fprintf(nbperf->output, "%s0x%0*" PRIx32 ",%s", 168 (i % per_line == 0 ? "\t " : " "), 169 g_width, state->g[i], 170 (i % per_line == per_line - 1 ? "\n" : "")); 171 } 172 if (i % per_line != 0) 173 fprintf(nbperf->output, "\n\t};\n"); 174 else 175 fprintf(nbperf->output, "\t};\n"); 176 fprintf(nbperf->output, "\tuint32_t h[%zu];\n\n", nbperf->hash_size); 177 (*nbperf->print_hash)(nbperf, "\t", "key", "keylen", "h"); 178 #ifdef BUILD_CHM3 179 fprintf(nbperf->output, "\treturn (g[h[0] %% %" PRIu32 "] + " 180 "g[h[1] %% %" PRIu32 "] + " 181 "g[h[2] %% %" PRIu32"]) %% %" PRIu32 ";\n", 182 state->graph.v, state->graph.v, state->graph.v, state->graph.e); 183 #else 184 fprintf(nbperf->output, "\treturn (g[h[0] %% %" PRIu32 "] + " 185 "g[h[1] %% %" PRIu32"]) %% %" PRIu32 ";\n", 186 state->graph.v, state->graph.v, state->graph.e); 187 #endif 188 fprintf(nbperf->output, "}\n"); 189 190 if (nbperf->map_output != NULL) { 191 for (i = 0; i < state->graph.e; ++i) 192 fprintf(nbperf->map_output, "%" PRIu32 "\n", i); 193 } 194 } 195 196 int 197 #ifdef BUILD_CHM3 198 chm3_compute(struct nbperf *nbperf) 199 #else 200 chm_compute(struct nbperf *nbperf) 201 #endif 202 { 203 struct state state; 204 int retval = -1; 205 uint32_t v, e; 206 207 #ifdef BUILD_CHM3 208 if (nbperf->c == 0) 209 nbperf-> c = 1.24; 210 211 if (nbperf->c < 1.24) 212 errx(1, "The argument for option -c must be at least 1.24"); 213 214 if (nbperf->hash_size < 3) 215 errx(1, "The hash function must generate at least 3 values"); 216 #else 217 if (nbperf->c == 0) 218 nbperf-> c = 2; 219 220 if (nbperf->c < 2) 221 errx(1, "The argument for option -c must be at least 2"); 222 223 if (nbperf->hash_size < 2) 224 errx(1, "The hash function must generate at least 2 values"); 225 #endif 226 227 (*nbperf->seed_hash)(nbperf); 228 e = nbperf->n; 229 v = nbperf->c * nbperf->n; 230 #ifdef BUILD_CHM3 231 if (v == 1.24 * nbperf->n) 232 ++v; 233 if (v < 10) 234 v = 10; 235 #else 236 if (v == 2 * nbperf->n) 237 ++v; 238 #endif 239 240 state.g = calloc(sizeof(uint32_t), v); 241 state.visited = calloc(sizeof(uint8_t), v); 242 if (state.g == NULL || state.visited == NULL) 243 err(1, "malloc failed"); 244 245 #ifdef BUILD_CHM3 246 graph3_setup(&state.graph, v, e); 247 if (graph3_hash(nbperf, &state.graph)) 248 goto failed; 249 if (graph3_output_order(&state.graph)) 250 goto failed; 251 #else 252 graph2_setup(&state.graph, v, e); 253 if (graph2_hash(nbperf, &state.graph)) 254 goto failed; 255 if (graph2_output_order(&state.graph)) 256 goto failed; 257 #endif 258 assign_nodes(&state); 259 print_hash(nbperf, &state); 260 261 retval = 0; 262 263 failed: 264 #ifdef BUILD_CHM3 265 graph3_free(&state.graph); 266 #else 267 graph2_free(&state.graph); 268 #endif 269 free(state.g); 270 free(state.visited); 271 return retval; 272 } 273