1*72254be9Sriastradh /* $NetBSD: cdbw.c,v 1.9 2023/08/08 10:34:08 riastradh Exp $ */
2b9cf7d31Sjoerg /*-
35486dd5aSalnsn * Copyright (c) 2009, 2010, 2015 The NetBSD Foundation, Inc.
4b9cf7d31Sjoerg * All rights reserved.
5b9cf7d31Sjoerg *
6b9cf7d31Sjoerg * This code is derived from software contributed to The NetBSD Foundation
75486dd5aSalnsn * by Joerg Sonnenberger and Alexander Nasonov.
8b9cf7d31Sjoerg *
9b9cf7d31Sjoerg * Redistribution and use in source and binary forms, with or without
10b9cf7d31Sjoerg * modification, are permitted provided that the following conditions
11b9cf7d31Sjoerg * are met:
12b9cf7d31Sjoerg *
13b9cf7d31Sjoerg * 1. Redistributions of source code must retain the above copyright
14b9cf7d31Sjoerg * notice, this list of conditions and the following disclaimer.
15b9cf7d31Sjoerg * 2. Redistributions in binary form must reproduce the above copyright
16b9cf7d31Sjoerg * notice, this list of conditions and the following disclaimer in
17b9cf7d31Sjoerg * the documentation and/or other materials provided with the
18b9cf7d31Sjoerg * distribution.
19b9cf7d31Sjoerg *
20b9cf7d31Sjoerg * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21b9cf7d31Sjoerg * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22b9cf7d31Sjoerg * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23b9cf7d31Sjoerg * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24b9cf7d31Sjoerg * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25b9cf7d31Sjoerg * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
26b9cf7d31Sjoerg * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27b9cf7d31Sjoerg * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28b9cf7d31Sjoerg * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29b9cf7d31Sjoerg * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30b9cf7d31Sjoerg * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31b9cf7d31Sjoerg * SUCH DAMAGE.
32b9cf7d31Sjoerg */
33b9cf7d31Sjoerg
34b9cf7d31Sjoerg #if HAVE_NBTOOL_CONFIG_H
35b9cf7d31Sjoerg #include "nbtool_config.h"
36b9cf7d31Sjoerg #endif
37b9cf7d31Sjoerg
38b9cf7d31Sjoerg #include <sys/cdefs.h>
39*72254be9Sriastradh __RCSID("$NetBSD: cdbw.c,v 1.9 2023/08/08 10:34:08 riastradh Exp $");
40b9cf7d31Sjoerg
41b9cf7d31Sjoerg #include "namespace.h"
42b9cf7d31Sjoerg
43f7ef3610Sjoerg #if !HAVE_NBTOOL_CONFIG_H || HAVE_SYS_ENDIAN_H
44b9cf7d31Sjoerg #include <sys/endian.h>
45f7ef3610Sjoerg #endif
46b9cf7d31Sjoerg #include <sys/queue.h>
47b9cf7d31Sjoerg #include <cdbw.h>
48b9cf7d31Sjoerg #include <stdlib.h>
49b9cf7d31Sjoerg #include <string.h>
50b9cf7d31Sjoerg #include <unistd.h>
51b9cf7d31Sjoerg
52bb682c39Sjoerg #if !HAVE_NBTOOL_CONFIG_H
53bb682c39Sjoerg #include <sys/bitops.h>
54bb682c39Sjoerg #else
55bb682c39Sjoerg static inline int
my_fls32(uint32_t n)56bb682c39Sjoerg my_fls32(uint32_t n)
57bb682c39Sjoerg {
58bb682c39Sjoerg int v;
59bb682c39Sjoerg
60bb682c39Sjoerg if (!n)
61bb682c39Sjoerg return 0;
62bb682c39Sjoerg
63bb682c39Sjoerg v = 32;
64bb682c39Sjoerg if ((n & 0xFFFF0000U) == 0) {
65bb682c39Sjoerg n <<= 16;
66bb682c39Sjoerg v -= 16;
67bb682c39Sjoerg }
68bb682c39Sjoerg if ((n & 0xFF000000U) == 0) {
69bb682c39Sjoerg n <<= 8;
70bb682c39Sjoerg v -= 8;
71bb682c39Sjoerg }
72bb682c39Sjoerg if ((n & 0xF0000000U) == 0) {
73bb682c39Sjoerg n <<= 4;
74bb682c39Sjoerg v -= 4;
75bb682c39Sjoerg }
76bb682c39Sjoerg if ((n & 0xC0000000U) == 0) {
77bb682c39Sjoerg n <<= 2;
78bb682c39Sjoerg v -= 2;
79bb682c39Sjoerg }
80bb682c39Sjoerg if ((n & 0x80000000U) == 0) {
81bb682c39Sjoerg n <<= 1;
82bb682c39Sjoerg v -= 1;
83bb682c39Sjoerg }
84bb682c39Sjoerg return v;
85bb682c39Sjoerg }
86bb682c39Sjoerg
87bb682c39Sjoerg static inline void
fast_divide32_prepare(uint32_t div,uint32_t * m,uint8_t * s1,uint8_t * s2)88bb682c39Sjoerg fast_divide32_prepare(uint32_t div, uint32_t * m,
89bb682c39Sjoerg uint8_t *s1, uint8_t *s2)
90bb682c39Sjoerg {
91bb682c39Sjoerg uint64_t mt;
92bb682c39Sjoerg int l;
93bb682c39Sjoerg
94bb682c39Sjoerg l = my_fls32(div - 1);
95bb682c39Sjoerg mt = (uint64_t)(0x100000000ULL * ((1ULL << l) - div));
96bb682c39Sjoerg *m = (uint32_t)(mt / div + 1);
97bb682c39Sjoerg *s1 = (l > 1) ? 1U : (uint8_t)l;
98bb682c39Sjoerg *s2 = (l == 0) ? 0 : (uint8_t)(l - 1);
99bb682c39Sjoerg }
100bb682c39Sjoerg
101bb682c39Sjoerg static inline uint32_t
fast_divide32(uint32_t v,uint32_t div,uint32_t m,uint8_t s1,uint8_t s2)102bb682c39Sjoerg fast_divide32(uint32_t v, uint32_t div, uint32_t m, uint8_t s1,
103bb682c39Sjoerg uint8_t s2)
104bb682c39Sjoerg {
105bb682c39Sjoerg uint32_t t;
106bb682c39Sjoerg
107bb682c39Sjoerg t = (uint32_t)(((uint64_t)v * m) >> 32);
108bb682c39Sjoerg return (t + ((v - t) >> s1)) >> s2;
109bb682c39Sjoerg }
110bb682c39Sjoerg
111bb682c39Sjoerg static inline uint32_t
fast_remainder32(uint32_t v,uint32_t div,uint32_t m,uint8_t s1,uint8_t s2)112bb682c39Sjoerg fast_remainder32(uint32_t v, uint32_t div, uint32_t m, uint8_t s1,
113bb682c39Sjoerg uint8_t s2)
114bb682c39Sjoerg {
115bb682c39Sjoerg
116bb682c39Sjoerg return v - div * fast_divide32(v, div, m, s1, s2);
117bb682c39Sjoerg }
118bb682c39Sjoerg #endif
119bb682c39Sjoerg
120b9cf7d31Sjoerg #ifdef __weak_alias
121b9cf7d31Sjoerg __weak_alias(cdbw_close,_cdbw_close)
122b9cf7d31Sjoerg __weak_alias(cdbw_open,_cdbw_open)
123b9cf7d31Sjoerg __weak_alias(cdbw_output,_cdbw_output)
124b9cf7d31Sjoerg __weak_alias(cdbw_put,_cdbw_put)
125b9cf7d31Sjoerg __weak_alias(cdbw_put_data,_cdbw_put_data)
126b9cf7d31Sjoerg __weak_alias(cdbw_put_key,_cdbw_put_key)
127b9cf7d31Sjoerg #endif
128b9cf7d31Sjoerg
129b9cf7d31Sjoerg struct key_hash {
130b9cf7d31Sjoerg SLIST_ENTRY(key_hash) link;
131b9cf7d31Sjoerg uint32_t hashes[3];
132b9cf7d31Sjoerg uint32_t idx;
133b9cf7d31Sjoerg void *key;
134b9cf7d31Sjoerg size_t keylen;
135b9cf7d31Sjoerg };
136b9cf7d31Sjoerg
137b9cf7d31Sjoerg SLIST_HEAD(key_hash_head, key_hash);
138b9cf7d31Sjoerg
139b9cf7d31Sjoerg struct cdbw {
140b9cf7d31Sjoerg size_t data_counter;
141b9cf7d31Sjoerg size_t data_allocated;
142b9cf7d31Sjoerg size_t data_size;
143b9cf7d31Sjoerg size_t *data_len;
144b9cf7d31Sjoerg void **data_ptr;
145b9cf7d31Sjoerg
146b9cf7d31Sjoerg size_t hash_size;
147b9cf7d31Sjoerg struct key_hash_head *hash;
148b9cf7d31Sjoerg size_t key_counter;
149b9cf7d31Sjoerg };
150b9cf7d31Sjoerg
151b9cf7d31Sjoerg /* Max. data counter that allows the index size to be 32bit. */
152b9cf7d31Sjoerg static const uint32_t max_data_counter = 0xccccccccU;
153b9cf7d31Sjoerg
154b9cf7d31Sjoerg struct cdbw *
cdbw_open(void)155b9cf7d31Sjoerg cdbw_open(void)
156b9cf7d31Sjoerg {
157b9cf7d31Sjoerg struct cdbw *cdbw;
158b9cf7d31Sjoerg size_t i;
159b9cf7d31Sjoerg
160b9cf7d31Sjoerg cdbw = calloc(sizeof(*cdbw), 1);
161b9cf7d31Sjoerg if (cdbw == NULL)
162b9cf7d31Sjoerg return NULL;
163b9cf7d31Sjoerg
164b9cf7d31Sjoerg cdbw->hash_size = 1024;
165b9cf7d31Sjoerg cdbw->hash = calloc(cdbw->hash_size, sizeof(*cdbw->hash));
166b9cf7d31Sjoerg if (cdbw->hash == NULL) {
167b9cf7d31Sjoerg free(cdbw);
168b9cf7d31Sjoerg return NULL;
169b9cf7d31Sjoerg }
170b9cf7d31Sjoerg
171b9cf7d31Sjoerg for (i = 0; i < cdbw->hash_size; ++i)
172b9cf7d31Sjoerg SLIST_INIT(cdbw->hash + i);
173b9cf7d31Sjoerg
174b9cf7d31Sjoerg return cdbw;
175b9cf7d31Sjoerg }
176b9cf7d31Sjoerg
177b9cf7d31Sjoerg int
cdbw_put(struct cdbw * cdbw,const void * key,size_t keylen,const void * data,size_t datalen)178b9cf7d31Sjoerg cdbw_put(struct cdbw *cdbw, const void *key, size_t keylen,
179b9cf7d31Sjoerg const void *data, size_t datalen)
180b9cf7d31Sjoerg {
181b9cf7d31Sjoerg uint32_t idx;
182b9cf7d31Sjoerg int rv;
183b9cf7d31Sjoerg
184b9cf7d31Sjoerg rv = cdbw_put_data(cdbw, data, datalen, &idx);
185b9cf7d31Sjoerg if (rv)
186b9cf7d31Sjoerg return rv;
187b9cf7d31Sjoerg rv = cdbw_put_key(cdbw, key, keylen, idx);
188b9cf7d31Sjoerg if (rv) {
189b9cf7d31Sjoerg --cdbw->data_counter;
190b9cf7d31Sjoerg free(cdbw->data_ptr[cdbw->data_counter]);
191b9cf7d31Sjoerg cdbw->data_size -= datalen;
192b9cf7d31Sjoerg return rv;
193b9cf7d31Sjoerg }
194b9cf7d31Sjoerg return 0;
195b9cf7d31Sjoerg }
196b9cf7d31Sjoerg
197b9cf7d31Sjoerg int
cdbw_put_data(struct cdbw * cdbw,const void * data,size_t datalen,uint32_t * idx)198b9cf7d31Sjoerg cdbw_put_data(struct cdbw *cdbw, const void *data, size_t datalen,
199b9cf7d31Sjoerg uint32_t *idx)
200b9cf7d31Sjoerg {
201b9cf7d31Sjoerg
202b9cf7d31Sjoerg if (cdbw->data_counter == max_data_counter)
203b9cf7d31Sjoerg return -1;
204b9cf7d31Sjoerg
205b9cf7d31Sjoerg if (cdbw->data_size + datalen < cdbw->data_size ||
206b9cf7d31Sjoerg cdbw->data_size + datalen > 0xffffffffU)
207b9cf7d31Sjoerg return -1; /* Overflow */
208b9cf7d31Sjoerg
209b9cf7d31Sjoerg if (cdbw->data_allocated == cdbw->data_counter) {
210b9cf7d31Sjoerg void **new_data_ptr;
211b9cf7d31Sjoerg size_t *new_data_len;
212b9cf7d31Sjoerg size_t new_allocated;
213b9cf7d31Sjoerg
214b9cf7d31Sjoerg if (cdbw->data_allocated == 0)
215b9cf7d31Sjoerg new_allocated = 256;
216b9cf7d31Sjoerg else
217b9cf7d31Sjoerg new_allocated = cdbw->data_allocated * 2;
218b9cf7d31Sjoerg
219b9cf7d31Sjoerg new_data_ptr = realloc(cdbw->data_ptr,
220b9cf7d31Sjoerg sizeof(*cdbw->data_ptr) * new_allocated);
221b9cf7d31Sjoerg if (new_data_ptr == NULL)
222b9cf7d31Sjoerg return -1;
223b9cf7d31Sjoerg cdbw->data_ptr = new_data_ptr;
224b9cf7d31Sjoerg
225b9cf7d31Sjoerg new_data_len = realloc(cdbw->data_len,
226b9cf7d31Sjoerg sizeof(*cdbw->data_len) * new_allocated);
227b9cf7d31Sjoerg if (new_data_len == NULL)
228b9cf7d31Sjoerg return -1;
229b9cf7d31Sjoerg cdbw->data_len = new_data_len;
230b9cf7d31Sjoerg
231b9cf7d31Sjoerg cdbw->data_allocated = new_allocated;
232b9cf7d31Sjoerg }
233b9cf7d31Sjoerg
234b9cf7d31Sjoerg cdbw->data_ptr[cdbw->data_counter] = malloc(datalen);
235b9cf7d31Sjoerg if (cdbw->data_ptr[cdbw->data_counter] == NULL)
236b9cf7d31Sjoerg return -1;
237b9cf7d31Sjoerg memcpy(cdbw->data_ptr[cdbw->data_counter], data, datalen);
238b9cf7d31Sjoerg cdbw->data_len[cdbw->data_counter] = datalen;
239b9cf7d31Sjoerg cdbw->data_size += datalen;
24051d5b8a1Sjoerg *idx = cdbw->data_counter++;
241b9cf7d31Sjoerg return 0;
242b9cf7d31Sjoerg }
243b9cf7d31Sjoerg
244b9cf7d31Sjoerg int
cdbw_put_key(struct cdbw * cdbw,const void * key,size_t keylen,uint32_t idx)245b9cf7d31Sjoerg cdbw_put_key(struct cdbw *cdbw, const void *key, size_t keylen, uint32_t idx)
246b9cf7d31Sjoerg {
247b9cf7d31Sjoerg uint32_t hashes[3];
248b9cf7d31Sjoerg struct key_hash_head *head, *head2, *new_head;
249b9cf7d31Sjoerg struct key_hash *key_hash;
250b9cf7d31Sjoerg size_t new_hash_size, i;
251b9cf7d31Sjoerg
252b9cf7d31Sjoerg if (idx >= cdbw->data_counter ||
253b9cf7d31Sjoerg cdbw->key_counter == max_data_counter)
254b9cf7d31Sjoerg return -1;
255b9cf7d31Sjoerg
256b9cf7d31Sjoerg mi_vector_hash(key, keylen, 0, hashes);
257b9cf7d31Sjoerg
258b9cf7d31Sjoerg head = cdbw->hash + (hashes[0] & (cdbw->hash_size - 1));
259b9cf7d31Sjoerg SLIST_FOREACH(key_hash, head, link) {
260b9cf7d31Sjoerg if (key_hash->keylen != keylen)
261b9cf7d31Sjoerg continue;
262b9cf7d31Sjoerg if (key_hash->hashes[0] != hashes[0])
263b9cf7d31Sjoerg continue;
264b9cf7d31Sjoerg if (key_hash->hashes[1] != hashes[1])
265b9cf7d31Sjoerg continue;
266b9cf7d31Sjoerg if (key_hash->hashes[2] != hashes[2])
267b9cf7d31Sjoerg continue;
268b9cf7d31Sjoerg if (memcmp(key, key_hash->key, keylen))
269b9cf7d31Sjoerg continue;
270b9cf7d31Sjoerg return -1;
271b9cf7d31Sjoerg }
272b9cf7d31Sjoerg key_hash = malloc(sizeof(*key_hash));
273b9cf7d31Sjoerg if (key_hash == NULL)
274b9cf7d31Sjoerg return -1;
275b9cf7d31Sjoerg key_hash->key = malloc(keylen);
276b9cf7d31Sjoerg if (key_hash->key == NULL) {
277b9cf7d31Sjoerg free(key_hash);
278b9cf7d31Sjoerg return -1;
279b9cf7d31Sjoerg }
280b9cf7d31Sjoerg memcpy(key_hash->key, key, keylen);
281b9cf7d31Sjoerg key_hash->hashes[0] = hashes[0];
282b9cf7d31Sjoerg key_hash->hashes[1] = hashes[1];
283b9cf7d31Sjoerg key_hash->hashes[2] = hashes[2];
284b9cf7d31Sjoerg key_hash->keylen = keylen;
285b9cf7d31Sjoerg key_hash->idx = idx;
286b9cf7d31Sjoerg SLIST_INSERT_HEAD(head, key_hash, link);
287b9cf7d31Sjoerg ++cdbw->key_counter;
288b9cf7d31Sjoerg
289b9cf7d31Sjoerg if (cdbw->key_counter <= cdbw->hash_size)
290b9cf7d31Sjoerg return 0;
291b9cf7d31Sjoerg
292b9cf7d31Sjoerg /* Try to resize the hash table, but ignore errors. */
293b9cf7d31Sjoerg new_hash_size = cdbw->hash_size * 2;
294b9cf7d31Sjoerg new_head = calloc(sizeof(*new_head), new_hash_size);
295b9cf7d31Sjoerg if (new_head == NULL)
296b9cf7d31Sjoerg return 0;
297b9cf7d31Sjoerg
298b9cf7d31Sjoerg head = &cdbw->hash[hashes[0] & (cdbw->hash_size - 1)];
299b9cf7d31Sjoerg for (i = 0; i < new_hash_size; ++i)
300b9cf7d31Sjoerg SLIST_INIT(new_head + i);
301b9cf7d31Sjoerg
302b9cf7d31Sjoerg for (i = 0; i < cdbw->hash_size; ++i) {
303b9cf7d31Sjoerg head = cdbw->hash + i;
304b9cf7d31Sjoerg
305b9cf7d31Sjoerg while ((key_hash = SLIST_FIRST(head)) != NULL) {
306b9cf7d31Sjoerg SLIST_REMOVE_HEAD(head, link);
307b9cf7d31Sjoerg head2 = new_head +
308b9cf7d31Sjoerg (key_hash->hashes[0] & (new_hash_size - 1));
309b9cf7d31Sjoerg SLIST_INSERT_HEAD(head2, key_hash, link);
310b9cf7d31Sjoerg }
311b9cf7d31Sjoerg }
312b9cf7d31Sjoerg free(cdbw->hash);
313b9cf7d31Sjoerg cdbw->hash_size = new_hash_size;
314b9cf7d31Sjoerg cdbw->hash = new_head;
315b9cf7d31Sjoerg
316b9cf7d31Sjoerg return 0;
317b9cf7d31Sjoerg }
318b9cf7d31Sjoerg
319b9cf7d31Sjoerg void
cdbw_close(struct cdbw * cdbw)320b9cf7d31Sjoerg cdbw_close(struct cdbw *cdbw)
321b9cf7d31Sjoerg {
322b9cf7d31Sjoerg struct key_hash_head *head;
323b9cf7d31Sjoerg struct key_hash *key_hash;
324b9cf7d31Sjoerg size_t i;
325b9cf7d31Sjoerg
326b9cf7d31Sjoerg for (i = 0; i < cdbw->hash_size; ++i) {
327b9cf7d31Sjoerg head = cdbw->hash + i;
328b9cf7d31Sjoerg while ((key_hash = SLIST_FIRST(head)) != NULL) {
329b9cf7d31Sjoerg SLIST_REMOVE_HEAD(head, link);
330b9cf7d31Sjoerg free(key_hash->key);
331b9cf7d31Sjoerg free(key_hash);
332b9cf7d31Sjoerg }
333b9cf7d31Sjoerg }
334b9cf7d31Sjoerg
335b9cf7d31Sjoerg for (i = 0; i < cdbw->data_counter; ++i)
336b9cf7d31Sjoerg free(cdbw->data_ptr[i]);
337b9cf7d31Sjoerg free(cdbw->data_ptr);
338b9cf7d31Sjoerg free(cdbw->data_len);
339b9cf7d31Sjoerg free(cdbw->hash);
340b9cf7d31Sjoerg free(cdbw);
341b9cf7d31Sjoerg }
342b9cf7d31Sjoerg
343f7ef3610Sjoerg uint32_t
cdbw_stable_seeder(void)344f7ef3610Sjoerg cdbw_stable_seeder(void)
345f7ef3610Sjoerg {
346f7ef3610Sjoerg return 0;
347f7ef3610Sjoerg }
348f7ef3610Sjoerg
3495486dd5aSalnsn /*
350bb682c39Sjoerg * For each vertex in the 3-graph, the incidence lists needs to be kept.
351bb682c39Sjoerg * Avoid storing the full list by just XORing the indices of the still
352bb682c39Sjoerg * incident edges and remember the number of such edges as that's all
353bb682c39Sjoerg * the peeling computation needs. This is inspired by:
3545486dd5aSalnsn * Cache-Oblivious Peeling of Random Hypergraphs by Djamal Belazzougui,
3555486dd5aSalnsn * Paolo Boldi, Giuseppe Ottaviano, Rossano Venturini, and Sebastiano
356bb682c39Sjoerg * Vigna. https://arxiv.org/abs/1312.0526
357bb682c39Sjoerg *
358bb682c39Sjoerg * Unlike in the paper, we don't care about external storage and have
359bb682c39Sjoerg * the edge list at hand all the time. As such, no ordering is necessary
360bb682c39Sjoerg * and the vertices of the edge don't have to be copied.
361bb682c39Sjoerg *
362bb682c39Sjoerg * The core observation of the paper above is that for a degree of one,
363bb682c39Sjoerg * the incident edge can be obtained directly.
3645486dd5aSalnsn */
365bb682c39Sjoerg struct vertex {
366bb682c39Sjoerg uint32_t degree;
367bb682c39Sjoerg uint32_t edges;
368b9cf7d31Sjoerg };
369b9cf7d31Sjoerg
370b9cf7d31Sjoerg struct edge {
371bb682c39Sjoerg uint32_t vertices[3];
372b9cf7d31Sjoerg uint32_t idx;
373b9cf7d31Sjoerg };
374b9cf7d31Sjoerg
375b9cf7d31Sjoerg struct state {
376b9cf7d31Sjoerg uint32_t data_entries;
377b9cf7d31Sjoerg uint32_t entries;
378b9cf7d31Sjoerg uint32_t keys;
379b9cf7d31Sjoerg uint32_t seed;
380b9cf7d31Sjoerg
381b9cf7d31Sjoerg uint32_t *g;
382b9cf7d31Sjoerg char *visited;
383b9cf7d31Sjoerg
384bb682c39Sjoerg struct vertex *vertices;
385b9cf7d31Sjoerg struct edge *edges;
386b9cf7d31Sjoerg uint32_t output_index;
387b9cf7d31Sjoerg uint32_t *output_order;
388b9cf7d31Sjoerg };
389b9cf7d31Sjoerg
3905486dd5aSalnsn /*
391bb682c39Sjoerg * Add (delta == 1) or remove (delta == -1) the edge e
392bb682c39Sjoerg * from the incidence lists.
3935486dd5aSalnsn */
3945486dd5aSalnsn static inline void
change_edge(struct state * state,int delta,uint32_t e)395bb682c39Sjoerg change_edge(struct state *state, int delta, uint32_t e)
396b9cf7d31Sjoerg {
397bb682c39Sjoerg int i;
398bb682c39Sjoerg struct vertex *v;
399bb682c39Sjoerg struct edge *e_ = &state->edges[e];
400b9cf7d31Sjoerg
401bb682c39Sjoerg for (i = 0; i < 3; ++i) {
402bb682c39Sjoerg v = &state->vertices[e_->vertices[i]];
403bb682c39Sjoerg v->edges ^= e;
404bb682c39Sjoerg v->degree += delta;
405bb682c39Sjoerg }
406b9cf7d31Sjoerg }
407b9cf7d31Sjoerg
4085486dd5aSalnsn static inline void
remove_vertex(struct state * state,uint32_t v)409bb682c39Sjoerg remove_vertex(struct state *state, uint32_t v)
4105486dd5aSalnsn {
411bb682c39Sjoerg struct vertex *v_ = &state->vertices[v];
412bb682c39Sjoerg uint32_t e;
413b9cf7d31Sjoerg
414bb682c39Sjoerg if (v_->degree == 1) {
415bb682c39Sjoerg e = v_->edges;
4165486dd5aSalnsn state->output_order[--state->output_index] = e;
417bb682c39Sjoerg change_edge(state, -1, e);
4185486dd5aSalnsn }
419b9cf7d31Sjoerg }
420b9cf7d31Sjoerg
421b9cf7d31Sjoerg static int
build_graph(struct cdbw * cdbw,struct state * state)422b9cf7d31Sjoerg build_graph(struct cdbw *cdbw, struct state *state)
423b9cf7d31Sjoerg {
424b9cf7d31Sjoerg struct key_hash_head *head;
425b9cf7d31Sjoerg struct key_hash *key_hash;
426b9cf7d31Sjoerg struct edge *e;
427bb682c39Sjoerg uint32_t entries_m;
428bb682c39Sjoerg uint8_t entries_s1, entries_s2;
42951d5b8a1Sjoerg uint32_t hashes[3];
43051d5b8a1Sjoerg size_t i;
431bb682c39Sjoerg int j;
432b9cf7d31Sjoerg
433bb682c39Sjoerg memset(state->vertices, 0, sizeof(*state->vertices) * state->entries);
4345486dd5aSalnsn
435b9cf7d31Sjoerg e = state->edges;
436bb682c39Sjoerg fast_divide32_prepare(state->entries, &entries_m, &entries_s1,
437bb682c39Sjoerg &entries_s2);
438bb682c39Sjoerg
439b9cf7d31Sjoerg for (i = 0; i < cdbw->hash_size; ++i) {
440b9cf7d31Sjoerg head = &cdbw->hash[i];
441b9cf7d31Sjoerg SLIST_FOREACH(key_hash, head, link) {
442b9cf7d31Sjoerg mi_vector_hash(key_hash->key, key_hash->keylen,
443b9cf7d31Sjoerg state->seed, hashes);
444b9cf7d31Sjoerg
445bb682c39Sjoerg for (j = 0; j < 3; ++j) {
446bb682c39Sjoerg e->vertices[j] = fast_remainder32(hashes[j],
447bb682c39Sjoerg state->entries, entries_m, entries_s1,
448bb682c39Sjoerg entries_s2);
449bb682c39Sjoerg }
450ca946cd6Sjoerg
451bb682c39Sjoerg if (e->vertices[0] == e->vertices[1])
452bb682c39Sjoerg return -1;
453bb682c39Sjoerg if (e->vertices[0] == e->vertices[2])
454bb682c39Sjoerg return -1;
455bb682c39Sjoerg if (e->vertices[1] == e->vertices[2])
456bb682c39Sjoerg return -1;
457bb682c39Sjoerg e->idx = key_hash->idx;
458b9cf7d31Sjoerg ++e;
459b9cf7d31Sjoerg }
460b9cf7d31Sjoerg }
461b9cf7d31Sjoerg
462bb682c39Sjoerg /*
463bb682c39Sjoerg * Do the edge processing separately as there is a good chance
464bb682c39Sjoerg * the degraded edge case above will happen; this avoid
465bb682c39Sjoerg *unnecessary work.
466bb682c39Sjoerg */
467bb682c39Sjoerg for (i = 0; i < state->keys; ++i)
468bb682c39Sjoerg change_edge(state, 1, i);
469bb682c39Sjoerg
470b9cf7d31Sjoerg state->output_index = state->keys;
471b9cf7d31Sjoerg for (i = 0; i < state->entries; ++i)
4725486dd5aSalnsn remove_vertex(state, i);
473b9cf7d31Sjoerg
474b9cf7d31Sjoerg i = state->keys;
475b9cf7d31Sjoerg while (i > 0 && i > state->output_index) {
476b9cf7d31Sjoerg --i;
477b9cf7d31Sjoerg e = state->edges + state->output_order[i];
478bb682c39Sjoerg for (j = 0; j < 3; ++j)
479bb682c39Sjoerg remove_vertex(state, e->vertices[j]);
480b9cf7d31Sjoerg }
481b9cf7d31Sjoerg
482b9cf7d31Sjoerg return state->output_index == 0 ? 0 : -1;
483b9cf7d31Sjoerg }
484b9cf7d31Sjoerg
485b9cf7d31Sjoerg static void
assign_nodes(struct state * state)486b9cf7d31Sjoerg assign_nodes(struct state *state)
487b9cf7d31Sjoerg {
488b9cf7d31Sjoerg struct edge *e;
489b9cf7d31Sjoerg size_t i;
490b9cf7d31Sjoerg
491bb682c39Sjoerg uint32_t v0, v1, v2, entries_m;
492bb682c39Sjoerg uint8_t entries_s1, entries_s2;
493bb682c39Sjoerg
494bb682c39Sjoerg fast_divide32_prepare(state->data_entries, &entries_m, &entries_s1,
495bb682c39Sjoerg &entries_s2);
496bb682c39Sjoerg
497b9cf7d31Sjoerg for (i = 0; i < state->keys; ++i) {
498b9cf7d31Sjoerg e = state->edges + state->output_order[i];
499bb682c39Sjoerg if (!state->visited[e->vertices[0]]) {
500bb682c39Sjoerg v0 = e->vertices[0];
501bb682c39Sjoerg v1 = e->vertices[1];
502bb682c39Sjoerg v2 = e->vertices[2];
503bb682c39Sjoerg } else if (!state->visited[e->vertices[1]]) {
504bb682c39Sjoerg v0 = e->vertices[1];
505bb682c39Sjoerg v1 = e->vertices[0];
506bb682c39Sjoerg v2 = e->vertices[2];
507b9cf7d31Sjoerg } else {
508bb682c39Sjoerg v0 = e->vertices[2];
509bb682c39Sjoerg v1 = e->vertices[0];
510bb682c39Sjoerg v2 = e->vertices[1];
511b9cf7d31Sjoerg }
512bb682c39Sjoerg state->g[v0] =
513bb682c39Sjoerg fast_remainder32((2 * state->data_entries + e->idx
514bb682c39Sjoerg - state->g[v1] - state->g[v2]),
515bb682c39Sjoerg state->data_entries, entries_m,
516bb682c39Sjoerg entries_s1, entries_s2);
517bb682c39Sjoerg state->visited[v0] = 1;
518bb682c39Sjoerg state->visited[v1] = 1;
519bb682c39Sjoerg state->visited[v2] = 1;
520b9cf7d31Sjoerg }
521b9cf7d31Sjoerg }
522b9cf7d31Sjoerg
523b9cf7d31Sjoerg static size_t
compute_size(uint32_t size)524b9cf7d31Sjoerg compute_size(uint32_t size)
525b9cf7d31Sjoerg {
526b9cf7d31Sjoerg if (size < 0x100)
527b9cf7d31Sjoerg return 1;
528b9cf7d31Sjoerg else if (size < 0x10000)
529b9cf7d31Sjoerg return 2;
530b9cf7d31Sjoerg else
531b9cf7d31Sjoerg return 4;
532b9cf7d31Sjoerg }
533b9cf7d31Sjoerg
534b9cf7d31Sjoerg #define COND_FLUSH_BUFFER(n) do { \
535b9cf7d31Sjoerg if (__predict_false(cur_pos + (n) >= sizeof(buf))) { \
536b9cf7d31Sjoerg ret = write(fd, buf, cur_pos); \
537b9cf7d31Sjoerg if (ret == -1 || (size_t)ret != cur_pos) \
538b9cf7d31Sjoerg return -1; \
539b9cf7d31Sjoerg cur_pos = 0; \
540b9cf7d31Sjoerg } \
541388550b0Srillig } while (0)
542b9cf7d31Sjoerg
543b9cf7d31Sjoerg static int
print_hash(struct cdbw * cdbw,struct state * state,int fd,const char * descr)544b9cf7d31Sjoerg print_hash(struct cdbw *cdbw, struct state *state, int fd, const char *descr)
545b9cf7d31Sjoerg {
546b9cf7d31Sjoerg uint32_t data_size;
547b9cf7d31Sjoerg uint8_t buf[90000];
548b9cf7d31Sjoerg size_t i, size, size2, cur_pos;
549b9cf7d31Sjoerg ssize_t ret;
550b9cf7d31Sjoerg
551b9cf7d31Sjoerg memcpy(buf, "NBCDB\n\0", 7);
552b9cf7d31Sjoerg buf[7] = 1;
553b9cf7d31Sjoerg strncpy((char *)buf + 8, descr, 16);
55451d5b8a1Sjoerg le32enc(buf + 24, cdbw->data_size);
55551d5b8a1Sjoerg le32enc(buf + 28, cdbw->data_counter);
556b9cf7d31Sjoerg le32enc(buf + 32, state->entries);
557b9cf7d31Sjoerg le32enc(buf + 36, state->seed);
558b9cf7d31Sjoerg cur_pos = 40;
559b9cf7d31Sjoerg
560b9cf7d31Sjoerg size = compute_size(state->entries);
561b9cf7d31Sjoerg for (i = 0; i < state->entries; ++i) {
562b9cf7d31Sjoerg COND_FLUSH_BUFFER(4);
563b9cf7d31Sjoerg le32enc(buf + cur_pos, state->g[i]);
564b9cf7d31Sjoerg cur_pos += size;
565b9cf7d31Sjoerg }
56651d5b8a1Sjoerg size2 = compute_size(cdbw->data_size);
567b9cf7d31Sjoerg size = size * state->entries % size2;
568b9cf7d31Sjoerg if (size != 0) {
569b9cf7d31Sjoerg size = size2 - size;
570b9cf7d31Sjoerg COND_FLUSH_BUFFER(4);
571b9cf7d31Sjoerg le32enc(buf + cur_pos, 0);
572b9cf7d31Sjoerg cur_pos += size;
573b9cf7d31Sjoerg }
574b9cf7d31Sjoerg for (data_size = 0, i = 0; i < cdbw->data_counter; ++i) {
575b9cf7d31Sjoerg COND_FLUSH_BUFFER(4);
576b9cf7d31Sjoerg le32enc(buf + cur_pos, data_size);
577b9cf7d31Sjoerg cur_pos += size2;
57851d5b8a1Sjoerg data_size += cdbw->data_len[i];
579b9cf7d31Sjoerg }
580b9cf7d31Sjoerg COND_FLUSH_BUFFER(4);
581b9cf7d31Sjoerg le32enc(buf + cur_pos, data_size);
582b9cf7d31Sjoerg cur_pos += size2;
583b9cf7d31Sjoerg
584b9cf7d31Sjoerg for (i = 0; i < cdbw->data_counter; ++i) {
585b9cf7d31Sjoerg COND_FLUSH_BUFFER(cdbw->data_len[i]);
586b9cf7d31Sjoerg if (cdbw->data_len[i] < sizeof(buf)) {
587b9cf7d31Sjoerg memcpy(buf + cur_pos, cdbw->data_ptr[i],
588b9cf7d31Sjoerg cdbw->data_len[i]);
589b9cf7d31Sjoerg cur_pos += cdbw->data_len[i];
590b9cf7d31Sjoerg } else {
591b9cf7d31Sjoerg ret = write(fd, cdbw->data_ptr[i], cdbw->data_len[i]);
592b9cf7d31Sjoerg if (ret == -1 || (size_t)ret != cdbw->data_len[i])
593b9cf7d31Sjoerg return -1;
594b9cf7d31Sjoerg }
595b9cf7d31Sjoerg }
596b9cf7d31Sjoerg if (cur_pos != 0) {
597b9cf7d31Sjoerg ret = write(fd, buf, cur_pos);
598b9cf7d31Sjoerg if (ret == -1 || (size_t)ret != cur_pos)
599b9cf7d31Sjoerg return -1;
600b9cf7d31Sjoerg }
601b9cf7d31Sjoerg return 0;
602b9cf7d31Sjoerg }
603b9cf7d31Sjoerg
604b9cf7d31Sjoerg int
cdbw_output(struct cdbw * cdbw,int fd,const char * descr,uint32_t (* seedgen)(void))605*72254be9Sriastradh cdbw_output(struct cdbw *cdbw, int fd, const char *descr,
606b9cf7d31Sjoerg uint32_t (*seedgen)(void))
607b9cf7d31Sjoerg {
608b9cf7d31Sjoerg struct state state;
609b9cf7d31Sjoerg int rv;
610b9cf7d31Sjoerg
611b9cf7d31Sjoerg if (cdbw->data_counter == 0 || cdbw->key_counter == 0) {
612b9cf7d31Sjoerg state.entries = 0;
613b9cf7d31Sjoerg state.seed = 0;
614b9cf7d31Sjoerg print_hash(cdbw, &state, fd, descr);
615b9cf7d31Sjoerg return 0;
616b9cf7d31Sjoerg }
617b9cf7d31Sjoerg
618f7ef3610Sjoerg #if HAVE_NBTOOL_CONFIG_H
619f7ef3610Sjoerg if (seedgen == NULL)
620f7ef3610Sjoerg seedgen = cdbw_stable_seeder;
621f7ef3610Sjoerg #else
622b9cf7d31Sjoerg if (seedgen == NULL)
623b9cf7d31Sjoerg seedgen = arc4random;
624f7ef3610Sjoerg #endif
625b9cf7d31Sjoerg
626b9cf7d31Sjoerg rv = 0;
627b9cf7d31Sjoerg
62851d5b8a1Sjoerg state.keys = cdbw->key_counter;
62951d5b8a1Sjoerg state.data_entries = cdbw->data_counter;
630b9cf7d31Sjoerg state.entries = state.keys + (state.keys + 3) / 4;
631b9cf7d31Sjoerg if (state.entries < 10)
632b9cf7d31Sjoerg state.entries = 10;
633b9cf7d31Sjoerg
634b9cf7d31Sjoerg #define NALLOC(var, n) var = calloc(sizeof(*var), n)
635b9cf7d31Sjoerg NALLOC(state.g, state.entries);
636b9cf7d31Sjoerg NALLOC(state.visited, state.entries);
637bb682c39Sjoerg NALLOC(state.vertices, state.entries);
6385486dd5aSalnsn NALLOC(state.edges, state.keys);
639b9cf7d31Sjoerg NALLOC(state.output_order, state.keys);
640b9cf7d31Sjoerg #undef NALLOC
641b9cf7d31Sjoerg
642bb682c39Sjoerg if (state.g == NULL || state.visited == NULL || state.edges == NULL ||
643bb682c39Sjoerg state.vertices == NULL || state.output_order == NULL) {
644b9cf7d31Sjoerg rv = -1;
645b9cf7d31Sjoerg goto release;
646b9cf7d31Sjoerg }
647b9cf7d31Sjoerg
648f7ef3610Sjoerg state.seed = 0;
649b9cf7d31Sjoerg do {
650f7ef3610Sjoerg if (seedgen == cdbw_stable_seeder)
651f7ef3610Sjoerg ++state.seed;
652f7ef3610Sjoerg else
653b9cf7d31Sjoerg state.seed = (*seedgen)();
654b9cf7d31Sjoerg } while (build_graph(cdbw, &state));
655b9cf7d31Sjoerg
656b9cf7d31Sjoerg assign_nodes(&state);
657b9cf7d31Sjoerg rv = print_hash(cdbw, &state, fd, descr);
658b9cf7d31Sjoerg
659b9cf7d31Sjoerg release:
660b9cf7d31Sjoerg free(state.g);
661b9cf7d31Sjoerg free(state.visited);
662bb682c39Sjoerg free(state.vertices);
663b9cf7d31Sjoerg free(state.edges);
664b9cf7d31Sjoerg free(state.output_order);
665b9cf7d31Sjoerg
666b9cf7d31Sjoerg return rv;
667b9cf7d31Sjoerg }
668