1 /* $OpenBSD: icdb.c,v 1.6 2016/05/30 03:06:58 guenther Exp $ */ 2 /* 3 * Copyright (c) 2015 Ted Unangst <tedu@openbsd.org> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #include <fcntl.h> 18 #include <stdint.h> 19 #include <stdio.h> 20 #include <stddef.h> 21 #include <stdlib.h> 22 #include <string.h> 23 #include <unistd.h> 24 #include <icdb.h> 25 26 #include <sys/mman.h> 27 #include <sys/stat.h> 28 29 #include <siphash.h> 30 31 /* 32 * Creating a new icdb: icdb_new 33 * Opening existing icdb: icdb_open 34 * 35 * Adding new entries: icdb_add 36 * Adding entries does not update the disk or indices. 37 * 38 * Save to disk: icdb_save 39 * Update indices: icdb_rehash 40 * icdb_save will call rehash. 41 * 42 * Change an existing entry: icdb_update 43 * Changing entries does write to disk. 44 * 45 * Find an entry: icdb_lookup 46 * Looking up an entry is only defined when the indices are synced. 47 * 48 * Close and free resources: icdb_close 49 */ 50 51 /* 52 * There are two major modes of operation. 53 * 54 * Existing databases use the mmap codepath. The entire database is mapped 55 * into the address space for quick access. Individual entries may be updated, 56 * but no new entries added. 57 * 58 * New databases use malloc backed memory instead. The database may be saved 59 * with icdb_save. It should be saved to a new file to avoid corrupting any 60 * open databases in other processes. 61 */ 62 63 /* 64 * An icdb has the following format: 65 * struct icbinfo header 66 * indexes [ uint32_t * indexsize * nkeys ] 67 * entries [ entrysize * nentries ] 68 * 69 * To find an entry in the file, the user specifies which key to use. 70 * The key is hashed and looked up in the index. The index contains the 71 * position of the entry in the entries array. -1 identifies not found. 72 * Chaining is done by rehashing the hash. All keys are fixed size byte arrays. 73 */ 74 75 /* 76 * Header info for icdb. This struct is stored on disk. 77 */ 78 struct icdbinfo { 79 uint32_t magic; /* magic */ 80 uint32_t version; /* user specified version */ 81 uint32_t nentries; /* number of entries stored */ 82 uint32_t entrysize; /* size of each entry */ 83 uint32_t indexsize; /* number of entries in hash index */ 84 uint32_t nkeys; /* number of keys defined */ 85 uint32_t keysize[8]; /* size of each key */ 86 uint32_t keyoffset[8]; /* offset of each key in entry */ 87 SIPHASH_KEY siphashkey; /* random hash key */ 88 }; 89 90 /* 91 * In memory representation with auxiliary data. 92 * idxdata and entries will be written to disk after info. 93 */ 94 struct icdb { 95 struct icdbinfo *info; 96 void *idxdata[8]; 97 void *entries; 98 size_t maplen; 99 uint32_t allocated; 100 int fd; 101 }; 102 103 static const uint32_t magic = 0x1ca9d0b7; 104 105 static uint32_t 106 roundup(uint32_t num) 107 { 108 uint32_t r = 2; 109 110 while (r < num * 3 / 2) 111 r *= 2; 112 return r; 113 } 114 115 struct icdb * 116 icdb_new(uint32_t version, uint32_t nentries, uint32_t entrysize, 117 uint32_t nkeys, uint32_t *keysizes, uint32_t *keyoffsets) 118 { 119 struct icdb *db; 120 struct icdbinfo *info; 121 int i; 122 123 if (entrysize == 0 || entrysize > 1048576) 124 return NULL; 125 if (nkeys > 8) 126 return NULL; 127 128 if (!(db = calloc(1, sizeof(*db)))) 129 return NULL; 130 if (!(info = calloc(1, sizeof(*info)))) { 131 free(db); 132 return NULL; 133 } 134 db->info = info; 135 db->fd = -1; 136 info->magic = magic; 137 info->version = version; 138 if (nentries) 139 if ((db->entries = reallocarray(NULL, nentries, entrysize))) 140 db->allocated = nentries; 141 info->entrysize = entrysize; 142 info->nkeys = nkeys; 143 for (i = 0; i < nkeys; i++) { 144 info->keysize[i] = keysizes[i]; 145 info->keyoffset[i] = keyoffsets[i]; 146 } 147 return db; 148 } 149 DEF_WEAK(icdb_new); 150 151 struct icdb * 152 icdb_open(const char *name, int flags, uint32_t version) 153 { 154 struct icdb *db = NULL; 155 struct icdbinfo *info; 156 struct stat sb; 157 uint8_t *ptr = MAP_FAILED; 158 uint32_t baseoff, indexsize, idxmask, idxlen; 159 int fd, i; 160 161 if ((fd = open(name, flags | O_CLOEXEC)) == -1) 162 return NULL; 163 if (fstat(fd, &sb) != 0) 164 goto fail; 165 if (sb.st_size < sizeof(struct icdbinfo)) 166 goto fail; 167 ptr = mmap(NULL, sb.st_size, PROT_READ | 168 ((flags & O_RDWR) ? PROT_WRITE : 0), MAP_SHARED, fd, 0); 169 if (ptr == MAP_FAILED) 170 goto fail; 171 info = (struct icdbinfo *)ptr; 172 if (info->magic != magic) 173 goto fail; 174 if (info->version != version) 175 goto fail; 176 177 if (!(db = calloc(1, sizeof(*db)))) 178 goto fail; 179 db->info = info; 180 181 indexsize = info->indexsize; 182 idxmask = indexsize - 1; 183 idxlen = indexsize * sizeof(uint32_t); 184 baseoff = sizeof(*info) + idxlen * info->nkeys; 185 186 for (i = 0; i < info->nkeys; i++) 187 db->idxdata[i] = ptr + sizeof(*info) + i * idxlen; 188 db->entries = ptr + baseoff; 189 db->maplen = sb.st_size; 190 db->fd = fd; 191 return db; 192 193 fail: 194 if (ptr != MAP_FAILED) 195 munmap(ptr, sb.st_size); 196 if (fd != -1) 197 close(fd); 198 free(db); 199 return NULL; 200 } 201 DEF_WEAK(icdb_open); 202 203 int 204 icdb_get(struct icdb *db, void *entry, uint32_t idx) 205 { 206 uint32_t entrysize = db->info->entrysize; 207 208 memcpy(entry, (uint8_t *)db->entries + idx * entrysize, entrysize); 209 return 0; 210 } 211 DEF_WEAK(icdb_get); 212 213 int 214 icdb_lookup(struct icdb *db, int keynum, const void *key, void *entry, uint32_t *idxp) 215 { 216 struct icdbinfo *info = db->info; 217 uint32_t offset; 218 uint64_t hash; 219 uint32_t indexsize, idxmask, idxlen; 220 uint32_t *idxdata; 221 222 indexsize = info->indexsize; 223 idxmask = indexsize - 1; 224 idxlen = indexsize * sizeof(uint32_t); 225 226 idxdata = db->idxdata[keynum]; 227 228 hash = SipHash24(&info->siphashkey, key, info->keysize[keynum]); 229 while ((offset = idxdata[hash & idxmask]) != -1) { 230 if (icdb_get(db, entry, offset) != 0) 231 return -1; 232 if (memcmp((uint8_t *)entry + info->keyoffset[keynum], key, 233 info->keysize[keynum]) == 0) { 234 if (idxp) 235 *idxp = offset; 236 return 0; 237 } 238 hash = SipHash24(&info->siphashkey, &hash, sizeof(hash)); 239 } 240 return 1; 241 } 242 DEF_WEAK(icdb_lookup); 243 244 int 245 icdb_nentries(struct icdb *db) 246 { 247 return db->info->nentries; 248 } 249 DEF_WEAK(icdb_nentries); 250 251 const void * 252 icdb_entries(struct icdb *db) 253 { 254 return db->entries; 255 } 256 DEF_WEAK(icdb_entries); 257 258 int 259 icdb_update(struct icdb *db, const void *entry, int offset) 260 { 261 struct icdbinfo *info = db->info; 262 uint32_t entrysize = info->entrysize; 263 uint32_t baseoff; 264 uint32_t indexsize, idxmask, idxlen; 265 266 indexsize = info->indexsize; 267 idxmask = indexsize - 1; 268 idxlen = indexsize * sizeof(uint32_t); 269 baseoff = sizeof(*info) + idxlen * info->nkeys; 270 271 memcpy((uint8_t *)db->entries + offset * entrysize, 272 entry, entrysize); 273 if (db->fd != -1) 274 msync(db->entries + offset * entrysize, entrysize, MS_SYNC); 275 return 0; 276 } 277 DEF_WEAK(icdb_update); 278 279 int 280 icdb_add(struct icdb *db, const void *entry) 281 { 282 struct icdbinfo *info = db->info; 283 size_t entrysize = info->entrysize; 284 285 if (db->allocated == info->nentries) { 286 void *p; 287 size_t amt = db->allocated ? db->allocated * 2 : 63; 288 if (!(p = reallocarray(db->entries, amt, entrysize))) 289 return -1; 290 db->allocated = amt; 291 db->entries = p; 292 } 293 memcpy((uint8_t *)db->entries + info->nentries * entrysize, 294 entry, entrysize); 295 info->nentries++; 296 return 0; 297 } 298 DEF_WEAK(icdb_add); 299 300 int 301 icdb_rehash(struct icdb *db) 302 { 303 struct icdbinfo *info = db->info; 304 uint32_t entrysize = info->entrysize; 305 uint32_t indexsize, idxmask, idxlen; 306 int i, j; 307 308 indexsize = info->indexsize = roundup(info->nentries); 309 idxmask = indexsize - 1; 310 idxlen = sizeof(uint32_t) * indexsize; 311 312 arc4random_buf(&info->siphashkey, sizeof(info->siphashkey)); 313 314 for (i = 0; i < info->nkeys; i++) { 315 uint32_t *idxdata = reallocarray(db->idxdata[i], 316 indexsize, sizeof(uint32_t)); 317 if (!idxdata) 318 return -1; 319 memset(idxdata, 0xff, idxlen); 320 db->idxdata[i] = idxdata; 321 } 322 for (j = 0; j < info->nentries; j++) { 323 for (i = 0; i < info->nkeys; i++) { 324 uint32_t *idxdata = db->idxdata[i]; 325 uint64_t hash = SipHash24(&info->siphashkey, 326 (uint8_t *)db->entries + j * entrysize + 327 info->keyoffset[i], info->keysize[i]); 328 while (idxdata[hash & idxmask] != -1) 329 hash = SipHash24(&info->siphashkey, &hash, sizeof(hash)); 330 idxdata[hash & idxmask] = j; 331 } 332 } 333 return 0; 334 } 335 DEF_WEAK(icdb_rehash); 336 337 int 338 icdb_save(struct icdb *db, int fd) 339 { 340 struct icdbinfo *info = db->info; 341 uint32_t entrysize = info->entrysize; 342 uint32_t indexsize, idxlen; 343 int i; 344 345 if (icdb_rehash(db) != 0) 346 return -1; 347 348 indexsize = info->indexsize; 349 idxlen = sizeof(uint32_t) * indexsize; 350 351 if (ftruncate(fd, 0) != 0) 352 return -1; 353 if (write(fd, info, sizeof(*info)) != sizeof(*info)) 354 return -1; 355 for (i = 0; i < info->nkeys; i++) { 356 if (write(fd, db->idxdata[i], idxlen) != idxlen) 357 return -1; 358 } 359 if (write(fd, db->entries, info->nentries * entrysize) != 360 info->nentries * entrysize) 361 return -1; 362 return 0; 363 } 364 DEF_WEAK(icdb_save); 365 366 int 367 icdb_close(struct icdb *db) 368 { 369 int i; 370 371 if (db->fd == -1) { 372 for (i = 0; i < db->info->nkeys; i++) 373 free(db->idxdata[i]); 374 free(db->entries); 375 free(db->info); 376 } else { 377 munmap(db->info, db->maplen); 378 close(db->fd); 379 } 380 free(db); 381 return 0; 382 } 383 DEF_WEAK(icdb_close); 384