1*61d06d6bSBaptiste Daroussin /* $Id: dbm_map.c,v 1.8 2017/02/17 14:43:54 schwarze Exp $ */ 2*61d06d6bSBaptiste Daroussin /* 3*61d06d6bSBaptiste Daroussin * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org> 4*61d06d6bSBaptiste Daroussin * 5*61d06d6bSBaptiste Daroussin * Permission to use, copy, modify, and distribute this software for any 6*61d06d6bSBaptiste Daroussin * purpose with or without fee is hereby granted, provided that the above 7*61d06d6bSBaptiste Daroussin * copyright notice and this permission notice appear in all copies. 8*61d06d6bSBaptiste Daroussin * 9*61d06d6bSBaptiste Daroussin * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10*61d06d6bSBaptiste Daroussin * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11*61d06d6bSBaptiste Daroussin * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12*61d06d6bSBaptiste Daroussin * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13*61d06d6bSBaptiste Daroussin * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14*61d06d6bSBaptiste Daroussin * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15*61d06d6bSBaptiste Daroussin * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16*61d06d6bSBaptiste Daroussin * 17*61d06d6bSBaptiste Daroussin * Low-level routines for the map-based version 18*61d06d6bSBaptiste Daroussin * of the mandoc database, for read-only access. 19*61d06d6bSBaptiste Daroussin * The interface is defined in "dbm_map.h". 20*61d06d6bSBaptiste Daroussin */ 21*61d06d6bSBaptiste Daroussin #include "config.h" 22*61d06d6bSBaptiste Daroussin 23*61d06d6bSBaptiste Daroussin #include <sys/mman.h> 24*61d06d6bSBaptiste Daroussin #include <sys/stat.h> 25*61d06d6bSBaptiste Daroussin #include <sys/types.h> 26*61d06d6bSBaptiste Daroussin 27*61d06d6bSBaptiste Daroussin #if HAVE_ENDIAN 28*61d06d6bSBaptiste Daroussin #include <endian.h> 29*61d06d6bSBaptiste Daroussin #elif HAVE_SYS_ENDIAN 30*61d06d6bSBaptiste Daroussin #include <sys/endian.h> 31*61d06d6bSBaptiste Daroussin #elif HAVE_NTOHL 32*61d06d6bSBaptiste Daroussin #include <arpa/inet.h> 33*61d06d6bSBaptiste Daroussin #endif 34*61d06d6bSBaptiste Daroussin #if HAVE_ERR 35*61d06d6bSBaptiste Daroussin #include <err.h> 36*61d06d6bSBaptiste Daroussin #endif 37*61d06d6bSBaptiste Daroussin #include <errno.h> 38*61d06d6bSBaptiste Daroussin #include <fcntl.h> 39*61d06d6bSBaptiste Daroussin #include <regex.h> 40*61d06d6bSBaptiste Daroussin #include <stdint.h> 41*61d06d6bSBaptiste Daroussin #include <stdlib.h> 42*61d06d6bSBaptiste Daroussin #include <string.h> 43*61d06d6bSBaptiste Daroussin #include <unistd.h> 44*61d06d6bSBaptiste Daroussin 45*61d06d6bSBaptiste Daroussin #include "mansearch.h" 46*61d06d6bSBaptiste Daroussin #include "dbm_map.h" 47*61d06d6bSBaptiste Daroussin #include "dbm.h" 48*61d06d6bSBaptiste Daroussin 49*61d06d6bSBaptiste Daroussin static struct stat st; 50*61d06d6bSBaptiste Daroussin static char *dbm_base; 51*61d06d6bSBaptiste Daroussin static int ifd; 52*61d06d6bSBaptiste Daroussin static int32_t max_offset; 53*61d06d6bSBaptiste Daroussin 54*61d06d6bSBaptiste Daroussin /* 55*61d06d6bSBaptiste Daroussin * Open a disk-based database for read-only access. 56*61d06d6bSBaptiste Daroussin * Validate the file format as far as it is not mandoc-specific. 57*61d06d6bSBaptiste Daroussin * Return 0 on success. Return -1 and set errno on failure. 58*61d06d6bSBaptiste Daroussin */ 59*61d06d6bSBaptiste Daroussin int 60*61d06d6bSBaptiste Daroussin dbm_map(const char *fname) 61*61d06d6bSBaptiste Daroussin { 62*61d06d6bSBaptiste Daroussin int save_errno; 63*61d06d6bSBaptiste Daroussin const int32_t *magic; 64*61d06d6bSBaptiste Daroussin 65*61d06d6bSBaptiste Daroussin if ((ifd = open(fname, O_RDONLY)) == -1) 66*61d06d6bSBaptiste Daroussin return -1; 67*61d06d6bSBaptiste Daroussin if (fstat(ifd, &st) == -1) 68*61d06d6bSBaptiste Daroussin goto fail; 69*61d06d6bSBaptiste Daroussin if (st.st_size < 5) { 70*61d06d6bSBaptiste Daroussin warnx("dbm_map(%s): File too short", fname); 71*61d06d6bSBaptiste Daroussin errno = EFTYPE; 72*61d06d6bSBaptiste Daroussin goto fail; 73*61d06d6bSBaptiste Daroussin } 74*61d06d6bSBaptiste Daroussin if (st.st_size > INT32_MAX) { 75*61d06d6bSBaptiste Daroussin errno = EFBIG; 76*61d06d6bSBaptiste Daroussin goto fail; 77*61d06d6bSBaptiste Daroussin } 78*61d06d6bSBaptiste Daroussin if ((dbm_base = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, 79*61d06d6bSBaptiste Daroussin ifd, 0)) == MAP_FAILED) 80*61d06d6bSBaptiste Daroussin goto fail; 81*61d06d6bSBaptiste Daroussin magic = dbm_getint(0); 82*61d06d6bSBaptiste Daroussin if (be32toh(*magic) != MANDOCDB_MAGIC) { 83*61d06d6bSBaptiste Daroussin if (strncmp(dbm_base, "SQLite format 3", 15)) 84*61d06d6bSBaptiste Daroussin warnx("dbm_map(%s): " 85*61d06d6bSBaptiste Daroussin "Bad initial magic %x (expected %x)", 86*61d06d6bSBaptiste Daroussin fname, be32toh(*magic), MANDOCDB_MAGIC); 87*61d06d6bSBaptiste Daroussin else 88*61d06d6bSBaptiste Daroussin warnx("dbm_map(%s): " 89*61d06d6bSBaptiste Daroussin "Obsolete format based on SQLite 3", 90*61d06d6bSBaptiste Daroussin fname); 91*61d06d6bSBaptiste Daroussin errno = EFTYPE; 92*61d06d6bSBaptiste Daroussin goto fail; 93*61d06d6bSBaptiste Daroussin } 94*61d06d6bSBaptiste Daroussin magic = dbm_getint(1); 95*61d06d6bSBaptiste Daroussin if (be32toh(*magic) != MANDOCDB_VERSION) { 96*61d06d6bSBaptiste Daroussin warnx("dbm_map(%s): Bad version number %d (expected %d)", 97*61d06d6bSBaptiste Daroussin fname, be32toh(*magic), MANDOCDB_VERSION); 98*61d06d6bSBaptiste Daroussin errno = EFTYPE; 99*61d06d6bSBaptiste Daroussin goto fail; 100*61d06d6bSBaptiste Daroussin } 101*61d06d6bSBaptiste Daroussin max_offset = be32toh(*dbm_getint(3)) + sizeof(int32_t); 102*61d06d6bSBaptiste Daroussin if (st.st_size != max_offset) { 103*61d06d6bSBaptiste Daroussin warnx("dbm_map(%s): Inconsistent file size %lld (expected %d)", 104*61d06d6bSBaptiste Daroussin fname, (long long)st.st_size, max_offset); 105*61d06d6bSBaptiste Daroussin errno = EFTYPE; 106*61d06d6bSBaptiste Daroussin goto fail; 107*61d06d6bSBaptiste Daroussin } 108*61d06d6bSBaptiste Daroussin if ((magic = dbm_get(*dbm_getint(3))) == NULL) { 109*61d06d6bSBaptiste Daroussin errno = EFTYPE; 110*61d06d6bSBaptiste Daroussin goto fail; 111*61d06d6bSBaptiste Daroussin } 112*61d06d6bSBaptiste Daroussin if (be32toh(*magic) != MANDOCDB_MAGIC) { 113*61d06d6bSBaptiste Daroussin warnx("dbm_map(%s): Bad final magic %x (expected %x)", 114*61d06d6bSBaptiste Daroussin fname, be32toh(*magic), MANDOCDB_MAGIC); 115*61d06d6bSBaptiste Daroussin errno = EFTYPE; 116*61d06d6bSBaptiste Daroussin goto fail; 117*61d06d6bSBaptiste Daroussin } 118*61d06d6bSBaptiste Daroussin return 0; 119*61d06d6bSBaptiste Daroussin 120*61d06d6bSBaptiste Daroussin fail: 121*61d06d6bSBaptiste Daroussin save_errno = errno; 122*61d06d6bSBaptiste Daroussin close(ifd); 123*61d06d6bSBaptiste Daroussin errno = save_errno; 124*61d06d6bSBaptiste Daroussin return -1; 125*61d06d6bSBaptiste Daroussin } 126*61d06d6bSBaptiste Daroussin 127*61d06d6bSBaptiste Daroussin void 128*61d06d6bSBaptiste Daroussin dbm_unmap(void) 129*61d06d6bSBaptiste Daroussin { 130*61d06d6bSBaptiste Daroussin if (munmap(dbm_base, st.st_size) == -1) 131*61d06d6bSBaptiste Daroussin warn("dbm_unmap: munmap"); 132*61d06d6bSBaptiste Daroussin if (close(ifd) == -1) 133*61d06d6bSBaptiste Daroussin warn("dbm_unmap: close"); 134*61d06d6bSBaptiste Daroussin dbm_base = (char *)-1; 135*61d06d6bSBaptiste Daroussin } 136*61d06d6bSBaptiste Daroussin 137*61d06d6bSBaptiste Daroussin /* 138*61d06d6bSBaptiste Daroussin * Take a raw integer as it was read from the database. 139*61d06d6bSBaptiste Daroussin * Interpret it as an offset into the database file 140*61d06d6bSBaptiste Daroussin * and return a pointer to that place in the file. 141*61d06d6bSBaptiste Daroussin */ 142*61d06d6bSBaptiste Daroussin void * 143*61d06d6bSBaptiste Daroussin dbm_get(int32_t offset) 144*61d06d6bSBaptiste Daroussin { 145*61d06d6bSBaptiste Daroussin offset = be32toh(offset); 146*61d06d6bSBaptiste Daroussin if (offset < 0) { 147*61d06d6bSBaptiste Daroussin warnx("dbm_get: Database corrupt: offset %d", offset); 148*61d06d6bSBaptiste Daroussin return NULL; 149*61d06d6bSBaptiste Daroussin } 150*61d06d6bSBaptiste Daroussin if (offset >= max_offset) { 151*61d06d6bSBaptiste Daroussin warnx("dbm_get: Database corrupt: offset %d > %d", 152*61d06d6bSBaptiste Daroussin offset, max_offset); 153*61d06d6bSBaptiste Daroussin return NULL; 154*61d06d6bSBaptiste Daroussin } 155*61d06d6bSBaptiste Daroussin return dbm_base + offset; 156*61d06d6bSBaptiste Daroussin } 157*61d06d6bSBaptiste Daroussin 158*61d06d6bSBaptiste Daroussin /* 159*61d06d6bSBaptiste Daroussin * Assume the database starts with some integers. 160*61d06d6bSBaptiste Daroussin * Assume they are numbered starting from 0, increasing. 161*61d06d6bSBaptiste Daroussin * Get a pointer to one with the number "offset". 162*61d06d6bSBaptiste Daroussin */ 163*61d06d6bSBaptiste Daroussin int32_t * 164*61d06d6bSBaptiste Daroussin dbm_getint(int32_t offset) 165*61d06d6bSBaptiste Daroussin { 166*61d06d6bSBaptiste Daroussin return (int32_t *)dbm_base + offset; 167*61d06d6bSBaptiste Daroussin } 168*61d06d6bSBaptiste Daroussin 169*61d06d6bSBaptiste Daroussin /* 170*61d06d6bSBaptiste Daroussin * The reverse of dbm_get(). 171*61d06d6bSBaptiste Daroussin * Take pointer into the database file 172*61d06d6bSBaptiste Daroussin * and convert it to the raw integer 173*61d06d6bSBaptiste Daroussin * that would be used to refer to that place in the file. 174*61d06d6bSBaptiste Daroussin */ 175*61d06d6bSBaptiste Daroussin int32_t 176*61d06d6bSBaptiste Daroussin dbm_addr(const void *p) 177*61d06d6bSBaptiste Daroussin { 178*61d06d6bSBaptiste Daroussin return htobe32((const char *)p - dbm_base); 179*61d06d6bSBaptiste Daroussin } 180*61d06d6bSBaptiste Daroussin 181*61d06d6bSBaptiste Daroussin int 182*61d06d6bSBaptiste Daroussin dbm_match(const struct dbm_match *match, const char *str) 183*61d06d6bSBaptiste Daroussin { 184*61d06d6bSBaptiste Daroussin switch (match->type) { 185*61d06d6bSBaptiste Daroussin case DBM_EXACT: 186*61d06d6bSBaptiste Daroussin return strcmp(str, match->str) == 0; 187*61d06d6bSBaptiste Daroussin case DBM_SUB: 188*61d06d6bSBaptiste Daroussin return strcasestr(str, match->str) != NULL; 189*61d06d6bSBaptiste Daroussin case DBM_REGEX: 190*61d06d6bSBaptiste Daroussin return regexec(match->re, str, 0, NULL, 0) == 0; 191*61d06d6bSBaptiste Daroussin default: 192*61d06d6bSBaptiste Daroussin abort(); 193*61d06d6bSBaptiste Daroussin } 194*61d06d6bSBaptiste Daroussin } 195