/* Id: dbm_map.c,v 1.8 2017/02/17 14:43:54 schwarze Exp */
/*
 * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Low-level routines for the map-based version
 * of the mandoc database, for read-only access.
 * The interface is defined in "dbm_map.h".
 */
21*9508192eSchristos #include "config.h"
22*9508192eSchristos
23*9508192eSchristos #include <sys/mman.h>
24*9508192eSchristos #include <sys/stat.h>
25*9508192eSchristos #include <sys/types.h>
26*9508192eSchristos
27*9508192eSchristos #if HAVE_ENDIAN
28*9508192eSchristos #include <endian.h>
29*9508192eSchristos #elif HAVE_SYS_ENDIAN
30*9508192eSchristos #include <sys/endian.h>
31*9508192eSchristos #elif HAVE_NTOHL
32*9508192eSchristos #include <arpa/inet.h>
33*9508192eSchristos #endif
34*9508192eSchristos #if HAVE_ERR
35*9508192eSchristos #include <err.h>
36*9508192eSchristos #endif
37*9508192eSchristos #include <errno.h>
38*9508192eSchristos #include <fcntl.h>
39*9508192eSchristos #include <regex.h>
40*9508192eSchristos #include <stdint.h>
41*9508192eSchristos #include <stdlib.h>
42*9508192eSchristos #include <string.h>
43*9508192eSchristos #include <unistd.h>
44*9508192eSchristos
45*9508192eSchristos #include "mansearch.h"
46*9508192eSchristos #include "dbm_map.h"
47*9508192eSchristos #include "dbm.h"
48*9508192eSchristos
/* State of the single database that is currently open, if any. */
static struct stat	 st;		/* fstat(2) result for the file */
static char		*dbm_base;	/* start of the mmap(2)ed image */
static int		 ifd;		/* descriptor of the open file */
static int32_t		 max_offset;	/* dbm_get() rejects offsets >= this */
53*9508192eSchristos
54*9508192eSchristos /*
55*9508192eSchristos * Open a disk-based database for read-only access.
56*9508192eSchristos * Validate the file format as far as it is not mandoc-specific.
57*9508192eSchristos * Return 0 on success. Return -1 and set errno on failure.
58*9508192eSchristos */
59*9508192eSchristos int
dbm_map(const char * fname)60*9508192eSchristos dbm_map(const char *fname)
61*9508192eSchristos {
62*9508192eSchristos int save_errno;
63*9508192eSchristos const int32_t *magic;
64*9508192eSchristos
65*9508192eSchristos if ((ifd = open(fname, O_RDONLY)) == -1)
66*9508192eSchristos return -1;
67*9508192eSchristos if (fstat(ifd, &st) == -1)
68*9508192eSchristos goto fail;
69*9508192eSchristos if (st.st_size < 5) {
70*9508192eSchristos warnx("dbm_map(%s): File too short", fname);
71*9508192eSchristos errno = EFTYPE;
72*9508192eSchristos goto fail;
73*9508192eSchristos }
74*9508192eSchristos if (st.st_size > INT32_MAX) {
75*9508192eSchristos errno = EFBIG;
76*9508192eSchristos goto fail;
77*9508192eSchristos }
78*9508192eSchristos if ((dbm_base = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED,
79*9508192eSchristos ifd, 0)) == MAP_FAILED)
80*9508192eSchristos goto fail;
81*9508192eSchristos magic = dbm_getint(0);
82*9508192eSchristos if (be32toh(*magic) != MANDOCDB_MAGIC) {
83*9508192eSchristos if (strncmp(dbm_base, "SQLite format 3", 15))
84*9508192eSchristos warnx("dbm_map(%s): "
85*9508192eSchristos "Bad initial magic %x (expected %x)",
86*9508192eSchristos fname, be32toh(*magic), MANDOCDB_MAGIC);
87*9508192eSchristos else
88*9508192eSchristos warnx("dbm_map(%s): "
89*9508192eSchristos "Obsolete format based on SQLite 3",
90*9508192eSchristos fname);
91*9508192eSchristos errno = EFTYPE;
92*9508192eSchristos goto fail;
93*9508192eSchristos }
94*9508192eSchristos magic = dbm_getint(1);
95*9508192eSchristos if (be32toh(*magic) != MANDOCDB_VERSION) {
96*9508192eSchristos warnx("dbm_map(%s): Bad version number %d (expected %d)",
97*9508192eSchristos fname, be32toh(*magic), MANDOCDB_VERSION);
98*9508192eSchristos errno = EFTYPE;
99*9508192eSchristos goto fail;
100*9508192eSchristos }
101*9508192eSchristos max_offset = be32toh(*dbm_getint(3)) + sizeof(int32_t);
102*9508192eSchristos if (st.st_size != max_offset) {
103*9508192eSchristos warnx("dbm_map(%s): Inconsistent file size %lld (expected %d)",
104*9508192eSchristos fname, (long long)st.st_size, max_offset);
105*9508192eSchristos errno = EFTYPE;
106*9508192eSchristos goto fail;
107*9508192eSchristos }
108*9508192eSchristos if ((magic = dbm_get(*dbm_getint(3))) == NULL) {
109*9508192eSchristos errno = EFTYPE;
110*9508192eSchristos goto fail;
111*9508192eSchristos }
112*9508192eSchristos if (be32toh(*magic) != MANDOCDB_MAGIC) {
113*9508192eSchristos warnx("dbm_map(%s): Bad final magic %x (expected %x)",
114*9508192eSchristos fname, be32toh(*magic), MANDOCDB_MAGIC);
115*9508192eSchristos errno = EFTYPE;
116*9508192eSchristos goto fail;
117*9508192eSchristos }
118*9508192eSchristos return 0;
119*9508192eSchristos
120*9508192eSchristos fail:
121*9508192eSchristos save_errno = errno;
122*9508192eSchristos close(ifd);
123*9508192eSchristos errno = save_errno;
124*9508192eSchristos return -1;
125*9508192eSchristos }
126*9508192eSchristos
127*9508192eSchristos void
dbm_unmap(void)128*9508192eSchristos dbm_unmap(void)
129*9508192eSchristos {
130*9508192eSchristos if (munmap(dbm_base, st.st_size) == -1)
131*9508192eSchristos warn("dbm_unmap: munmap");
132*9508192eSchristos if (close(ifd) == -1)
133*9508192eSchristos warn("dbm_unmap: close");
134*9508192eSchristos dbm_base = (char *)-1;
135*9508192eSchristos }
136*9508192eSchristos
137*9508192eSchristos /*
138*9508192eSchristos * Take a raw integer as it was read from the database.
139*9508192eSchristos * Interpret it as an offset into the database file
140*9508192eSchristos * and return a pointer to that place in the file.
141*9508192eSchristos */
142*9508192eSchristos void *
dbm_get(int32_t offset)143*9508192eSchristos dbm_get(int32_t offset)
144*9508192eSchristos {
145*9508192eSchristos offset = be32toh(offset);
146*9508192eSchristos if (offset < 0) {
147*9508192eSchristos warnx("dbm_get: Database corrupt: offset %d", offset);
148*9508192eSchristos return NULL;
149*9508192eSchristos }
150*9508192eSchristos if (offset >= max_offset) {
151*9508192eSchristos warnx("dbm_get: Database corrupt: offset %d > %d",
152*9508192eSchristos offset, max_offset);
153*9508192eSchristos return NULL;
154*9508192eSchristos }
155*9508192eSchristos return dbm_base + offset;
156*9508192eSchristos }
157*9508192eSchristos
158*9508192eSchristos /*
159*9508192eSchristos * Assume the database starts with some integers.
160*9508192eSchristos * Assume they are numbered starting from 0, increasing.
161*9508192eSchristos * Get a pointer to one with the number "offset".
162*9508192eSchristos */
163*9508192eSchristos int32_t *
dbm_getint(int32_t offset)164*9508192eSchristos dbm_getint(int32_t offset)
165*9508192eSchristos {
166*9508192eSchristos return (int32_t *)dbm_base + offset;
167*9508192eSchristos }
168*9508192eSchristos
169*9508192eSchristos /*
170*9508192eSchristos * The reverse of dbm_get().
171*9508192eSchristos * Take pointer into the database file
172*9508192eSchristos * and convert it to the raw integer
173*9508192eSchristos * that would be used to refer to that place in the file.
174*9508192eSchristos */
175*9508192eSchristos int32_t
dbm_addr(const void * p)176*9508192eSchristos dbm_addr(const void *p)
177*9508192eSchristos {
178*9508192eSchristos return htobe32((const char *)p - dbm_base);
179*9508192eSchristos }
180*9508192eSchristos
181*9508192eSchristos int
dbm_match(const struct dbm_match * match,const char * str)182*9508192eSchristos dbm_match(const struct dbm_match *match, const char *str)
183*9508192eSchristos {
184*9508192eSchristos switch (match->type) {
185*9508192eSchristos case DBM_EXACT:
186*9508192eSchristos return strcmp(str, match->str) == 0;
187*9508192eSchristos case DBM_SUB:
188*9508192eSchristos return strcasestr(str, match->str) != NULL;
189*9508192eSchristos case DBM_REGEX:
190*9508192eSchristos return regexec(match->re, str, 0, NULL, 0) == 0;
191*9508192eSchristos default:
192*9508192eSchristos abort();
193*9508192eSchristos }
194*9508192eSchristos }
195