1*54ba9607SSascha Wildner /* $Id: dbm_map.c,v 1.8 2017/02/17 14:43:54 schwarze Exp $ */
2*54ba9607SSascha Wildner /*
3*54ba9607SSascha Wildner * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
4*54ba9607SSascha Wildner *
5*54ba9607SSascha Wildner * Permission to use, copy, modify, and distribute this software for any
6*54ba9607SSascha Wildner * purpose with or without fee is hereby granted, provided that the above
7*54ba9607SSascha Wildner * copyright notice and this permission notice appear in all copies.
8*54ba9607SSascha Wildner *
9*54ba9607SSascha Wildner * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10*54ba9607SSascha Wildner * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11*54ba9607SSascha Wildner * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12*54ba9607SSascha Wildner * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13*54ba9607SSascha Wildner * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14*54ba9607SSascha Wildner * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15*54ba9607SSascha Wildner * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16*54ba9607SSascha Wildner *
17*54ba9607SSascha Wildner * Low-level routines for the map-based version
18*54ba9607SSascha Wildner * of the mandoc database, for read-only access.
19*54ba9607SSascha Wildner * The interface is defined in "dbm_map.h".
20*54ba9607SSascha Wildner */
21*54ba9607SSascha Wildner #include "config.h"
22*54ba9607SSascha Wildner
23*54ba9607SSascha Wildner #include <sys/mman.h>
24*54ba9607SSascha Wildner #include <sys/stat.h>
25*54ba9607SSascha Wildner #include <sys/types.h>
26*54ba9607SSascha Wildner
27*54ba9607SSascha Wildner #if HAVE_ENDIAN
28*54ba9607SSascha Wildner #include <endian.h>
29*54ba9607SSascha Wildner #elif HAVE_SYS_ENDIAN
30*54ba9607SSascha Wildner #include <sys/endian.h>
31*54ba9607SSascha Wildner #elif HAVE_NTOHL
32*54ba9607SSascha Wildner #include <arpa/inet.h>
33*54ba9607SSascha Wildner #endif
34*54ba9607SSascha Wildner #if HAVE_ERR
35*54ba9607SSascha Wildner #include <err.h>
36*54ba9607SSascha Wildner #endif
37*54ba9607SSascha Wildner #include <errno.h>
38*54ba9607SSascha Wildner #include <fcntl.h>
39*54ba9607SSascha Wildner #include <regex.h>
40*54ba9607SSascha Wildner #include <stdint.h>
41*54ba9607SSascha Wildner #include <stdlib.h>
42*54ba9607SSascha Wildner #include <string.h>
43*54ba9607SSascha Wildner #include <unistd.h>
44*54ba9607SSascha Wildner
45*54ba9607SSascha Wildner #include "mansearch.h"
46*54ba9607SSascha Wildner #include "dbm_map.h"
47*54ba9607SSascha Wildner #include "dbm.h"
48*54ba9607SSascha Wildner
/* State of the single database that is currently mapped, if any. */
static struct stat	 st;		/* fstat(2) result; st_size is the mapping length */
static char		*dbm_base;	/* start of the mapping; (char *)-1 after dbm_unmap() */
static int		 ifd;		/* descriptor of the database file */
static int32_t		 max_offset;	/* offsets handed to dbm_get() must be below this */
53*54ba9607SSascha Wildner
54*54ba9607SSascha Wildner /*
55*54ba9607SSascha Wildner * Open a disk-based database for read-only access.
56*54ba9607SSascha Wildner * Validate the file format as far as it is not mandoc-specific.
57*54ba9607SSascha Wildner * Return 0 on success. Return -1 and set errno on failure.
58*54ba9607SSascha Wildner */
59*54ba9607SSascha Wildner int
dbm_map(const char * fname)60*54ba9607SSascha Wildner dbm_map(const char *fname)
61*54ba9607SSascha Wildner {
62*54ba9607SSascha Wildner int save_errno;
63*54ba9607SSascha Wildner const int32_t *magic;
64*54ba9607SSascha Wildner
65*54ba9607SSascha Wildner if ((ifd = open(fname, O_RDONLY)) == -1)
66*54ba9607SSascha Wildner return -1;
67*54ba9607SSascha Wildner if (fstat(ifd, &st) == -1)
68*54ba9607SSascha Wildner goto fail;
69*54ba9607SSascha Wildner if (st.st_size < 5) {
70*54ba9607SSascha Wildner warnx("dbm_map(%s): File too short", fname);
71*54ba9607SSascha Wildner errno = EFTYPE;
72*54ba9607SSascha Wildner goto fail;
73*54ba9607SSascha Wildner }
74*54ba9607SSascha Wildner if (st.st_size > INT32_MAX) {
75*54ba9607SSascha Wildner errno = EFBIG;
76*54ba9607SSascha Wildner goto fail;
77*54ba9607SSascha Wildner }
78*54ba9607SSascha Wildner if ((dbm_base = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED,
79*54ba9607SSascha Wildner ifd, 0)) == MAP_FAILED)
80*54ba9607SSascha Wildner goto fail;
81*54ba9607SSascha Wildner magic = dbm_getint(0);
82*54ba9607SSascha Wildner if (be32toh(*magic) != MANDOCDB_MAGIC) {
83*54ba9607SSascha Wildner if (strncmp(dbm_base, "SQLite format 3", 15))
84*54ba9607SSascha Wildner warnx("dbm_map(%s): "
85*54ba9607SSascha Wildner "Bad initial magic %x (expected %x)",
86*54ba9607SSascha Wildner fname, be32toh(*magic), MANDOCDB_MAGIC);
87*54ba9607SSascha Wildner else
88*54ba9607SSascha Wildner warnx("dbm_map(%s): "
89*54ba9607SSascha Wildner "Obsolete format based on SQLite 3",
90*54ba9607SSascha Wildner fname);
91*54ba9607SSascha Wildner errno = EFTYPE;
92*54ba9607SSascha Wildner goto fail;
93*54ba9607SSascha Wildner }
94*54ba9607SSascha Wildner magic = dbm_getint(1);
95*54ba9607SSascha Wildner if (be32toh(*magic) != MANDOCDB_VERSION) {
96*54ba9607SSascha Wildner warnx("dbm_map(%s): Bad version number %d (expected %d)",
97*54ba9607SSascha Wildner fname, be32toh(*magic), MANDOCDB_VERSION);
98*54ba9607SSascha Wildner errno = EFTYPE;
99*54ba9607SSascha Wildner goto fail;
100*54ba9607SSascha Wildner }
101*54ba9607SSascha Wildner max_offset = be32toh(*dbm_getint(3)) + sizeof(int32_t);
102*54ba9607SSascha Wildner if (st.st_size != max_offset) {
103*54ba9607SSascha Wildner warnx("dbm_map(%s): Inconsistent file size %lld (expected %d)",
104*54ba9607SSascha Wildner fname, (long long)st.st_size, max_offset);
105*54ba9607SSascha Wildner errno = EFTYPE;
106*54ba9607SSascha Wildner goto fail;
107*54ba9607SSascha Wildner }
108*54ba9607SSascha Wildner if ((magic = dbm_get(*dbm_getint(3))) == NULL) {
109*54ba9607SSascha Wildner errno = EFTYPE;
110*54ba9607SSascha Wildner goto fail;
111*54ba9607SSascha Wildner }
112*54ba9607SSascha Wildner if (be32toh(*magic) != MANDOCDB_MAGIC) {
113*54ba9607SSascha Wildner warnx("dbm_map(%s): Bad final magic %x (expected %x)",
114*54ba9607SSascha Wildner fname, be32toh(*magic), MANDOCDB_MAGIC);
115*54ba9607SSascha Wildner errno = EFTYPE;
116*54ba9607SSascha Wildner goto fail;
117*54ba9607SSascha Wildner }
118*54ba9607SSascha Wildner return 0;
119*54ba9607SSascha Wildner
120*54ba9607SSascha Wildner fail:
121*54ba9607SSascha Wildner save_errno = errno;
122*54ba9607SSascha Wildner close(ifd);
123*54ba9607SSascha Wildner errno = save_errno;
124*54ba9607SSascha Wildner return -1;
125*54ba9607SSascha Wildner }
126*54ba9607SSascha Wildner
127*54ba9607SSascha Wildner void
dbm_unmap(void)128*54ba9607SSascha Wildner dbm_unmap(void)
129*54ba9607SSascha Wildner {
130*54ba9607SSascha Wildner if (munmap(dbm_base, st.st_size) == -1)
131*54ba9607SSascha Wildner warn("dbm_unmap: munmap");
132*54ba9607SSascha Wildner if (close(ifd) == -1)
133*54ba9607SSascha Wildner warn("dbm_unmap: close");
134*54ba9607SSascha Wildner dbm_base = (char *)-1;
135*54ba9607SSascha Wildner }
136*54ba9607SSascha Wildner
137*54ba9607SSascha Wildner /*
138*54ba9607SSascha Wildner * Take a raw integer as it was read from the database.
139*54ba9607SSascha Wildner * Interpret it as an offset into the database file
140*54ba9607SSascha Wildner * and return a pointer to that place in the file.
141*54ba9607SSascha Wildner */
142*54ba9607SSascha Wildner void *
dbm_get(int32_t offset)143*54ba9607SSascha Wildner dbm_get(int32_t offset)
144*54ba9607SSascha Wildner {
145*54ba9607SSascha Wildner offset = be32toh(offset);
146*54ba9607SSascha Wildner if (offset < 0) {
147*54ba9607SSascha Wildner warnx("dbm_get: Database corrupt: offset %d", offset);
148*54ba9607SSascha Wildner return NULL;
149*54ba9607SSascha Wildner }
150*54ba9607SSascha Wildner if (offset >= max_offset) {
151*54ba9607SSascha Wildner warnx("dbm_get: Database corrupt: offset %d > %d",
152*54ba9607SSascha Wildner offset, max_offset);
153*54ba9607SSascha Wildner return NULL;
154*54ba9607SSascha Wildner }
155*54ba9607SSascha Wildner return dbm_base + offset;
156*54ba9607SSascha Wildner }
157*54ba9607SSascha Wildner
158*54ba9607SSascha Wildner /*
159*54ba9607SSascha Wildner * Assume the database starts with some integers.
160*54ba9607SSascha Wildner * Assume they are numbered starting from 0, increasing.
161*54ba9607SSascha Wildner * Get a pointer to one with the number "offset".
162*54ba9607SSascha Wildner */
163*54ba9607SSascha Wildner int32_t *
dbm_getint(int32_t offset)164*54ba9607SSascha Wildner dbm_getint(int32_t offset)
165*54ba9607SSascha Wildner {
166*54ba9607SSascha Wildner return (int32_t *)dbm_base + offset;
167*54ba9607SSascha Wildner }
168*54ba9607SSascha Wildner
169*54ba9607SSascha Wildner /*
170*54ba9607SSascha Wildner * The reverse of dbm_get().
171*54ba9607SSascha Wildner * Take pointer into the database file
172*54ba9607SSascha Wildner * and convert it to the raw integer
173*54ba9607SSascha Wildner * that would be used to refer to that place in the file.
174*54ba9607SSascha Wildner */
175*54ba9607SSascha Wildner int32_t
dbm_addr(const void * p)176*54ba9607SSascha Wildner dbm_addr(const void *p)
177*54ba9607SSascha Wildner {
178*54ba9607SSascha Wildner return htobe32((const char *)p - dbm_base);
179*54ba9607SSascha Wildner }
180*54ba9607SSascha Wildner
181*54ba9607SSascha Wildner int
dbm_match(const struct dbm_match * match,const char * str)182*54ba9607SSascha Wildner dbm_match(const struct dbm_match *match, const char *str)
183*54ba9607SSascha Wildner {
184*54ba9607SSascha Wildner switch (match->type) {
185*54ba9607SSascha Wildner case DBM_EXACT:
186*54ba9607SSascha Wildner return strcmp(str, match->str) == 0;
187*54ba9607SSascha Wildner case DBM_SUB:
188*54ba9607SSascha Wildner return strcasestr(str, match->str) != NULL;
189*54ba9607SSascha Wildner case DBM_REGEX:
190*54ba9607SSascha Wildner return regexec(match->re, str, 0, NULL, 0) == 0;
191*54ba9607SSascha Wildner default:
192*54ba9607SSascha Wildner abort();
193*54ba9607SSascha Wildner }
194*54ba9607SSascha Wildner }
195