xref: /openbsd-src/usr.bin/mandoc/dbm_map.c (revision 79a81166797a360cce61a4013d6bf48c3d0bcbbb)
1*79a81166Sschwarze /*	$OpenBSD: dbm_map.c,v 1.6 2017/02/09 18:26:17 schwarze Exp $ */
2ff2dbb0fSschwarze /*
3ff2dbb0fSschwarze  * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
4ff2dbb0fSschwarze  *
5ff2dbb0fSschwarze  * Permission to use, copy, modify, and distribute this software for any
6ff2dbb0fSschwarze  * purpose with or without fee is hereby granted, provided that the above
7ff2dbb0fSschwarze  * copyright notice and this permission notice appear in all copies.
8ff2dbb0fSschwarze  *
9ff2dbb0fSschwarze  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10ff2dbb0fSschwarze  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11ff2dbb0fSschwarze  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12ff2dbb0fSschwarze  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13ff2dbb0fSschwarze  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14ff2dbb0fSschwarze  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15ff2dbb0fSschwarze  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16ff2dbb0fSschwarze  *
17ff2dbb0fSschwarze  * Low-level routines for the map-based version
18ff2dbb0fSschwarze  * of the mandoc database, for read-only access.
19ff2dbb0fSschwarze  * The interface is defined in "dbm_map.h".
20ff2dbb0fSschwarze  */
21ff2dbb0fSschwarze #include <sys/mman.h>
22ff2dbb0fSschwarze #include <sys/stat.h>
23ff2dbb0fSschwarze #include <sys/types.h>
24ff2dbb0fSschwarze 
25ff2dbb0fSschwarze #include <endian.h>
26ff2dbb0fSschwarze #include <err.h>
27ff2dbb0fSschwarze #include <errno.h>
28ff2dbb0fSschwarze #include <fcntl.h>
29ff2dbb0fSschwarze #include <regex.h>
30ff2dbb0fSschwarze #include <stdint.h>
31ff2dbb0fSschwarze #include <stdlib.h>
32ff2dbb0fSschwarze #include <string.h>
33ff2dbb0fSschwarze #include <unistd.h>
34ff2dbb0fSschwarze 
35ff2dbb0fSschwarze #include "mansearch.h"
36ff2dbb0fSschwarze #include "dbm_map.h"
37ff2dbb0fSschwarze #include "dbm.h"
38ff2dbb0fSschwarze 
/* Result of fstat(2) on the database file; st_size is the mapping length. */
static struct stat	 st;
/* Start of the mapping; set to (char *)-1 by dbm_unmap(). */
static char		*dbm_base;
/* Descriptor of the database file opened by dbm_map(). */
static int		 ifd;
/* Header field 3 plus sizeof(int32_t); dbm_get() rejects offsets >= this. */
static int32_t		 max_offset;
43ff2dbb0fSschwarze 
44ff2dbb0fSschwarze /*
45ff2dbb0fSschwarze  * Open a disk-based database for read-only access.
46ff2dbb0fSschwarze  * Validate the file format as far as it is not mandoc-specific.
47ff2dbb0fSschwarze  * Return 0 on success.  Return -1 and set errno on failure.
48ff2dbb0fSschwarze  */
49ff2dbb0fSschwarze int
dbm_map(const char * fname)50ff2dbb0fSschwarze dbm_map(const char *fname)
51ff2dbb0fSschwarze {
52ff2dbb0fSschwarze 	int		 save_errno;
53ff2dbb0fSschwarze 	const int32_t	*magic;
54ff2dbb0fSschwarze 
55ff2dbb0fSschwarze 	if ((ifd = open(fname, O_RDONLY)) == -1)
56ff2dbb0fSschwarze 		return -1;
57ff2dbb0fSschwarze 	if (fstat(ifd, &st) == -1)
58ff2dbb0fSschwarze 		goto fail;
59ff2dbb0fSschwarze 	if (st.st_size < 5) {
60ff2dbb0fSschwarze 		warnx("dbm_map(%s): File too short", fname);
61ff2dbb0fSschwarze 		errno = EFTYPE;
62ff2dbb0fSschwarze 		goto fail;
63ff2dbb0fSschwarze 	}
64ff2dbb0fSschwarze 	if (st.st_size > INT32_MAX) {
65ff2dbb0fSschwarze 		errno = EFBIG;
66ff2dbb0fSschwarze 		goto fail;
67ff2dbb0fSschwarze 	}
68ff2dbb0fSschwarze 	if ((dbm_base = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED,
69ff2dbb0fSschwarze 	    ifd, 0)) == MAP_FAILED)
70ff2dbb0fSschwarze 		goto fail;
71ff2dbb0fSschwarze 	magic = dbm_getint(0);
72ff2dbb0fSschwarze 	if (be32toh(*magic) != MANDOCDB_MAGIC) {
73c26c9404Sschwarze 		if (strncmp(dbm_base, "SQLite format 3", 15))
74c26c9404Sschwarze 			warnx("dbm_map(%s): "
75c26c9404Sschwarze 			    "Bad initial magic %x (expected %x)",
76ff2dbb0fSschwarze 			    fname, be32toh(*magic), MANDOCDB_MAGIC);
77c26c9404Sschwarze 		else
78c26c9404Sschwarze 			warnx("dbm_map(%s): "
79c26c9404Sschwarze 			    "Obsolete format based on SQLite 3",
80c26c9404Sschwarze 			    fname);
81ff2dbb0fSschwarze 		errno = EFTYPE;
82ff2dbb0fSschwarze 		goto fail;
83ff2dbb0fSschwarze 	}
84ff2dbb0fSschwarze 	magic = dbm_getint(1);
85ff2dbb0fSschwarze 	if (be32toh(*magic) != MANDOCDB_VERSION) {
86ff2dbb0fSschwarze 		warnx("dbm_map(%s): Bad version number %d (expected %d)",
87ff2dbb0fSschwarze 		    fname, be32toh(*magic), MANDOCDB_VERSION);
88ff2dbb0fSschwarze 		errno = EFTYPE;
89ff2dbb0fSschwarze 		goto fail;
90ff2dbb0fSschwarze 	}
91ff2dbb0fSschwarze 	max_offset = be32toh(*dbm_getint(3)) + sizeof(int32_t);
92ff2dbb0fSschwarze 	if (st.st_size != max_offset) {
9371a346d7Sschwarze 		warnx("dbm_map(%s): Inconsistent file size %lld (expected %d)",
94753b3b59Sschwarze 		    fname, (long long)st.st_size, max_offset);
95ff2dbb0fSschwarze 		errno = EFTYPE;
96ff2dbb0fSschwarze 		goto fail;
97ff2dbb0fSschwarze 	}
98ff2dbb0fSschwarze 	if ((magic = dbm_get(*dbm_getint(3))) == NULL) {
99ff2dbb0fSschwarze 		errno = EFTYPE;
100ff2dbb0fSschwarze 		goto fail;
101ff2dbb0fSschwarze 	}
102ff2dbb0fSschwarze 	if (be32toh(*magic) != MANDOCDB_MAGIC) {
103ff2dbb0fSschwarze 		warnx("dbm_map(%s): Bad final magic %x (expected %x)",
104ff2dbb0fSschwarze 		    fname, be32toh(*magic), MANDOCDB_MAGIC);
105ff2dbb0fSschwarze 		errno = EFTYPE;
106ff2dbb0fSschwarze 		goto fail;
107ff2dbb0fSschwarze 	}
108ff2dbb0fSschwarze 	return 0;
109ff2dbb0fSschwarze 
110ff2dbb0fSschwarze fail:
111ff2dbb0fSschwarze 	save_errno = errno;
112ff2dbb0fSschwarze 	close(ifd);
113ff2dbb0fSschwarze 	errno = save_errno;
114ff2dbb0fSschwarze 	return -1;
115ff2dbb0fSschwarze }
116ff2dbb0fSschwarze 
117ff2dbb0fSschwarze void
dbm_unmap(void)118ff2dbb0fSschwarze dbm_unmap(void)
119ff2dbb0fSschwarze {
120ff2dbb0fSschwarze 	if (munmap(dbm_base, st.st_size) == -1)
121ff2dbb0fSschwarze 		warn("dbm_unmap: munmap");
122ff2dbb0fSschwarze 	if (close(ifd) == -1)
123ff2dbb0fSschwarze 		warn("dbm_unmap: close");
124ff2dbb0fSschwarze 	dbm_base = (char *)-1;
125ff2dbb0fSschwarze }
126ff2dbb0fSschwarze 
127ff2dbb0fSschwarze /*
128ff2dbb0fSschwarze  * Take a raw integer as it was read from the database.
129ff2dbb0fSschwarze  * Interpret it as an offset into the database file
130ff2dbb0fSschwarze  * and return a pointer to that place in the file.
131ff2dbb0fSschwarze  */
132ff2dbb0fSschwarze void *
dbm_get(int32_t offset)133ff2dbb0fSschwarze dbm_get(int32_t offset)
134ff2dbb0fSschwarze {
135ff2dbb0fSschwarze 	offset = be32toh(offset);
136467e53f5Sschwarze 	if (offset < 0) {
137467e53f5Sschwarze 		warnx("dbm_get: Database corrupt: offset %d", offset);
138467e53f5Sschwarze 		return NULL;
139467e53f5Sschwarze 	}
140467e53f5Sschwarze 	if (offset >= max_offset) {
141ff2dbb0fSschwarze 		warnx("dbm_get: Database corrupt: offset %d > %d",
142ff2dbb0fSschwarze 		    offset, max_offset);
143ff2dbb0fSschwarze 		return NULL;
144ff2dbb0fSschwarze 	}
145ff2dbb0fSschwarze 	return dbm_base + offset;
146ff2dbb0fSschwarze }
147ff2dbb0fSschwarze 
148ff2dbb0fSschwarze /*
149ff2dbb0fSschwarze  * Assume the database starts with some integers.
150ff2dbb0fSschwarze  * Assume they are numbered starting from 0, increasing.
151ff2dbb0fSschwarze  * Get a pointer to one with the number "offset".
152ff2dbb0fSschwarze  */
153ff2dbb0fSschwarze int32_t *
dbm_getint(int32_t offset)154ff2dbb0fSschwarze dbm_getint(int32_t offset)
155ff2dbb0fSschwarze {
156ff2dbb0fSschwarze 	return (int32_t *)dbm_base + offset;
157ff2dbb0fSschwarze }
158ff2dbb0fSschwarze 
159ff2dbb0fSschwarze /*
160ff2dbb0fSschwarze  * The reverse of dbm_get().
161ff2dbb0fSschwarze  * Take pointer into the database file
162ff2dbb0fSschwarze  * and convert it to the raw integer
163ff2dbb0fSschwarze  * that would be used to refer to that place in the file.
164ff2dbb0fSschwarze  */
165ff2dbb0fSschwarze int32_t
dbm_addr(const void * p)166ff2dbb0fSschwarze dbm_addr(const void *p)
167ff2dbb0fSschwarze {
168*79a81166Sschwarze 	return htobe32((const char *)p - dbm_base);
169ff2dbb0fSschwarze }
170ff2dbb0fSschwarze 
171ff2dbb0fSschwarze int
dbm_match(const struct dbm_match * match,const char * str)172ff2dbb0fSschwarze dbm_match(const struct dbm_match *match, const char *str)
173ff2dbb0fSschwarze {
174ff2dbb0fSschwarze 	switch (match->type) {
175ff2dbb0fSschwarze 	case DBM_EXACT:
176ff2dbb0fSschwarze 		return strcmp(str, match->str) == 0;
177ff2dbb0fSschwarze 	case DBM_SUB:
178ff2dbb0fSschwarze 		return strcasestr(str, match->str) != NULL;
179ff2dbb0fSschwarze 	case DBM_REGEX:
180ff2dbb0fSschwarze 		return regexec(match->re, str, 0, NULL, 0) == 0;
181ff2dbb0fSschwarze 	default:
182ff2dbb0fSschwarze 		abort();
183ff2dbb0fSschwarze 	}
184ff2dbb0fSschwarze }
185