/* Id: dbm.c,v 1.6 2018/11/19 19:22:07 schwarze Exp */
/*
 * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Map-based version of the mandoc database, for read-only access.
 * The interface is defined in "dbm.h".
 */
209508192eSchristos #include "config.h"
219508192eSchristos
229508192eSchristos #include <assert.h>
239508192eSchristos #if HAVE_ENDIAN
249508192eSchristos #include <endian.h>
259508192eSchristos #elif HAVE_SYS_ENDIAN
269508192eSchristos #include <sys/endian.h>
279508192eSchristos #elif HAVE_NTOHL
289508192eSchristos #include <arpa/inet.h>
299508192eSchristos #endif
309508192eSchristos #if HAVE_ERR
319508192eSchristos #include <err.h>
329508192eSchristos #endif
339508192eSchristos #include <errno.h>
349508192eSchristos #include <regex.h>
359508192eSchristos #include <stdint.h>
369508192eSchristos #include <stdio.h>
379508192eSchristos #include <stdlib.h>
389508192eSchristos #include <string.h>
399508192eSchristos
409508192eSchristos #include "mansearch.h"
419508192eSchristos #include "dbm_map.h"
429508192eSchristos #include "dbm.h"
439508192eSchristos
/*
 * One entry of a per-macro value table inside the mapped database.
 * Both members are handed to dbm_get() to reach the data they refer to.
 */
struct macro {
	int32_t	value;	/* dbm_get() of this yields the value string */
	int32_t	pages;	/* dbm_get() of this yields the int32_t page list */
};
489508192eSchristos
/*
 * One entry of the page table inside the mapped database.
 * Every member is handed to dbm_get() to reach the corresponding data;
 * an arch member of 0 means that the page has no architecture data.
 */
struct page {
	int32_t	name;	/* list of names */
	int32_t	sect;	/* list of sections */
	int32_t	arch;	/* list of architectures, or 0 for none */
	int32_t	desc;	/* description string */
	int32_t	file;	/* file name data */
};
569508192eSchristos
/*
 * Kind of page iteration currently in progress; dbm_page_next()
 * dispatches on it.  ITER_NONE means that no iteration is active.
 */
enum iter {
	ITER_NONE = 0,
	ITER_NAME,
	ITER_SECT,
	ITER_ARCH,
	ITER_DESC,
	ITER_MACRO
};
659508192eSchristos
669508192eSchristos static struct macro *macros[MACRO_MAX];
679508192eSchristos static int32_t nvals[MACRO_MAX];
689508192eSchristos static struct page *pages;
699508192eSchristos static int32_t npages;
709508192eSchristos static enum iter iteration;
719508192eSchristos
/* Workers implementing the page and macro iterations. */
static struct dbm_res	 page_bytitle(enum iter, const struct dbm_match *);
static struct dbm_res	 page_byarch(const struct dbm_match *);
static struct dbm_res	 page_bymacro(int32_t, const struct dbm_match *);
static char		*macro_bypage(int32_t, int32_t);
769508192eSchristos
779508192eSchristos
/*** top level functions **********************************************/
799508192eSchristos
809508192eSchristos /*
819508192eSchristos * Open a disk-based mandoc database for read-only access.
829508192eSchristos * Map the pages and macros[] arrays.
839508192eSchristos * Return 0 on success. Return -1 and set errno on failure.
849508192eSchristos */
859508192eSchristos int
dbm_open(const char * fname)869508192eSchristos dbm_open(const char *fname)
879508192eSchristos {
889508192eSchristos const int32_t *mp, *ep;
899508192eSchristos int32_t im;
909508192eSchristos
919508192eSchristos if (dbm_map(fname) == -1)
929508192eSchristos return -1;
939508192eSchristos
949508192eSchristos if ((npages = be32toh(*dbm_getint(4))) < 0) {
959508192eSchristos warnx("dbm_open(%s): Invalid number of pages: %d",
969508192eSchristos fname, npages);
979508192eSchristos goto fail;
989508192eSchristos }
999508192eSchristos pages = (struct page *)dbm_getint(5);
1009508192eSchristos
1019508192eSchristos if ((mp = dbm_get(*dbm_getint(2))) == NULL) {
1029508192eSchristos warnx("dbm_open(%s): Invalid offset of macros array", fname);
1039508192eSchristos goto fail;
1049508192eSchristos }
1059508192eSchristos if (be32toh(*mp) != MACRO_MAX) {
1069508192eSchristos warnx("dbm_open(%s): Invalid number of macros: %d",
1079508192eSchristos fname, be32toh(*mp));
1089508192eSchristos goto fail;
1099508192eSchristos }
1109508192eSchristos for (im = 0; im < MACRO_MAX; im++) {
1119508192eSchristos if ((ep = dbm_get(*++mp)) == NULL) {
1129508192eSchristos warnx("dbm_open(%s): Invalid offset of macro %d",
1139508192eSchristos fname, im);
1149508192eSchristos goto fail;
1159508192eSchristos }
1169508192eSchristos nvals[im] = be32toh(*ep);
117*3cae1599Schristos macros[im] = (struct macro *)__UNCONST(++ep);
1189508192eSchristos }
1199508192eSchristos return 0;
1209508192eSchristos
1219508192eSchristos fail:
1229508192eSchristos dbm_unmap();
1239508192eSchristos errno = EFTYPE;
1249508192eSchristos return -1;
1259508192eSchristos }
1269508192eSchristos
/*
 * Close the database by unmapping it.
 */
void
dbm_close(void)
{
	dbm_unmap();
}
1329508192eSchristos
1339508192eSchristos
/*** functions for handling pages *************************************/
1359508192eSchristos
1369508192eSchristos int32_t
dbm_page_count(void)1379508192eSchristos dbm_page_count(void)
1389508192eSchristos {
1399508192eSchristos return npages;
1409508192eSchristos }
1419508192eSchristos
1429508192eSchristos /*
1439508192eSchristos * Give the caller pointers to the data for one manual page.
1449508192eSchristos */
1459508192eSchristos struct dbm_page *
dbm_page_get(int32_t ip)1469508192eSchristos dbm_page_get(int32_t ip)
1479508192eSchristos {
1489508192eSchristos static struct dbm_page res;
1499508192eSchristos
1509508192eSchristos assert(ip >= 0);
1519508192eSchristos assert(ip < npages);
1529508192eSchristos res.name = dbm_get(pages[ip].name);
1539508192eSchristos if (res.name == NULL)
154544c191cSchristos res.name = "(NULL)\0";
1559508192eSchristos res.sect = dbm_get(pages[ip].sect);
1569508192eSchristos if (res.sect == NULL)
157544c191cSchristos res.sect = "(NULL)\0";
1589508192eSchristos res.arch = pages[ip].arch ? dbm_get(pages[ip].arch) : NULL;
1599508192eSchristos res.desc = dbm_get(pages[ip].desc);
1609508192eSchristos if (res.desc == NULL)
1619508192eSchristos res.desc = "(NULL)";
1629508192eSchristos res.file = dbm_get(pages[ip].file);
1639508192eSchristos if (res.file == NULL)
164544c191cSchristos res.file = " (NULL)\0";
1659508192eSchristos res.addr = dbm_addr(pages + ip);
1669508192eSchristos return &res;
1679508192eSchristos }
1689508192eSchristos
1699508192eSchristos /*
1709508192eSchristos * Functions to start filtered iterations over manual pages.
1719508192eSchristos */
1729508192eSchristos void
dbm_page_byname(const struct dbm_match * match)1739508192eSchristos dbm_page_byname(const struct dbm_match *match)
1749508192eSchristos {
1759508192eSchristos assert(match != NULL);
1769508192eSchristos page_bytitle(ITER_NAME, match);
1779508192eSchristos }
1789508192eSchristos
1799508192eSchristos void
dbm_page_bysect(const struct dbm_match * match)1809508192eSchristos dbm_page_bysect(const struct dbm_match *match)
1819508192eSchristos {
1829508192eSchristos assert(match != NULL);
1839508192eSchristos page_bytitle(ITER_SECT, match);
1849508192eSchristos }
1859508192eSchristos
/* Start an iteration over pages having an architecture that matches. */
void
dbm_page_byarch(const struct dbm_match *match)
{
	assert(match != NULL);
	page_byarch(match);
}
1929508192eSchristos
1939508192eSchristos void
dbm_page_bydesc(const struct dbm_match * match)1949508192eSchristos dbm_page_bydesc(const struct dbm_match *match)
1959508192eSchristos {
1969508192eSchristos assert(match != NULL);
1979508192eSchristos page_bytitle(ITER_DESC, match);
1989508192eSchristos }
1999508192eSchristos
2009508192eSchristos void
dbm_page_bymacro(int32_t im,const struct dbm_match * match)2019508192eSchristos dbm_page_bymacro(int32_t im, const struct dbm_match *match)
2029508192eSchristos {
2039508192eSchristos assert(im >= 0);
2049508192eSchristos assert(im < MACRO_MAX);
2059508192eSchristos assert(match != NULL);
2069508192eSchristos page_bymacro(im, match);
2079508192eSchristos }
2089508192eSchristos
2099508192eSchristos /*
2109508192eSchristos * Return the number of the next manual page in the current iteration.
2119508192eSchristos */
2129508192eSchristos struct dbm_res
dbm_page_next(void)2139508192eSchristos dbm_page_next(void)
2149508192eSchristos {
2159508192eSchristos struct dbm_res res = {-1, 0};
2169508192eSchristos
2179508192eSchristos switch(iteration) {
2189508192eSchristos case ITER_NONE:
2199508192eSchristos return res;
2209508192eSchristos case ITER_ARCH:
2219508192eSchristos return page_byarch(NULL);
2229508192eSchristos case ITER_MACRO:
2239508192eSchristos return page_bymacro(0, NULL);
2249508192eSchristos default:
2259508192eSchristos return page_bytitle(iteration, NULL);
2269508192eSchristos }
2279508192eSchristos }
2289508192eSchristos
2299508192eSchristos /*
2309508192eSchristos * Functions implementing the iteration over manual pages.
2319508192eSchristos */
2329508192eSchristos static struct dbm_res
page_bytitle(enum iter arg_iter,const struct dbm_match * arg_match)2339508192eSchristos page_bytitle(enum iter arg_iter, const struct dbm_match *arg_match)
2349508192eSchristos {
2359508192eSchristos static const struct dbm_match *match;
2369508192eSchristos static const char *cp;
2379508192eSchristos static int32_t ip;
2389508192eSchristos struct dbm_res res = {-1, 0};
2399508192eSchristos
2409508192eSchristos assert(arg_iter == ITER_NAME || arg_iter == ITER_DESC ||
2419508192eSchristos arg_iter == ITER_SECT);
2429508192eSchristos
2439508192eSchristos /* Initialize for a new iteration. */
2449508192eSchristos
2459508192eSchristos if (arg_match != NULL) {
2469508192eSchristos iteration = arg_iter;
2479508192eSchristos match = arg_match;
2489508192eSchristos switch (iteration) {
2499508192eSchristos case ITER_NAME:
2509508192eSchristos cp = dbm_get(pages[0].name);
2519508192eSchristos break;
2529508192eSchristos case ITER_SECT:
2539508192eSchristos cp = dbm_get(pages[0].sect);
2549508192eSchristos break;
2559508192eSchristos case ITER_DESC:
2569508192eSchristos cp = dbm_get(pages[0].desc);
2579508192eSchristos break;
2589508192eSchristos default:
2599508192eSchristos abort();
2609508192eSchristos }
2619508192eSchristos if (cp == NULL) {
2629508192eSchristos iteration = ITER_NONE;
2639508192eSchristos match = NULL;
2649508192eSchristos cp = NULL;
2659508192eSchristos ip = npages;
2669508192eSchristos } else
2679508192eSchristos ip = 0;
2689508192eSchristos return res;
2699508192eSchristos }
2709508192eSchristos
2719508192eSchristos /* Search for a name. */
2729508192eSchristos
2739508192eSchristos while (ip < npages) {
2749508192eSchristos if (iteration == ITER_NAME)
2759508192eSchristos cp++;
2769508192eSchristos if (dbm_match(match, cp))
2779508192eSchristos break;
2789508192eSchristos cp = strchr(cp, '\0') + 1;
2799508192eSchristos if (iteration == ITER_DESC)
2809508192eSchristos ip++;
2819508192eSchristos else if (*cp == '\0') {
2829508192eSchristos cp++;
2839508192eSchristos ip++;
2849508192eSchristos }
2859508192eSchristos }
2869508192eSchristos
2879508192eSchristos /* Reached the end without a match. */
2889508192eSchristos
2899508192eSchristos if (ip == npages) {
2909508192eSchristos iteration = ITER_NONE;
2919508192eSchristos match = NULL;
2929508192eSchristos cp = NULL;
2939508192eSchristos return res;
2949508192eSchristos }
2959508192eSchristos
2969508192eSchristos /* Found a match; save the quality for later retrieval. */
2979508192eSchristos
2989508192eSchristos res.page = ip;
2999508192eSchristos res.bits = iteration == ITER_NAME ? cp[-1] : 0;
3009508192eSchristos
3019508192eSchristos /* Skip the remaining names of this page. */
3029508192eSchristos
3039508192eSchristos if (++ip < npages) {
3049508192eSchristos do {
3059508192eSchristos cp++;
3069508192eSchristos } while (cp[-1] != '\0' ||
3079508192eSchristos (iteration != ITER_DESC && cp[-2] != '\0'));
3089508192eSchristos }
3099508192eSchristos return res;
3109508192eSchristos }
3119508192eSchristos
3129508192eSchristos static struct dbm_res
page_byarch(const struct dbm_match * arg_match)3139508192eSchristos page_byarch(const struct dbm_match *arg_match)
3149508192eSchristos {
3159508192eSchristos static const struct dbm_match *match;
3169508192eSchristos struct dbm_res res = {-1, 0};
3179508192eSchristos static int32_t ip;
3189508192eSchristos const char *cp;
3199508192eSchristos
3209508192eSchristos /* Initialize for a new iteration. */
3219508192eSchristos
3229508192eSchristos if (arg_match != NULL) {
3239508192eSchristos iteration = ITER_ARCH;
3249508192eSchristos match = arg_match;
3259508192eSchristos ip = 0;
3269508192eSchristos return res;
3279508192eSchristos }
3289508192eSchristos
3299508192eSchristos /* Search for an architecture. */
3309508192eSchristos
3319508192eSchristos for ( ; ip < npages; ip++)
3329508192eSchristos if (pages[ip].arch)
3339508192eSchristos for (cp = dbm_get(pages[ip].arch);
3349508192eSchristos *cp != '\0';
3359508192eSchristos cp = strchr(cp, '\0') + 1)
3369508192eSchristos if (dbm_match(match, cp)) {
3379508192eSchristos res.page = ip++;
3389508192eSchristos return res;
3399508192eSchristos }
3409508192eSchristos
3419508192eSchristos /* Reached the end without a match. */
3429508192eSchristos
3439508192eSchristos iteration = ITER_NONE;
3449508192eSchristos match = NULL;
3459508192eSchristos return res;
3469508192eSchristos }
3479508192eSchristos
3489508192eSchristos static struct dbm_res
page_bymacro(int32_t arg_im,const struct dbm_match * arg_match)3499508192eSchristos page_bymacro(int32_t arg_im, const struct dbm_match *arg_match)
3509508192eSchristos {
3519508192eSchristos static const struct dbm_match *match;
3529508192eSchristos static const int32_t *pp;
3539508192eSchristos static const char *cp;
3549508192eSchristos static int32_t im, iv;
3559508192eSchristos struct dbm_res res = {-1, 0};
3569508192eSchristos
3579508192eSchristos assert(im >= 0);
3589508192eSchristos assert(im < MACRO_MAX);
3599508192eSchristos
3609508192eSchristos /* Initialize for a new iteration. */
3619508192eSchristos
3629508192eSchristos if (arg_match != NULL) {
3639508192eSchristos iteration = ITER_MACRO;
3649508192eSchristos match = arg_match;
3659508192eSchristos im = arg_im;
3669508192eSchristos cp = nvals[im] ? dbm_get(macros[im]->value) : NULL;
3679508192eSchristos pp = NULL;
3689508192eSchristos iv = -1;
3699508192eSchristos return res;
3709508192eSchristos }
3719508192eSchristos if (iteration != ITER_MACRO)
3729508192eSchristos return res;
3739508192eSchristos
3749508192eSchristos /* Find the next matching macro value. */
3759508192eSchristos
3769508192eSchristos while (pp == NULL || *pp == 0) {
3779508192eSchristos if (++iv == nvals[im]) {
3789508192eSchristos iteration = ITER_NONE;
3799508192eSchristos return res;
3809508192eSchristos }
3819508192eSchristos if (iv)
3829508192eSchristos cp = strchr(cp, '\0') + 1;
3839508192eSchristos if (dbm_match(match, cp))
3849508192eSchristos pp = dbm_get(macros[im][iv].pages);
3859508192eSchristos }
3869508192eSchristos
3879508192eSchristos /* Found a matching page. */
3889508192eSchristos
3899508192eSchristos res.page = (struct page *)dbm_get(*pp++) - pages;
3909508192eSchristos return res;
3919508192eSchristos }
3929508192eSchristos
3939508192eSchristos
/*** functions for handling macros ************************************/
3959508192eSchristos
3969508192eSchristos int32_t
dbm_macro_count(int32_t im)3979508192eSchristos dbm_macro_count(int32_t im)
3989508192eSchristos {
3999508192eSchristos assert(im >= 0);
4009508192eSchristos assert(im < MACRO_MAX);
4019508192eSchristos return nvals[im];
4029508192eSchristos }
4039508192eSchristos
4049508192eSchristos struct dbm_macro *
dbm_macro_get(int32_t im,int32_t iv)4059508192eSchristos dbm_macro_get(int32_t im, int32_t iv)
4069508192eSchristos {
4079508192eSchristos static struct dbm_macro macro;
4089508192eSchristos
4099508192eSchristos assert(im >= 0);
4109508192eSchristos assert(im < MACRO_MAX);
4119508192eSchristos assert(iv >= 0);
4129508192eSchristos assert(iv < nvals[im]);
4139508192eSchristos macro.value = dbm_get(macros[im][iv].value);
4149508192eSchristos macro.pp = dbm_get(macros[im][iv].pages);
4159508192eSchristos return ¯o;
4169508192eSchristos }
4179508192eSchristos
4189508192eSchristos /*
4199508192eSchristos * Filtered iteration over macro entries.
4209508192eSchristos */
4219508192eSchristos void
dbm_macro_bypage(int32_t im,int32_t ip)4229508192eSchristos dbm_macro_bypage(int32_t im, int32_t ip)
4239508192eSchristos {
4249508192eSchristos assert(im >= 0);
4259508192eSchristos assert(im < MACRO_MAX);
4269508192eSchristos assert(ip != 0);
4279508192eSchristos macro_bypage(im, ip);
4289508192eSchristos }
4299508192eSchristos
4309508192eSchristos char *
dbm_macro_next(void)4319508192eSchristos dbm_macro_next(void)
4329508192eSchristos {
4339508192eSchristos return macro_bypage(MACRO_MAX, 0);
4349508192eSchristos }
4359508192eSchristos
4369508192eSchristos static char *
macro_bypage(int32_t arg_im,int32_t arg_ip)4379508192eSchristos macro_bypage(int32_t arg_im, int32_t arg_ip)
4389508192eSchristos {
4399508192eSchristos static const int32_t *pp;
4409508192eSchristos static int32_t im, ip, iv;
4419508192eSchristos
4429508192eSchristos /* Initialize for a new iteration. */
4439508192eSchristos
4449508192eSchristos if (arg_im < MACRO_MAX && arg_ip != 0) {
4459508192eSchristos im = arg_im;
4469508192eSchristos ip = arg_ip;
4479508192eSchristos pp = dbm_get(macros[im]->pages);
4489508192eSchristos iv = 0;
4499508192eSchristos return NULL;
4509508192eSchristos }
4519508192eSchristos if (im >= MACRO_MAX)
4529508192eSchristos return NULL;
4539508192eSchristos
4549508192eSchristos /* Search for the next value. */
4559508192eSchristos
4569508192eSchristos while (iv < nvals[im]) {
4579508192eSchristos if (*pp == ip)
4589508192eSchristos break;
4599508192eSchristos if (*pp == 0)
4609508192eSchristos iv++;
4619508192eSchristos pp++;
4629508192eSchristos }
4639508192eSchristos
4649508192eSchristos /* Reached the end without a match. */
4659508192eSchristos
4669508192eSchristos if (iv == nvals[im]) {
4679508192eSchristos im = MACRO_MAX;
4689508192eSchristos ip = 0;
4699508192eSchristos pp = NULL;
4709508192eSchristos return NULL;
4719508192eSchristos }
4729508192eSchristos
4739508192eSchristos /* Found a match; skip the remaining pages of this entry. */
4749508192eSchristos
4759508192eSchristos if (++iv < nvals[im])
4769508192eSchristos while (*pp++ != 0)
4779508192eSchristos continue;
4789508192eSchristos
4799508192eSchristos return dbm_get(macros[im][iv - 1].value);
4809508192eSchristos }
481