xref: /netbsd-src/external/bsd/mdocml/dist/mansearch.c (revision a24efa7dea9f1f56c3bdb15a927d3516792ace1c)
1 /*	$Id: mansearch.c,v 1.2 2015/12/17 22:31:12 christos Exp $ */
2 /*
3  * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/mman.h>
21 #include <sys/types.h>
22 
23 #include <assert.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <getopt.h>
27 #include <glob.h>
28 #include <limits.h>
29 #include <regex.h>
30 #include <stdio.h>
31 #include <stdint.h>
32 #include <stddef.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 
37 #if HAVE_OHASH
38 #include <ohash.h>
39 #else
40 #include "compat_ohash.h"
41 #endif
42 #include <sqlite3.h>
43 #ifndef SQLITE_DETERMINISTIC
44 #define SQLITE_DETERMINISTIC 0
45 #endif
46 
47 #include "mandoc.h"
48 #include "mandoc_aux.h"
49 #include "manpath.h"
50 #include "mansearch.h"
51 
/*
 * Table of search key names and its number of entries, both defined
 * in another translation unit.  In this file, the entry at index i
 * is paired with the type-mask bit (1 << i).
 */
extern int mansearch_keymax;
extern const char *const mansearch_keynames[];
54 
/*
 * Helpers for binding parameters of a prepared statement.
 * Each macro binds _v to parameter number _i of statement _s and then
 * increments _i, so _i must be a modifiable lvalue and is evaluated
 * with a side effect.  A failed bind is only reported on stderr using
 * the error message of database _db; execution continues regardless.
 *
 * SQL_BIND_TEXT binds a NUL-terminated string (length -1),
 * SQL_BIND_INT64 binds a 64-bit integer, and SQL_BIND_BLOB binds the
 * raw bytes of the object _v itself (address &_v, size sizeof(_v)).
 * Text and blob are bound with SQLITE_STATIC; per the SQLite API this
 * presumably means the storage is not copied and has to stay valid
 * while the statement uses it.
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { if (SQLITE_OK != sqlite3_bind_text \
		((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_INT64(_db, _s, _i, _v) \
	do { if (SQLITE_OK != sqlite3_bind_int64 \
		((_s), (_i)++, (_v))) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_BLOB(_db, _s, _i, _v) \
	do { if (SQLITE_OK != sqlite3_bind_blob \
		((_s), (_i)++, (&_v), sizeof(_v), SQLITE_STATIC)) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
70 
/*
 * One term of a compiled search expression.
 * Terms are chained into a singly linked list through "next";
 * "open", "and" and "close" record the grouping and the logical
 * operator that surround the term in the original command line.
 */
struct	expr {
	regex_t		 regexp;  /* compiled regexp, if applicable */
	const char	*substr;  /* to search for, if applicable */
	struct expr	*next;    /* next in sequence */
	uint64_t	 bits;    /* type-mask */
	int		 equal;   /* equality, not substring match */
	int		 open;    /* opening parentheses before */
	int		 and;	  /* logical AND before */
	int		 close;   /* closing parentheses after */
};
81 
/*
 * One page found by the search query, collected in a hash table
 * that is keyed on "pageid" before the result array is built.
 */
struct	match {
	uint64_t	 pageid; /* identifier in database */
	uint64_t	 bits; /* name type mask */
	char		*desc; /* manual page description */
	int		 form; /* bit field: formatted, zipped? */
};
88 
/* Forward declarations of the helpers private to this file. */
static	void		 buildnames(const struct mansearch *,
				struct manpage *, sqlite3 *,
				sqlite3_stmt *, uint64_t,
				const char *, int form);
static	char		*buildoutput(sqlite3 *, sqlite3_stmt *,
				 uint64_t, uint64_t);
static	void		*hash_alloc(size_t, void *);
static	void		 hash_free(void *, void *);
static	void		*hash_calloc(size_t, size_t, void *);
static	struct expr	*exprcomp(const struct mansearch *,
				int, char *[]);
static	void		 exprfree(struct expr *);
static	struct expr	*exprterm(const struct mansearch *, char *, int);
static	int		 manpage_compare(const void *, const void *);
static	void		 sql_append(char **sql, size_t *sz,
				const char *newstr, int count);
static	void		 sql_match(sqlite3_context *context,
				int argc, sqlite3_value **argv);
static	void		 sql_regexp(sqlite3_context *context,
				int argc, sqlite3_value **argv);
static	char		*sql_statement(const struct expr *);
110 
111 
112 int
113 mansearch_setup(int start)
114 {
115 	static void	*pagecache;
116 	int		 c;
117 
118 #define	PC_PAGESIZE	1280
119 #define	PC_NUMPAGES	256
120 
121 	if (start) {
122 		if (NULL != pagecache) {
123 			fprintf(stderr, "pagecache already enabled\n");
124 			return((int)MANDOCLEVEL_BADARG);
125 		}
126 
127 		pagecache = mmap(NULL, PC_PAGESIZE * PC_NUMPAGES,
128 		    PROT_READ | PROT_WRITE,
129 		    MAP_SHARED | MAP_ANON, -1, 0);
130 
131 		if (MAP_FAILED == pagecache) {
132 			perror("mmap");
133 			pagecache = NULL;
134 			return((int)MANDOCLEVEL_SYSERR);
135 		}
136 
137 		c = sqlite3_config(SQLITE_CONFIG_PAGECACHE,
138 		    pagecache, PC_PAGESIZE, PC_NUMPAGES);
139 
140 		if (SQLITE_OK == c)
141 			return((int)MANDOCLEVEL_OK);
142 
143 		fprintf(stderr, "pagecache: %s\n", sqlite3_errstr(c));
144 
145 	} else if (NULL == pagecache) {
146 		fprintf(stderr, "pagecache missing\n");
147 		return((int)MANDOCLEVEL_BADARG);
148 	}
149 
150 	if (-1 == munmap(pagecache, PC_PAGESIZE * PC_NUMPAGES)) {
151 		perror("munmap");
152 		pagecache = NULL;
153 		return((int)MANDOCLEVEL_SYSERR);
154 	}
155 
156 	pagecache = NULL;
157 	return((int)MANDOCLEVEL_OK);
158 }
159 
160 int
161 mansearch(const struct mansearch *search,
162 		const struct manpaths *paths,
163 		int argc, char *argv[],
164 		struct manpage **res, size_t *sz)
165 {
166 	int		 fd, rc, c, indexbit;
167 	int64_t		 pageid;
168 	uint64_t	 outbit, iterbit;
169 	char		 buf[PATH_MAX];
170 	char		*sql;
171 	struct manpage	*mpage;
172 	struct expr	*e, *ep;
173 	sqlite3		*db;
174 	sqlite3_stmt	*s, *s2;
175 	struct match	*mp;
176 	struct ohash_info info;
177 	struct ohash	 htab;
178 	unsigned int	 idx;
179 	size_t		 i, j, cur, maxres;
180 
181 	info.calloc = hash_calloc;
182 	info.alloc = hash_alloc;
183 	info.free = hash_free;
184 	info.key_offset = offsetof(struct match, pageid);
185 
186 	*sz = cur = maxres = 0;
187 	sql = NULL;
188 	*res = NULL;
189 	fd = -1;
190 	e = NULL;
191 	rc = 0;
192 
193 	if (0 == argc)
194 		goto out;
195 	if (NULL == (e = exprcomp(search, argc, argv)))
196 		goto out;
197 
198 	if (NULL != search->outkey) {
199 		outbit = TYPE_Nd;
200 		for (indexbit = 0, iterbit = 1;
201 		     indexbit < mansearch_keymax;
202 		     indexbit++, iterbit <<= 1) {
203 			if (0 == strcasecmp(search->outkey,
204 			    mansearch_keynames[indexbit])) {
205 				outbit = iterbit;
206 				break;
207 			}
208 		}
209 	} else
210 		outbit = 0;
211 
212 	/*
213 	 * Save a descriptor to the current working directory.
214 	 * Since pathnames in the "paths" variable might be relative,
215 	 * and we'll be chdir()ing into them, we need to keep a handle
216 	 * on our current directory from which to start the chdir().
217 	 */
218 
219 	if (NULL == getcwd(buf, PATH_MAX)) {
220 		perror("getcwd");
221 		goto out;
222 	} else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
223 		perror(buf);
224 		goto out;
225 	}
226 
227 	sql = sql_statement(e);
228 
229 	/*
230 	 * Loop over the directories (containing databases) for us to
231 	 * search.
232 	 * Don't let missing/bad databases/directories phase us.
233 	 * In each, try to open the resident database and, if it opens,
234 	 * scan it for our match expression.
235 	 */
236 
237 	for (i = 0; i < paths->sz; i++) {
238 		if (-1 == fchdir(fd)) {
239 			perror(buf);
240 			free(*res);
241 			break;
242 		} else if (-1 == chdir(paths->paths[i])) {
243 			perror(paths->paths[i]);
244 			continue;
245 		}
246 
247 		c = sqlite3_open_v2(MANDOC_DB, &db,
248 		    SQLITE_OPEN_READONLY, NULL);
249 
250 		if (SQLITE_OK != c) {
251 			fprintf(stderr, "%s/%s: %s\n",
252 			    paths->paths[i], MANDOC_DB, strerror(errno));
253 			sqlite3_close(db);
254 			continue;
255 		}
256 
257 		/*
258 		 * Define the SQL functions for substring
259 		 * and regular expression matching.
260 		 */
261 
262 		c = sqlite3_create_function(db, "match", 2,
263 		    SQLITE_UTF8 | SQLITE_DETERMINISTIC,
264 		    NULL, sql_match, NULL, NULL);
265 		assert(SQLITE_OK == c);
266 		c = sqlite3_create_function(db, "regexp", 2,
267 		    SQLITE_UTF8 | SQLITE_DETERMINISTIC,
268 		    NULL, sql_regexp, NULL, NULL);
269 		assert(SQLITE_OK == c);
270 
271 		j = 1;
272 		c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
273 		if (SQLITE_OK != c)
274 			fprintf(stderr, "%s\n", sqlite3_errmsg(db));
275 
276 		for (ep = e; NULL != ep; ep = ep->next) {
277 			if (NULL == ep->substr) {
278 				SQL_BIND_BLOB(db, s, j, ep->regexp);
279 			} else
280 				SQL_BIND_TEXT(db, s, j, ep->substr);
281 			if (0 == ((TYPE_Nd | TYPE_Nm) & ep->bits))
282 				SQL_BIND_INT64(db, s, j, ep->bits);
283 		}
284 
285 		memset(&htab, 0, sizeof(struct ohash));
286 		ohash_init(&htab, 4, &info);
287 
288 		/*
289 		 * Hash each entry on its [unique] document identifier.
290 		 * This is a uint64_t.
291 		 * Instead of using a hash function, simply convert the
292 		 * uint64_t to a uint32_t, the hash value's type.
293 		 * This gives good performance and preserves the
294 		 * distribution of buckets in the table.
295 		 */
296 		while (SQLITE_ROW == (c = sqlite3_step(s))) {
297 			pageid = sqlite3_column_int64(s, 2);
298 			idx = ohash_lookup_memory(&htab,
299 			    (char *)&pageid, sizeof(uint64_t),
300 			    (uint32_t)pageid);
301 
302 			if (NULL != ohash_find(&htab, idx))
303 				continue;
304 
305 			mp = mandoc_calloc(1, sizeof(struct match));
306 			mp->pageid = pageid;
307 			mp->form = sqlite3_column_int(s, 1);
308 			mp->bits = sqlite3_column_int64(s, 3);
309 			if (TYPE_Nd == outbit)
310 				mp->desc = mandoc_strdup((const char *)
311 				    sqlite3_column_text(s, 0));
312 			ohash_insert(&htab, idx, mp);
313 		}
314 
315 		if (SQLITE_DONE != c)
316 			fprintf(stderr, "%s\n", sqlite3_errmsg(db));
317 
318 		sqlite3_finalize(s);
319 
320 		c = sqlite3_prepare_v2(db,
321 		    "SELECT sec, arch, name, pageid FROM mlinks "
322 		    "WHERE pageid=? ORDER BY sec, arch, name",
323 		    -1, &s, NULL);
324 		if (SQLITE_OK != c)
325 			fprintf(stderr, "%s\n", sqlite3_errmsg(db));
326 
327 		c = sqlite3_prepare_v2(db,
328 		    "SELECT bits, key, pageid FROM keys "
329 		    "WHERE pageid=? AND bits & ?",
330 		    -1, &s2, NULL);
331 		if (SQLITE_OK != c)
332 			fprintf(stderr, "%s\n", sqlite3_errmsg(db));
333 
334 		for (mp = ohash_first(&htab, &idx);
335 				NULL != mp;
336 				mp = ohash_next(&htab, &idx)) {
337 			if (cur + 1 > maxres) {
338 				maxres += 1024;
339 				*res = mandoc_reallocarray(*res,
340 				    maxres, sizeof(struct manpage));
341 			}
342 			mpage = *res + cur;
343 			mpage->ipath = i;
344 			mpage->bits = mp->bits;
345 			mpage->sec = 10;
346 			mpage->form = mp->form;
347 			buildnames(search, mpage, db, s, mp->pageid,
348 			    paths->paths[i], mp->form);
349 			if (mpage->names != NULL) {
350 				mpage->output = TYPE_Nd & outbit ?
351 				    mp->desc : outbit ?
352 				    buildoutput(db, s2, mp->pageid, outbit) :
353 				    NULL;
354 				cur++;
355 			}
356 			free(mp);
357 		}
358 
359 		sqlite3_finalize(s);
360 		sqlite3_finalize(s2);
361 		sqlite3_close(db);
362 		ohash_delete(&htab);
363 
364 		/*
365 		 * In man(1) mode, prefer matches in earlier trees
366 		 * over matches in later trees.
367 		 */
368 
369 		if (cur && search->firstmatch)
370 			break;
371 	}
372 	qsort(*res, cur, sizeof(struct manpage), manpage_compare);
373 	rc = 1;
374 out:
375 	if (-1 != fd) {
376 		if (-1 == fchdir(fd))
377 			perror(buf);
378 		close(fd);
379 	}
380 	exprfree(e);
381 	free(sql);
382 	*sz = cur;
383 	return(rc);
384 }
385 
386 void
387 mansearch_free(struct manpage *res, size_t sz)
388 {
389 	size_t	 i;
390 
391 	for (i = 0; i < sz; i++) {
392 		free(res[i].file);
393 		free(res[i].names);
394 		free(res[i].output);
395 	}
396 	free(res);
397 }
398 
399 static int
400 manpage_compare(const void *vp1, const void *vp2)
401 {
402 	const struct manpage	*mp1, *mp2;
403 	int			 diff;
404 
405 	mp1 = vp1;
406 	mp2 = vp2;
407 	return(	(diff = mp2->bits - mp1->bits) ? diff :
408 		(diff = mp1->sec - mp2->sec) ? diff :
409 		strcasecmp(mp1->names, mp2->names));
410 }
411 
/*
 * Fill in the "names" and "file" members of mpage for one page.
 *
 * The statement s is bound to pageid and stepped; columns 0, 1
 * and 2 of each row are read as section, architecture and name.
 * Rows not matching search->sec or search->arch are skipped.
 * The accepted names are joined with ", ", and each run of names
 * sharing section and architecture is followed by "(sec)" or
 * "(sec/arch)".  The file member becomes the first path that is
 * readable or that matches a glob(3) pattern, falling back to the
 * first path that was tried.  Both members stay NULL when no row
 * is accepted.  The statement is reset before returning.
 */
static void
buildnames(const struct mansearch *search, struct manpage *mpage,
		sqlite3 *db, sqlite3_stmt *s,
		uint64_t pageid, const char *path, int form)
{
	glob_t		 globinfo;
	char		*firstname, *newnames, *prevsec, *prevarch;
	const char	*oldnames, *sep1, *name, *sec, *sep2, *arch, *fsec;
	size_t		 i;
	int		 c, globres;

	mpage->file = NULL;
	mpage->names = NULL;
	firstname = prevsec = prevarch = NULL;
	i = 1;
	SQL_BIND_INT64(db, s, i, pageid);
	while (SQLITE_ROW == (c = sqlite3_step(s))) {

		/* Decide whether we already have some names. */

		if (NULL == mpage->names) {
			oldnames = "";
			sep1 = "";
		} else {
			oldnames = mpage->names;
			sep1 = ", ";
		}

		/* Fetch the next name, rejecting sec/arch mismatches. */

		sec = (const char *)sqlite3_column_text(s, 0);
		if (search->sec != NULL && strcasecmp(sec, search->sec))
			continue;
		arch = (const char *)sqlite3_column_text(s, 1);
		/* An empty architecture in the row is never rejected. */
		if (search->arch != NULL && *arch != '\0' &&
		    strcasecmp(arch, search->arch))
			continue;
		name = (const char *)sqlite3_column_text(s, 2);

		/* Remember the first section found. */

		/* Only while sec is still above 9 and sec is a digit 1-9. */
		if (9 < mpage->sec && '1' <= *sec && '9' >= *sec)
			mpage->sec = (*sec - '1') + 1;

		/* If the section changed, append the old one. */

		if (NULL != prevsec &&
		    (strcmp(sec, prevsec) ||
		     strcmp(arch, prevarch))) {
			sep2 = '\0' == *prevarch ? "" : "/";
			mandoc_asprintf(&newnames, "%s(%s%s%s)",
			    oldnames, prevsec, sep2, prevarch);
			free(mpage->names);
			oldnames = mpage->names = newnames;
			free(prevsec);
			free(prevarch);
			prevsec = prevarch = NULL;
		}

		/* Save the new section, to append it later. */

		if (NULL == prevsec) {
			prevsec = mandoc_strdup(sec);
			prevarch = mandoc_strdup(arch);
		}

		/* Append the new name. */

		mandoc_asprintf(&newnames, "%s%s%s",
		    oldnames, sep1, name);
		free(mpage->names);
		mpage->names = newnames;

		/* Also save the first file name encountered. */

		if (mpage->file != NULL)
			continue;

		/*
		 * From here on, sep1 holds the directory prefix;
		 * it is set anew at the top of the next iteration.
		 */
		if (form & FORM_SRC) {
			sep1 = "man";
			fsec = sec;
		} else {
			sep1 = "cat";
			fsec = "0";
		}
		sep2 = *arch == '\0' ? "" : "/";
		mandoc_asprintf(&mpage->file, "%s/%s%s%s%s/%s.%s",
		    path, sep1, sec, sep2, arch, name, fsec);
		if (access(mpage->file, R_OK) != -1)
			continue;

		/* Handle unusual file name extensions. */

		/* Keep the very first candidate as a last resort. */
		if (firstname == NULL)
			firstname = mpage->file;
		else
			free(mpage->file);
		mandoc_asprintf(&mpage->file, "%s/%s%s%s%s/%s.*",
		    path, sep1, sec, sep2, arch, name);
		globres = glob(mpage->file, 0, NULL, &globinfo);
		free(mpage->file);
		/* Use the first glob match; NULL retries on the next row. */
		mpage->file = globres ? NULL :
		    mandoc_strdup(*globinfo.gl_pathv);
		globfree(&globinfo);
	}
	if (c != SQLITE_DONE)
		fprintf(stderr, "%s\n", sqlite3_errmsg(db));
	sqlite3_reset(s);

	/* If none of the files is usable, use the first name. */

	if (mpage->file == NULL)
		mpage->file = firstname;
	else if (mpage->file != firstname)
		free(firstname);

	/* Append one final section to the names. */

	if (prevsec != NULL) {
		sep2 = *prevarch == '\0' ? "" : "/";
		mandoc_asprintf(&newnames, "%s(%s%s%s)",
		    mpage->names, prevsec, sep2, prevarch);
		free(mpage->names);
		mpage->names = newnames;
		free(prevsec);
		free(prevarch);
	}
}
540 
541 static char *
542 buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t pageid, uint64_t outbit)
543 {
544 	char		*output, *newoutput;
545 	const char	*oldoutput, *sep1, *data;
546 	size_t		 i;
547 	int		 c;
548 
549 	output = NULL;
550 	i = 1;
551 	SQL_BIND_INT64(db, s, i, pageid);
552 	SQL_BIND_INT64(db, s, i, outbit);
553 	while (SQLITE_ROW == (c = sqlite3_step(s))) {
554 		if (NULL == output) {
555 			oldoutput = "";
556 			sep1 = "";
557 		} else {
558 			oldoutput = output;
559 			sep1 = " # ";
560 		}
561 		data = (const char *)sqlite3_column_text(s, 1);
562 		mandoc_asprintf(&newoutput, "%s%s%s",
563 		    oldoutput, sep1, data);
564 		free(output);
565 		output = newoutput;
566 	}
567 	if (SQLITE_DONE != c)
568 		fprintf(stderr, "%s\n", sqlite3_errmsg(db));
569 	sqlite3_reset(s);
570 	return(output);
571 }
572 
573 /*
574  * Implement substring match as an application-defined SQL function.
575  * Using the SQL LIKE or GLOB operators instead would be a bad idea
576  * because that would require escaping metacharacters in the string
577  * being searched for.
578  */
579 static void
580 sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
581 {
582 
583 	assert(2 == argc);
584 	sqlite3_result_int(context, NULL != strcasestr(
585 	    (const char *)sqlite3_value_text(argv[1]),
586 	    (const char *)sqlite3_value_text(argv[0])));
587 }
588 
589 /*
590  * Implement regular expression match
591  * as an application-defined SQL function.
592  */
593 static void
594 sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
595 {
596 
597 	assert(2 == argc);
598 	sqlite3_result_int(context, !regexec(
599 	    (regex_t *)(intptr_t)sqlite3_value_blob(argv[0]),
600 	    (const char *)sqlite3_value_text(argv[1]),
601 	    0, NULL, 0));
602 }
603 
/*
 * Append to the string *sql, whose current length is *sz.
 * If count is greater than one, the first character of newstr is
 * appended count times; otherwise, all of newstr is appended once.
 * The buffer is reallocated, *sz is updated to the new length,
 * and the result is always NUL-terminated.
 */
static void
sql_append(char **sql, size_t *sz, const char *newstr, int count)
{
	size_t		 addlen;

	if (count > 1) {
		addlen = (size_t)count;
		*sql = mandoc_realloc(*sql, *sz + addlen + 1);
		memset(*sql + *sz, *newstr, addlen);
	} else {
		addlen = strlen(newstr);
		*sql = mandoc_realloc(*sql, *sz + addlen + 1);
		memcpy(*sql + *sz, newstr, addlen);
	}
	*sz += addlen;
	(*sql)[*sz] = '\0';
}
618 
619 /*
620  * Prepare the search SQL statement.
621  */
622 static char *
623 sql_statement(const struct expr *e)
624 {
625 	char		*sql;
626 	size_t		 sz;
627 	int		 needop;
628 
629 	sql = mandoc_strdup(e->equal ?
630 	    "SELECT desc, form, pageid, bits "
631 		"FROM mpages NATURAL JOIN names WHERE " :
632 	    "SELECT desc, form, pageid, 0 FROM mpages WHERE ");
633 	sz = strlen(sql);
634 
635 	for (needop = 0; NULL != e; e = e->next) {
636 		if (e->and)
637 			sql_append(&sql, &sz, " AND ", 1);
638 		else if (needop)
639 			sql_append(&sql, &sz, " OR ", 1);
640 		if (e->open)
641 			sql_append(&sql, &sz, "(", e->open);
642 		sql_append(&sql, &sz,
643 		    TYPE_Nd & e->bits
644 		    ? (NULL == e->substr
645 			? "desc REGEXP ?"
646 			: "desc MATCH ?")
647 		    : TYPE_Nm == e->bits
648 		    ? (NULL == e->substr
649 			? "pageid IN (SELECT pageid FROM names "
650 			  "WHERE name REGEXP ?)"
651 			: e->equal
652 			? "name = ? "
653 			: "pageid IN (SELECT pageid FROM names "
654 			  "WHERE name MATCH ?)")
655 		    : (NULL == e->substr
656 			? "pageid IN (SELECT pageid FROM keys "
657 			  "WHERE key REGEXP ? AND bits & ?)"
658 			: "pageid IN (SELECT pageid FROM keys "
659 			  "WHERE key MATCH ? AND bits & ?)"), 1);
660 		if (e->close)
661 			sql_append(&sql, &sz, ")", e->close);
662 		needop = 1;
663 	}
664 
665 	return(sql);
666 }
667 
/*
 * Compile a set of string tokens into an expression.
 * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
 * "(", "foo=bar", etc.).
 * The tokens "(", ")", "-a", "-o" and "-i" are treated as grouping,
 * logical AND, logical OR and a case-insensitivity flag for the
 * following term; every other token is handed to exprterm().
 * Returns the head of the resulting list, or NULL (after freeing
 * any partial list) if a token is misplaced, a term fails to
 * compile, or an operator, flag or parenthesis is left dangling.
 */
static struct expr *
exprcomp(const struct mansearch *search, int argc, char *argv[])
{
	uint64_t	 mask;
	int		 i, toopen, logic, igncase, toclose;
	struct expr	*first, *prev, *cur, *next;

	first = cur = NULL;
	logic = igncase = toopen = toclose = 0;

	for (i = 0; i < argc; i++) {
		if (0 == strcmp("(", argv[i])) {
			if (igncase)
				goto fail;
			/* toopen: pending for the next term. */
			/* toclose: parentheses not yet closed. */
			toopen++;
			toclose++;
			continue;
		} else if (0 == strcmp(")", argv[i])) {
			/* Must directly follow a term. */
			if (toopen || logic || igncase || NULL == cur)
				goto fail;
			cur->close++;
			if (0 > --toclose)
				goto fail;
			continue;
		} else if (0 == strcmp("-a", argv[i])) {
			if (toopen || logic || igncase || NULL == cur)
				goto fail;
			logic = 1;
			continue;
		} else if (0 == strcmp("-o", argv[i])) {
			if (toopen || logic || igncase || NULL == cur)
				goto fail;
			logic = 2;
			continue;
		} else if (0 == strcmp("-i", argv[i])) {
			if (igncase)
				goto fail;
			igncase = 1;
			continue;
		}
		/* Anything else is a search term. */
		next = exprterm(search, argv[i], !igncase);
		if (NULL == next)
			goto fail;
		if (NULL == first)
			first = next;
		else
			cur->next = next;
		prev = cur = next;

		/*
		 * Searching for descriptions must be split out
		 * because they are stored in the mpages table,
		 * not in the keys table.
		 */

		for (mask = TYPE_Nm; mask <= TYPE_Nd; mask <<= 1) {
			if (mask & cur->bits && ~mask & cur->bits) {
				/*
				 * Duplicate the term: the original keeps
				 * only this bit, the copy keeps the rest.
				 * The copy shares the compiled regexp.
				 */
				next = mandoc_calloc(1,
				    sizeof(struct expr));
				memcpy(next, cur, sizeof(struct expr));
				prev->open = 1;
				cur->bits = mask;
				cur->next = next;
				cur = next;
				cur->bits &= ~mask;
			}
		}
		/* prev is the first, cur the last piece of this term. */
		prev->and = (1 == logic);
		prev->open += toopen;
		if (cur != prev)
			cur->close = 1;

		toopen = logic = igncase = 0;
	}
	/* Reject dangling operators, flags and open parentheses. */
	if ( ! (toopen || logic || igncase || toclose))
		return(first);

fail:
	if (NULL != first)
		exprfree(first);
	return(NULL);
}
755 
756 static struct expr *
757 exprterm(const struct mansearch *search, char *buf, int cs)
758 {
759 	char		 errbuf[BUFSIZ];
760 	struct expr	*e;
761 	char		*key, *val;
762 	uint64_t	 iterbit;
763 	int		 i, irc;
764 
765 	if ('\0' == *buf)
766 		return(NULL);
767 
768 	e = mandoc_calloc(1, sizeof(struct expr));
769 
770 	if (search->argmode == ARG_NAME) {
771 		e->bits = TYPE_Nm;
772 		e->substr = buf;
773 		e->equal = 1;
774 		return(e);
775 	}
776 
777 	/*
778 	 * Separate macro keys from search string.
779 	 * If needed, request regular expression handling
780 	 * by setting e->substr to NULL.
781 	 */
782 
783 	if (search->argmode == ARG_WORD) {
784 		e->bits = TYPE_Nm;
785 		e->substr = NULL;
786 		mandoc_asprintf(&val, "[[:<:]]%s[[:>:]]", buf);
787 		cs = 0;
788 	} else if ((val = strpbrk(buf, "=~")) == NULL) {
789 		e->bits = TYPE_Nm | TYPE_Nd;
790 		e->substr = buf;
791 	} else {
792 		if (val == buf)
793 			e->bits = TYPE_Nm | TYPE_Nd;
794 		if ('=' == *val)
795 			e->substr = val + 1;
796 		*val++ = '\0';
797 		if (NULL != strstr(buf, "arch"))
798 			cs = 0;
799 	}
800 
801 	/* Compile regular expressions. */
802 
803 	if (NULL == e->substr) {
804 		irc = regcomp(&e->regexp, val,
805 		    REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE));
806 		if (search->argmode == ARG_WORD)
807 			free(val);
808 		if (irc) {
809 			regerror(irc, &e->regexp, errbuf, sizeof(errbuf));
810 			fprintf(stderr, "regcomp: %s\n", errbuf);
811 			free(e);
812 			return(NULL);
813 		}
814 	}
815 
816 	if (e->bits)
817 		return(e);
818 
819 	/*
820 	 * Parse out all possible fields.
821 	 * If the field doesn't resolve, bail.
822 	 */
823 
824 	while (NULL != (key = strsep(&buf, ","))) {
825 		if ('\0' == *key)
826 			continue;
827 		for (i = 0, iterbit = 1;
828 		     i < mansearch_keymax;
829 		     i++, iterbit <<= 1) {
830 			if (0 == strcasecmp(key,
831 			    mansearch_keynames[i])) {
832 				e->bits |= iterbit;
833 				break;
834 			}
835 		}
836 		if (i == mansearch_keymax) {
837 			if (strcasecmp(key, "any")) {
838 				free(e);
839 				return(NULL);
840 			}
841 			e->bits |= ~0ULL;
842 		}
843 	}
844 
845 	return(e);
846 }
847 
848 static void
849 exprfree(struct expr *p)
850 {
851 	struct expr	*pp;
852 
853 	while (NULL != p) {
854 		pp = p->next;
855 		free(p);
856 		p = pp;
857 	}
858 }
859 
/*
 * Zeroing allocation callback for the hash table;
 * the user argument is not used.
 */
static void *
hash_calloc(size_t nmemb, size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_calloc(nmemb, sz);
	return(mem);
}
866 
/*
 * Allocation callback for the hash table;
 * the user argument is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{
	void	*mem;

	(void)arg;
	mem = mandoc_malloc(sz);
	return(mem);
}
873 
/*
 * Deallocation callback for the hash table;
 * the user argument is not used.
 */
static void
hash_free(void *p, void *arg)
{

	(void)arg;
	free(p);
}
880